Index: src/Makefile ================================================================== --- src/Makefile +++ src/Makefile @@ -48,10 +48,11 @@ OFString+Hashing.m \ OFString+Serialization.m \ OFString+URLEncoding.m \ OFString+XMLEscaping.m \ OFString+XMLUnescaping.m \ + OFString_UTF8.m \ OFTCPSocket.m \ ${OFTHREAD_M} \ OFURL.m \ OFXMLAttribute.m \ OFXMLCDATA.m \ @@ -82,10 +83,11 @@ OFCountedSet_hashtable.m \ OFDictionary_hashtable.m \ OFMutableArray_adjacent.m \ OFMutableDictionary_hashtable.m \ OFMutableSet_hashtable.m \ + OFMutableString_UTF8.m \ OFSet_hashtable.m \ ${OFSTREAMOBSERVER_KQUEUE_M} \ ${OFSTREAMOBSERVER_POLL_M} \ ${OFSTREAMOBSERVER_SELECT_M} \ OFTCPSocket+SOCKS5.m \ Index: src/OFConstantString.h ================================================================== --- src/OFConstantString.h +++ src/OFConstantString.h @@ -28,13 +28,10 @@ /** * \brief A class for storing constant strings using the \@"" literal. */ @interface OFConstantString: OFString -/** - * \brief Completes initialization of the OFConstantString - * - * This method finishes the initialization, as the constant strings created by - * the compiler are not fully initialized. 
- */ -- (void)finishInitialization; +{ + char *cString; + size_t cStringLength; +} @end Index: src/OFConstantString.m ================================================================== --- src/OFConstantString.m +++ src/OFConstantString.m @@ -18,10 +18,11 @@ #include #include #import "OFConstantString.h" +#import "OFString_UTF8.h" #import "OFInvalidEncodingException.h" #import "OFNotImplementedException.h" #import "OFOutOfMemoryException.h" @@ -28,35 +29,112 @@ #ifdef OF_APPLE_RUNTIME # import void *_OFConstantStringClassReference; #endif + +@interface OFString_const: OFString_UTF8 +@end + +@implementation OFString_const ++ alloc +{ + @throw [OFNotImplementedException exceptionWithClass: self + selector: _cmd]; +} + +- (void)addMemoryToPool: (void*)ptr +{ + @throw [OFNotImplementedException exceptionWithClass: isa + selector: _cmd]; +} + +- (void*)allocMemoryWithSize: (size_t)size +{ + @throw [OFNotImplementedException exceptionWithClass: isa + selector: _cmd]; +} + +- (void*)allocMemoryForNItems: (size_t)nitems + withSize: (size_t)size +{ + @throw [OFNotImplementedException exceptionWithClass: isa + selector: _cmd]; +} + +- (void*)resizeMemory: (void*)ptr + toSize: (size_t)size +{ + @throw [OFNotImplementedException exceptionWithClass: isa + selector: _cmd]; +} + +- (void*)resizeMemory: (void*)ptr + toNItems: (size_t)nitems + withSize: (size_t)size +{ + @throw [OFNotImplementedException exceptionWithClass: isa + selector: _cmd]; +} + +- (void)freeMemory: (void*)ptr +{ + @throw [OFNotImplementedException exceptionWithClass: isa + selector: _cmd]; +} + +- retain +{ + return self; +} + +- autorelease +{ + return self; +} + +- (unsigned int)retainCount +{ + return OF_RETAIN_COUNT_MAX; +} + +- (void)release +{ +} + +- (void)dealloc +{ + @throw [OFNotImplementedException exceptionWithClass: isa + selector: _cmd]; + [super dealloc]; /* Get rid of a stupid warning */ +} +@end @implementation OFConstantString -#ifdef OF_APPLE_RUNTIME + (void)load { +#ifdef OF_APPLE_RUNTIME 
objc_setFutureClass((Class)&_OFConstantStringClassReference, "OFConstantString"); -} #endif + + if (self == [OFConstantString class]) + [self inheritMethodsFromClass: [OFString_UTF8 class]]; +} - (void)finishInitialization { - struct of_string_ivars *ivars; - - if (initialized == SIZE_MAX) - return; + struct of_string_utf8_ivars *ivars; if ((ivars = malloc(sizeof(*ivars))) == NULL) @throw [OFOutOfMemoryException exceptionWithClass: isa requestedSize: sizeof(*ivars)]; memset(ivars, 0, sizeof(*ivars)); - ivars->cString = (char*)s; - ivars->cStringLength = initialized; + ivars->cString = cString; + ivars->cStringLength = cStringLength; switch (of_string_check_utf8(ivars->cString, ivars->cStringLength, &ivars->length)) { case 1: ivars->UTF8 = YES; @@ -64,221 +142,20 @@ case -1: free(ivars); @throw [OFInvalidEncodingException exceptionWithClass: isa]; } - s = ivars; - initialized = SIZE_MAX; + cString = (char*)ivars; + isa = [OFString_const class]; } -/* - * The following methods are not available since it's a constant string, which - * can't be allocated or initialized at runtime. 
- */ + alloc { @throw [OFNotImplementedException exceptionWithClass: self selector: _cmd]; } -- init -{ - Class c = isa; - [self release]; - @throw [OFNotImplementedException exceptionWithClass: c - selector: _cmd]; -} - -- initWithUTF8String: (const char*)UTF8String -{ - Class c = isa; - [self release]; - @throw [OFNotImplementedException exceptionWithClass: c - selector: _cmd]; -} - -- initWithUTF8String: (const char*)UTF8String - length: (size_t)UTF8StringLength -{ - Class c = isa; - [self release]; - @throw [OFNotImplementedException exceptionWithClass: c - selector: _cmd]; -} - -- initWithCString: (const char*)cString - encoding: (of_string_encoding_t)encoding -{ - Class c = isa; - [self release]; - @throw [OFNotImplementedException exceptionWithClass: c - selector: _cmd]; -} - -- initWithCString: (const char*)cString - encoding: (of_string_encoding_t)encoding - length: (size_t)cStringLength -{ - Class c = isa; - [self release]; - @throw [OFNotImplementedException exceptionWithClass: c - selector: _cmd]; -} - -- initWithString: (OFString*)string -{ - Class c = isa; - [self release]; - @throw [OFNotImplementedException exceptionWithClass: c - selector: _cmd]; -} - -- initWithUnicodeString: (const of_unichar_t*)string -{ - Class c = isa; - [self release]; - @throw [OFNotImplementedException exceptionWithClass: c - selector: _cmd]; -} - -- initWithUnicodeString: (const of_unichar_t*)string - byteOrder: (of_endianess_t)byteOrder -{ - Class c = isa; - [self release]; - @throw [OFNotImplementedException exceptionWithClass: c - selector: _cmd]; -} - -- initWithUnicodeString: (const of_unichar_t*)string - length: (size_t)length -{ - Class c = isa; - [self release]; - @throw [OFNotImplementedException exceptionWithClass: c - selector: _cmd]; -} - -- initWithUnicodeString: (const of_unichar_t*)string - byteOrder: (of_endianess_t)byteOrder - length: (size_t)length -{ - Class c = isa; - [self release]; - @throw [OFNotImplementedException exceptionWithClass: c - selector: 
_cmd]; -} - -- initWithUTF16String: (const uint16_t*)string - byteOrder: (of_endianess_t)byteOrder -{ - Class c = isa; - [self release]; - @throw [OFNotImplementedException exceptionWithClass: c - selector: _cmd]; -} - -- initWithUTF16String: (const uint16_t*)string - length: (size_t)length -{ - Class c = isa; - [self release]; - @throw [OFNotImplementedException exceptionWithClass: c - selector: _cmd]; -} - -- initWithUTF16String: (const uint16_t*)string - byteOrder: (of_endianess_t)byteOrder - length: (size_t)length -{ - Class c = isa; - [self release]; - @throw [OFNotImplementedException exceptionWithClass: c - selector: _cmd]; -} - -- initWithFormat: (OFConstantString*)format, ... -{ - Class c = isa; - [self release]; - @throw [OFNotImplementedException exceptionWithClass: c - selector: _cmd]; -} - -- initWithFormat: (OFConstantString*)format - arguments: (va_list)args -{ - Class c = isa; - [self release]; - @throw [OFNotImplementedException exceptionWithClass: c - selector: _cmd]; -} - -- initWithPath: (OFString*)firstComponent, ... 
-{ - Class c = isa; - [self release]; - @throw [OFNotImplementedException exceptionWithClass: c - selector: _cmd]; -} - -- initWithPath: (OFString*)firstComponent - arguments: (va_list)arguments -{ - Class c = isa; - [self release]; - @throw [OFNotImplementedException exceptionWithClass: c - selector: _cmd]; -} - -- initWithContentsOfFile: (OFString*)path -{ - Class c = isa; - [self release]; - @throw [OFNotImplementedException exceptionWithClass: c - selector: _cmd]; -} - -- initWithContentsOfFile: (OFString*)path - encoding: (of_string_encoding_t)encoding -{ - Class c = isa; - [self release]; - @throw [OFNotImplementedException exceptionWithClass: c - selector: _cmd]; -} - -- initWithContentsOfURL: (OFURL*)URL -{ - Class c = isa; - [self release]; - @throw [OFNotImplementedException exceptionWithClass: c - selector: _cmd]; -} - -- initWithContentsOfURL: (OFURL*)URL - encoding: (of_string_encoding_t)encoding -{ - Class c = isa; - [self release]; - @throw [OFNotImplementedException exceptionWithClass: c - selector: _cmd]; -} - -/* From protocol OFSerializing */ -- initWithSerialization: (OFXMLElement*)element -{ - Class c = isa; - [self release]; - @throw [OFNotImplementedException exceptionWithClass: c - selector: _cmd]; -} - -/* - * The following methods are not available because constant strings are - * preallocated by the compiler and thus don't have a memory pool. - */ - (void)addMemoryToPool: (void*)ptr { @throw [OFNotImplementedException exceptionWithClass: isa selector: _cmd]; } @@ -315,14 +192,10 @@ { @throw [OFNotImplementedException exceptionWithClass: isa selector: _cmd]; } -/* - * The following methods are unnecessary because constant strings are - * singletons. - */ - retain { return self; } @@ -346,321 +219,291 @@ selector: _cmd]; [super dealloc]; /* Get rid of a stupid warning */ } /* - * In all following methods, it is checked whether the constant string has been - * initialized. If not, it will be initialized. 
Finally, the implementation of - * the superclass will be called. + * In all following methods, the constant string is converted to an + * OFString_UTF8 and the message sent again. */ /* From protocol OFCopying */ - copy { - if (initialized != SIZE_MAX) - [self finishInitialization]; + [self finishInitialization]; - return [super copy]; + return [self copy]; } /* From protocol OFMutableCopying */ - mutableCopy { - if (initialized != SIZE_MAX) - [self finishInitialization]; + [self finishInitialization]; - return [super mutableCopy]; + return [self mutableCopy]; } /* From protocol OFComparing */ - (of_comparison_result_t)compare: (id)object { - if (initialized != SIZE_MAX) - [self finishInitialization]; + [self finishInitialization]; - return [super compare: object]; + return [self compare: object]; } /* From OFObject, but reimplemented in OFString */ - (BOOL)isEqual: (id)object { - if (initialized != SIZE_MAX) - [self finishInitialization]; + [self finishInitialization]; - return [super isEqual: object]; + return [self isEqual: object]; } - (uint32_t)hash { - if (initialized != SIZE_MAX) - [self finishInitialization]; + [self finishInitialization]; - return [super hash]; + return [self hash]; } - (OFString*)description { - if (initialized != SIZE_MAX) - [self finishInitialization]; + [self finishInitialization]; - return [super description]; + return [self description]; } /* From OFString */ - (const char*)UTF8String { - if (initialized != SIZE_MAX) - [self finishInitialization]; + [self finishInitialization]; - return [super UTF8String]; + return [self UTF8String]; } - (const char*)cStringWithEncoding: (of_string_encoding_t)encoding { - if (initialized != SIZE_MAX) - [self finishInitialization]; + [self finishInitialization]; - return [super cStringWithEncoding: encoding]; + return [self cStringWithEncoding: encoding]; } - (size_t)length { - if (initialized != SIZE_MAX) - [self finishInitialization]; + [self finishInitialization]; - return [super length]; + return 
[self length]; } - (size_t)UTF8StringLength { - if (initialized != SIZE_MAX) - [self finishInitialization]; + [self finishInitialization]; - return [super UTF8StringLength]; + return [self UTF8StringLength]; } - (size_t)cStringLengthWithEncoding: (of_string_encoding_t)encoding { - if (initialized != SIZE_MAX) - [self finishInitialization]; + [self finishInitialization]; - return [super cStringLengthWithEncoding: encoding]; + return [self cStringLengthWithEncoding: encoding]; } - (of_comparison_result_t)caseInsensitiveCompare: (OFString*)otherString { - if (initialized != SIZE_MAX) - [self finishInitialization]; + [self finishInitialization]; - return [super caseInsensitiveCompare: otherString]; + return [self caseInsensitiveCompare: otherString]; } - (of_unichar_t)characterAtIndex: (size_t)index { - if (initialized != SIZE_MAX) - [self finishInitialization]; + [self finishInitialization]; + + return [self characterAtIndex: index]; +} + +- (void)getCharacters: (of_unichar_t*)buffer + inRange: (of_range_t)range +{ + [self finishInitialization]; - return [super characterAtIndex: index]; + return [self getCharacters: buffer + inRange: range]; } - (size_t)indexOfFirstOccurrenceOfString: (OFString*)string { - if (initialized != SIZE_MAX) - [self finishInitialization]; + [self finishInitialization]; - return [super indexOfFirstOccurrenceOfString: string]; + return [self indexOfFirstOccurrenceOfString: string]; } - (size_t)indexOfLastOccurrenceOfString: (OFString*)string { - if (initialized != SIZE_MAX) - [self finishInitialization]; + [self finishInitialization]; - return [super indexOfLastOccurrenceOfString: string]; + return [self indexOfLastOccurrenceOfString: string]; } - (BOOL)containsString: (OFString*)string { - if (initialized != SIZE_MAX) - [self finishInitialization]; + [self finishInitialization]; - return [super containsString: string]; + return [self containsString: string]; } - (OFString*)substringWithRange: (of_range_t)range { - if (initialized != SIZE_MAX) 
- [self finishInitialization]; + [self finishInitialization]; - return [super substringWithRange: range]; + return [self substringWithRange: range]; } - (OFString*)stringByAppendingString: (OFString*)string { - if (initialized != SIZE_MAX) - [self finishInitialization]; + [self finishInitialization]; - return [super stringByAppendingString: string]; + return [self stringByAppendingString: string]; } - (OFString*)stringByPrependingString: (OFString*)string { - if (initialized != SIZE_MAX) - [self finishInitialization]; + [self finishInitialization]; - return [super stringByPrependingString: string]; + return [self stringByPrependingString: string]; } - (OFString*)uppercaseString { - if (initialized != SIZE_MAX) - [self finishInitialization]; + [self finishInitialization]; - return [super uppercaseString]; + return [self uppercaseString]; } - (OFString*)lowercaseString { - if (initialized != SIZE_MAX) - [self finishInitialization]; + [self finishInitialization]; - return [super lowercaseString]; + return [self lowercaseString]; } - (OFString*)stringByDeletingLeadingWhitespaces { - if (initialized != SIZE_MAX) - [self finishInitialization]; + [self finishInitialization]; - return [super stringByDeletingLeadingWhitespaces]; + return [self stringByDeletingLeadingWhitespaces]; } - (OFString*)stringByDeletingTrailingWhitespaces { - if (initialized != SIZE_MAX) - [self finishInitialization]; + [self finishInitialization]; - return [super stringByDeletingTrailingWhitespaces]; + return [self stringByDeletingTrailingWhitespaces]; } - (OFString*)stringByDeletingEnclosingWhitespaces { - if (initialized != SIZE_MAX) - [self finishInitialization]; + [self finishInitialization]; - return [super stringByDeletingEnclosingWhitespaces]; + return [self stringByDeletingEnclosingWhitespaces]; } - (BOOL)hasPrefix: (OFString*)prefix { - if (initialized != SIZE_MAX) - [self finishInitialization]; + [self finishInitialization]; - return [super hasPrefix: prefix]; + return [self hasPrefix: 
prefix]; } - (BOOL)hasSuffix: (OFString*)suffix { - if (initialized != SIZE_MAX) - [self finishInitialization]; + [self finishInitialization]; - return [super hasSuffix: suffix]; + return [self hasSuffix: suffix]; } - (OFArray*)componentsSeparatedByString: (OFString*)delimiter { - if (initialized != SIZE_MAX) - [self finishInitialization]; + [self finishInitialization]; - return [super componentsSeparatedByString: delimiter]; + return [self componentsSeparatedByString: delimiter]; } - (OFArray*)pathComponents { - if (initialized != SIZE_MAX) - [self finishInitialization]; + [self finishInitialization]; - return [super pathComponents]; + return [self pathComponents]; } - (OFString*)lastPathComponent { - if (initialized != SIZE_MAX) - [self finishInitialization]; + [self finishInitialization]; - return [super lastPathComponent]; + return [self lastPathComponent]; } - (OFString*)stringByDeletingLastPathComponent { - if (initialized != SIZE_MAX) - [self finishInitialization]; + [self finishInitialization]; - return [super stringByDeletingLastPathComponent]; + return [self stringByDeletingLastPathComponent]; } - (intmax_t)decimalValue { - if (initialized != SIZE_MAX) - [self finishInitialization]; + [self finishInitialization]; - return [super decimalValue]; + return [self decimalValue]; } - (uintmax_t)hexadecimalValue { - if (initialized != SIZE_MAX) - [self finishInitialization]; + [self finishInitialization]; - return [super hexadecimalValue]; + return [self hexadecimalValue]; } - (float)floatValue { - if (initialized != SIZE_MAX) - [self finishInitialization]; + [self finishInitialization]; - return [super floatValue]; + return [self floatValue]; } - (double)doubleValue { - if (initialized != SIZE_MAX) - [self finishInitialization]; + [self finishInitialization]; - return [super doubleValue]; + return [self doubleValue]; } - (const of_unichar_t*)unicodeString { - if (initialized != SIZE_MAX) - [self finishInitialization]; + [self finishInitialization]; - return 
[super unicodeString]; + return [self unicodeString]; } - (const uint16_t*)UTF16String { - if (initialized != SIZE_MAX) - [self finishInitialization]; + [self finishInitialization]; - return [super UTF16String]; + return [self UTF16String]; } - (void)writeToFile: (OFString*)path { - if (initialized != SIZE_MAX) - [self finishInitialization]; + [self finishInitialization]; - return [super writeToFile: path]; + return [self writeToFile: path]; } #ifdef OF_HAVE_BLOCKS - (void)enumerateLinesUsingBlock: (of_string_line_enumeration_block_t)block { - if (initialized != SIZE_MAX) - [self finishInitialization]; + [self finishInitialization]; - return [super enumerateLinesUsingBlock: block]; + return [self enumerateLinesUsingBlock: block]; } #endif @end Index: src/OFMutableString.h ================================================================== --- src/OFMutableString.h +++ src/OFMutableString.h @@ -21,10 +21,19 @@ /** * \brief A class for storing and modifying strings. */ @interface OFMutableString: OFString +/** + * \brief Sets the character at the specified index. + * + * \param character The character to set + * \param index The index where to set the character + */ +- (void)setCharacter: (of_unichar_t)character + atIndex: (size_t)index; + /** * \brief Appends a UTF-8 encoded C string to the OFMutableString. 
* * \param UTF8String A UTF-8 encoded C string to append */ Index: src/OFMutableString.m ================================================================== --- src/OFMutableString.m +++ src/OFMutableString.m @@ -15,232 +15,317 @@ */ #include "config.h" #include -#include #include #include -#include #import "OFString.h" +#import "OFMutableString_UTF8.h" #import "OFAutoreleasePool.h" #import "OFInvalidArgumentException.h" -#import "OFInvalidEncodingException.h" #import "OFInvalidFormatException.h" -#import "OFOutOfMemoryException.h" -#import "OFOutOfRangeException.h" +#import "OFNotImplementedException.h" #import "macros.h" #import "of_asprintf.h" #import "unicode.h" + +static struct { + Class isa; +} placeholder; + +@interface OFMutableString_placeholder: OFMutableString +@end + +@implementation OFMutableString_placeholder +- init +{ + return (id)[[OFMutableString_UTF8 alloc] init]; +} + +- initWithUTF8String: (const char*)UTF8String +{ + return (id)[[OFMutableString_UTF8 alloc] + initWithUTF8String: UTF8String]; +} + +- initWithUTF8String: (const char*)UTF8String + length: (size_t)UTF8StringLength +{ + return (id)[[OFMutableString_UTF8 alloc] + initWithUTF8String: UTF8String + length: UTF8StringLength]; +} + +- initWithCString: (const char*)cString + encoding: (of_string_encoding_t)encoding +{ + return (id)[[OFMutableString_UTF8 alloc] initWithCString: cString + encoding: encoding]; +} + +- initWithCString: (const char*)cString + encoding: (of_string_encoding_t)encoding + length: (size_t)cStringLength +{ + return (id)[[OFMutableString_UTF8 alloc] + initWithCString: cString + encoding: encoding + length: cStringLength]; +} + +- initWithString: (OFString*)string +{ + return (id)[[OFMutableString_UTF8 alloc] initWithString: string]; +} + +- initWithUnicodeString: (const of_unichar_t*)string +{ + return (id)[[OFMutableString_UTF8 alloc] initWithUnicodeString: string]; +} + +- initWithUnicodeString: (const of_unichar_t*)string + byteOrder: (of_endianess_t)byteOrder +{ + 
return (id)[[OFMutableString_UTF8 alloc] + initWithUnicodeString: string + byteOrder: byteOrder]; +} + +- initWithUnicodeString: (const of_unichar_t*)string + length: (size_t)length +{ + return (id)[[OFMutableString_UTF8 alloc] initWithUnicodeString: string + length: length]; +} + +- initWithUnicodeString: (const of_unichar_t*)string + byteOrder: (of_endianess_t)byteOrder + length: (size_t)length +{ + return (id)[[OFMutableString_UTF8 alloc] + initWithUnicodeString: string + byteOrder: byteOrder + length: length]; +} + +- initWithUTF16String: (const uint16_t*)string +{ + return (id)[[OFMutableString_UTF8 alloc] initWithUTF16String: string]; +} + +- initWithUTF16String: (const uint16_t*)string + byteOrder: (of_endianess_t)byteOrder +{ + return (id)[[OFMutableString_UTF8 alloc] + initWithUTF16String: string + byteOrder: byteOrder]; +} + +- initWithUTF16String: (const uint16_t*)string + length: (size_t)length +{ + return (id)[[OFMutableString_UTF8 alloc] initWithUTF16String: string + length: length]; +} + +- initWithUTF16String: (const uint16_t*)string + byteOrder: (of_endianess_t)byteOrder + length: (size_t)length +{ + return (id)[[OFMutableString_UTF8 alloc] + initWithUTF16String: string + byteOrder: byteOrder + length: length]; +} + +- initWithFormat: (OFConstantString*)format, ... +{ + id ret; + va_list arguments; + + va_start(arguments, format); + ret = [[OFMutableString_UTF8 alloc] initWithFormat: format + arguments: arguments]; + va_end(arguments); + + return ret; +} + +- initWithFormat: (OFConstantString*)format + arguments: (va_list)arguments +{ + return (id)[[OFMutableString_UTF8 alloc] initWithFormat: format + arguments: arguments]; +} + +- initWithPath: (OFString*)firstComponent, ... 
+{ + id ret; + va_list arguments; + + va_start(arguments, firstComponent); + ret = [[OFMutableString_UTF8 alloc] initWithPath: firstComponent + arguments: arguments]; + va_end(arguments); + + return ret; +} + +- initWithPath: (OFString*)firstComponent + arguments: (va_list)arguments +{ + return (id)[[OFMutableString_UTF8 alloc] initWithPath: firstComponent + arguments: arguments]; +} + +- initWithContentsOfFile: (OFString*)path +{ + return (id)[[OFMutableString_UTF8 alloc] initWithContentsOfFile: path]; +} + +- initWithContentsOfFile: (OFString*)path + encoding: (of_string_encoding_t)encoding +{ + return (id)[[OFMutableString_UTF8 alloc] + initWithContentsOfFile: path + encoding: encoding]; +} + +- initWithContentsOfURL: (OFURL*)URL +{ + return (id)[[OFMutableString_UTF8 alloc] initWithContentsOfURL: URL]; +} + +- initWithContentsOfURL: (OFURL*)URL + encoding: (of_string_encoding_t)encoding +{ + return (id)[[OFMutableString_UTF8 alloc] + initWithContentsOfURL: URL + encoding: encoding]; +} + +- initWithSerialization: (OFXMLElement*)element +{ + return (id)[[OFMutableString_UTF8 alloc] + initWithSerialization: element]; +} + +- retain +{ + return self; +} + +- autorelease +{ + return self; +} + +- (void)release +{ +} + +- (void)dealloc +{ + @throw [OFNotImplementedException exceptionWithClass: isa + selector: _cmd]; + [super dealloc]; /* Get rid of a stupid warning */ +} +@end @implementation OFMutableString ++ (void)initialize +{ + if (self == [OFMutableString class]) + placeholder.isa = [OFMutableString_placeholder class]; +} + ++ alloc +{ + if (self == [OFMutableString class]) + return (id)&placeholder; + + return [super alloc]; +} + - (void)_applyTable: (const of_unichar_t* const[])table withSize: (size_t)tableSize { - of_unichar_t c; - of_unichar_t *unicodeString; - size_t unicodeLen, newCStringLength, cLen; - size_t i, j, d; - char *newCString; - - if (!s->UTF8) { - assert(tableSize >= 1); - - uint8_t *p = (uint8_t*)s->cString + s->cStringLength; - uint8_t t; 
- - while (--p >= (uint8_t*)s->cString) - if ((t = table[0][*p]) != 0) - *p = t; - - return; - } - - unicodeLen = [self length]; - unicodeString = [self allocMemoryForNItems: unicodeLen - ofSize: sizeof(of_unichar_t)]; - - i = j = 0; - newCStringLength = 0; - - while (i < s->cStringLength) { - cLen = of_string_utf8_to_unicode(s->cString + i, - s->cStringLength - i, &c); - - if (cLen == 0 || c > 0x10FFFF) { - [self freeMemory: unicodeString]; - @throw [OFInvalidEncodingException - exceptionWithClass: isa]; - } - - if (c >> 8 < tableSize) { - of_unichar_t tc = table[c >> 8][c & 0xFF]; - - if (tc) - c = tc; - } - unicodeString[j++] = c; - - if (c < 0x80) - newCStringLength++; - else if (c < 0x800) - newCStringLength += 2; - else if (c < 0x10000) - newCStringLength += 3; - else if (c < 0x110000) - newCStringLength += 4; - else { - [self freeMemory: unicodeString]; - @throw [OFInvalidEncodingException - exceptionWithClass: isa]; - } - - i += cLen; - } - - @try { - newCString = [self allocMemoryWithSize: newCStringLength + 1]; - } @catch (id e) { - [self freeMemory: unicodeString]; - @throw e; - } - - j = 0; - - for (i = 0; i < unicodeLen; i++) { - if ((d = of_string_unicode_to_utf8(unicodeString[i], - newCString + j)) == 0) { - [self freeMemory: unicodeString]; - [self freeMemory: newCString]; - @throw [OFInvalidEncodingException - exceptionWithClass: isa]; - } - j += d; - } - - assert(j == newCStringLength); - newCString[j] = 0; - [self freeMemory: unicodeString]; - - [self freeMemory: s->cString]; - s->cString = newCString; - s->cStringLength = newCStringLength; - - /* - * Even though cStringLength can change, length cannot, therefore no - * need to change it. 
- */ + OFAutoreleasePool *pool = [[OFAutoreleasePool alloc] init]; + const of_unichar_t *string = [self unicodeString]; + size_t i, length = [self length]; + + for (i = 0; i < length; i++) { + of_unichar_t c = string[i]; + + if (c >> 8 < tableSize && table[c >> 8][c & 0xFF]) + [self setCharacter: table[c >> 8][c & 0xFF] + atIndex: i]; + } + + [pool release]; +} + +- (void)setCharacter: (of_unichar_t)character + atIndex: (size_t)index +{ + @throw [OFNotImplementedException exceptionWithClass: isa + selector: _cmd]; } - (void)appendUTF8String: (const char*)UTF8String { - size_t UTF8StringLength = strlen(UTF8String); - size_t length; - - if (UTF8StringLength >= 3 && !memcmp(UTF8String, "\xEF\xBB\xBF", 3)) { - UTF8String += 3; - UTF8StringLength -= 3; - } - - switch (of_string_check_utf8(UTF8String, UTF8StringLength, &length)) { - case 1: - s->UTF8 = YES; - break; - case -1: - @throw [OFInvalidEncodingException exceptionWithClass: isa]; - } - - s->cString = [self resizeMemory: s->cString - toSize: s->cStringLength + - UTF8StringLength + 1]; - memcpy(s->cString + s->cStringLength, UTF8String, UTF8StringLength + 1); - - s->cStringLength += UTF8StringLength; - s->length += length; + OFAutoreleasePool *pool = [[OFAutoreleasePool alloc] init]; + + [self appendString: [OFString stringWithUTF8String: UTF8String]]; + + [pool release]; } - (void)appendUTF8String: (const char*)UTF8String withLength: (size_t)UTF8StringLength { - size_t length; - - if (UTF8StringLength >= 3 && !memcmp(UTF8String, "\xEF\xBB\xBF", 3)) { - UTF8String += 3; - UTF8StringLength -= 3; - } - - switch (of_string_check_utf8(UTF8String, UTF8StringLength, &length)) { - case 1: - s->UTF8 = YES; - break; - case -1: - @throw [OFInvalidEncodingException exceptionWithClass: isa]; - } - - s->cString = [self resizeMemory: s->cString - toSize: s->cStringLength + - UTF8StringLength + 1]; - memcpy(s->cString + s->cStringLength, UTF8String, UTF8StringLength); - - s->cStringLength += UTF8StringLength; - s->length += 
length; - - s->cString[s->cStringLength] = 0; + OFAutoreleasePool *pool = [[OFAutoreleasePool alloc] init]; + + [self appendString: [OFString stringWithUTF8String: UTF8String + length: UTF8StringLength]]; + + [pool release]; } - (void)appendCString: (const char*)cString withEncoding: (of_string_encoding_t)encoding { - return [self appendCString: cString - withEncoding: encoding - length: strlen(cString)]; + OFAutoreleasePool *pool = [[OFAutoreleasePool alloc] init]; + + [self appendString: [OFString stringWithCString: cString + encoding: encoding]]; + + [pool release]; } - (void)appendCString: (const char*)cString withEncoding: (of_string_encoding_t)encoding length: (size_t)cStringLength { - if (encoding == OF_STRING_ENCODING_UTF_8) - [self appendUTF8String: cString - withLength: cStringLength]; - else { - OFAutoreleasePool *pool = [[OFAutoreleasePool alloc] init]; - [self appendString: - [OFString stringWithCString: cString - encoding: encoding - length: cStringLength]]; - [pool release]; - } + OFAutoreleasePool *pool = [[OFAutoreleasePool alloc] init]; + + [self appendString: [OFString stringWithCString: cString + encoding: encoding + length: cStringLength]]; + + [pool release]; } - (void)appendString: (OFString*)string { - size_t UTF8StringLength; - - if (string == nil) - @throw [OFInvalidArgumentException exceptionWithClass: isa - selector: _cmd]; - - UTF8StringLength = [string UTF8StringLength]; - - s->cString = [self resizeMemory: s->cString - toSize: s->cStringLength + - UTF8StringLength + 1]; - memcpy(s->cString + s->cStringLength, string->s->cString, - UTF8StringLength); - - s->cStringLength += UTF8StringLength; - s->length += string->s->length; - - s->cString[s->cStringLength] = 0; - - if (string->s->UTF8) - s->UTF8 = YES; + return [self insertString: string + atIndex: [self length]]; } - (void)appendFormat: (OFConstantString*)format, ... 
{ va_list arguments; @@ -279,87 +364,18 @@ atIndex: 0]; } - (void)reverse { - size_t i, j; - - /* We reverse all bytes and restore UTF-8 later, if necessary */ - for (i = 0, j = s->cStringLength - 1; i < s->cStringLength / 2; - i++, j--) { - s->cString[i] ^= s->cString[j]; - s->cString[j] ^= s->cString[i]; - s->cString[i] ^= s->cString[j]; - } - - if (!s->UTF8) - return; - - for (i = 0; i < s->cStringLength; i++) { - /* ASCII */ - if (OF_LIKELY(!(s->cString[i] & 0x80))) - continue; - - /* A start byte can't happen first as we reversed everything */ - if (OF_UNLIKELY(s->cString[i] & 0x40)) - @throw [OFInvalidEncodingException - exceptionWithClass: isa]; - - /* Next byte must not be ASCII */ - if (OF_UNLIKELY(s->cStringLength < i + 1 || - !(s->cString[i + 1] & 0x80))) - @throw [OFInvalidEncodingException - exceptionWithClass: isa]; - - /* Next byte is the start byte */ - if (OF_LIKELY(s->cString[i + 1] & 0x40)) { - s->cString[i] ^= s->cString[i + 1]; - s->cString[i + 1] ^= s->cString[i]; - s->cString[i] ^= s->cString[i + 1]; - - i++; - continue; - } - - /* Second next byte must not be ASCII */ - if (OF_UNLIKELY(s->cStringLength < i + 2 || - !(s->cString[i + 2] & 0x80))) - @throw [OFInvalidEncodingException - exceptionWithClass: isa]; - - /* Second next byte is the start byte */ - if (OF_LIKELY(s->cString[i + 2] & 0x40)) { - s->cString[i] ^= s->cString[i + 2]; - s->cString[i + 2] ^= s->cString[i]; - s->cString[i] ^= s->cString[i + 2]; - - i += 2; - continue; - } - - /* Third next byte must not be ASCII */ - if (OF_UNLIKELY(s->cStringLength < i + 3 || - !(s->cString[i + 3] & 0x80))) - @throw [OFInvalidEncodingException - exceptionWithClass: isa]; - - /* Third next byte is the start byte */ - if (OF_LIKELY(s->cString[i + 3] & 0x40)) { - s->cString[i] ^= s->cString[i + 3]; - s->cString[i + 3] ^= s->cString[i]; - s->cString[i] ^= s->cString[i + 3]; - - s->cString[i + 1] ^= s->cString[i + 2]; - s->cString[i + 2] ^= s->cString[i + 1]; - s->cString[i + 1] ^= s->cString[i + 
2]; - - i += 3; - continue; - } - - /* UTF-8 does not allow more than 4 bytes per character */ - @throw [OFInvalidEncodingException exceptionWithClass: isa]; + size_t i, j, length = [self length]; + + for (i = 0, j = length - 1; i < length / 2; i++, j--) { + of_unichar_t tmp = [self characterAtIndex: j]; + [self setCharacter: [self characterAtIndex: i] + atIndex: j]; + [self setCharacter: tmp + atIndex: i]; } } - (void)upper { @@ -374,260 +390,110 @@ } - (void)insertString: (OFString*)string atIndex: (size_t)index { - size_t newCStringLength; - - if (index > s->length) - @throw [OFOutOfRangeException exceptionWithClass: isa]; - - if (s->UTF8) - index = of_string_index_to_position(s->cString, index, - s->cStringLength); - - newCStringLength = s->cStringLength + [string UTF8StringLength]; - s->cString = [self resizeMemory: s->cString - toSize: newCStringLength + 1]; - - memmove(s->cString + index + string->s->cStringLength, - s->cString + index, s->cStringLength - index); - memcpy(s->cString + index, string->s->cString, - string->s->cStringLength); - s->cString[newCStringLength] = '\0'; - - s->cStringLength = newCStringLength; - s->length += string->s->length; + @throw [OFNotImplementedException exceptionWithClass: isa + selector: _cmd]; } - (void)deleteCharactersInRange: (of_range_t)range { - size_t start = range.start; - size_t end = range.start + range.length; - - if (start > end) - @throw [OFInvalidArgumentException exceptionWithClass: isa - selector: _cmd]; - - if (end > s->length) - @throw [OFOutOfRangeException exceptionWithClass: isa]; - - s->length -= end - start; - - if (s->UTF8) { - start = of_string_index_to_position(s->cString, start, - s->cStringLength); - end = of_string_index_to_position(s->cString, end, - s->cStringLength); - } - - memmove(s->cString + start, s->cString + end, s->cStringLength - end); - s->cStringLength -= end - start; - s->cString[s->cStringLength] = 0; - - @try { - s->cString = [self resizeMemory: s->cString - toSize: 
s->cStringLength + 1]; - } @catch (OFOutOfMemoryException *e) { - /* We don't really care, as we only made it smaller */ - } + @throw [OFNotImplementedException exceptionWithClass: isa + selector: _cmd]; } - (void)replaceCharactersInRange: (of_range_t)range withString: (OFString*)replacement { - size_t start = range.start; - size_t end = range.start + range.length; - size_t newCStringLength, newLength; - - if (start > end) - @throw [OFInvalidArgumentException exceptionWithClass: isa - selector: _cmd]; - - if (end > s->length) - @throw [OFOutOfRangeException exceptionWithClass: isa]; - - newLength = s->length - (end - start) + [replacement length]; - - if (s->UTF8) { - start = of_string_index_to_position(s->cString, start, - s->cStringLength); - end = of_string_index_to_position(s->cString, end, - s->cStringLength); - } - - newCStringLength = s->cStringLength - (end - start) + - replacement->s->cStringLength; - s->cString = [self resizeMemory: s->cString - toSize: newCStringLength + 1]; - - memmove(s->cString + end, s->cString + start + - replacement->s->cStringLength, s->cStringLength - end); - memcpy(s->cString + start, replacement->s->cString, - replacement->s->cStringLength); - s->cString[newCStringLength] = '\0'; - - s->cStringLength = newCStringLength; - s->length = newLength; + [self deleteCharactersInRange: range]; + [self insertString: replacement + atIndex: range.start]; } - (void)replaceOccurrencesOfString: (OFString*)string withString: (OFString*)replacement { - const char *UTF8String = [string UTF8String]; - const char *replacementUTF8String = [replacement UTF8String]; - size_t UTF8StringLength = string->s->cStringLength; - size_t replacementUTF8StringLength = replacement->s->cStringLength; - size_t i, last, newCStringLength, newLength; - char *newCString; - - if (UTF8StringLength > s->cStringLength) + OFAutoreleasePool *pool = [[OFAutoreleasePool alloc] init], *pool2; + const of_unichar_t *unicodeString; + const of_unichar_t *searchString = [string 
unicodeString]; + size_t length = [self length]; + size_t searchLength = [string length]; + size_t replacementLength = [replacement length]; + size_t i, last; + + if (searchLength > length) { + [pool release]; return; + } - newCString = NULL; - newCStringLength = 0; - newLength = s->length; + pool2 = [[OFAutoreleasePool alloc] init]; + unicodeString = [self unicodeString]; - for (i = 0, last = 0; i <= s->cStringLength - UTF8StringLength; i++) { - if (memcmp(s->cString + i, UTF8String, UTF8StringLength)) + for (i = 0, last = 0; i <= length - searchLength; i++) { + if (memcmp(unicodeString + i, searchString, + searchLength * sizeof(of_unichar_t))) continue; - @try { - newCString = [self - resizeMemory: newCString - toSize: newCStringLength + i - last + - replacementUTF8StringLength + 1]; - } @catch (id e) { - [self freeMemory: newCString]; - @throw e; - } - memcpy(newCString + newCStringLength, s->cString + last, - i - last); - memcpy(newCString + newCStringLength + i - last, - replacementUTF8String, replacementUTF8StringLength); - - newCStringLength += i - last + replacementUTF8StringLength; - newLength = newLength - string->s->length + - replacement->s->length; - - i += UTF8StringLength - 1; - last = i + 1; - } - - @try { - newCString = [self - resizeMemory: newCString - toSize: newCStringLength + - s->cStringLength - last + 1]; - } @catch (id e) { - [self freeMemory: newCString]; - @throw e; - } - memcpy(newCString + newCStringLength, s->cString + last, - s->cStringLength - last); - newCStringLength += s->cStringLength - last; - newCString[newCStringLength] = 0; - - [self freeMemory: s->cString]; - s->cString = newCString; - s->cStringLength = newCStringLength; - s->length = newLength; + [self replaceCharactersInRange: of_range(i, searchLength) + withString: replacement]; + + length -= searchLength; + length += replacementLength; + + i += replacementLength - 1; + last = i + 1; + + [pool2 releaseObjects]; + + unicodeString = [self unicodeString]; + } + + [pool 
release]; } - (void)deleteLeadingWhitespaces { - size_t i; + size_t i, length = [self length]; + + for (i = 0; i < length; i++) { + of_unichar_t c = [self characterAtIndex: i]; - for (i = 0; i < s->cStringLength; i++) - if (s->cString[i] != ' ' && s->cString[i] != '\t' && - s->cString[i] != '\n' && s->cString[i] != '\r' && - s->cString[i] != '\f') + if (c != ' ' && c != '\t' && c != '\n' && c != '\r' && + c != '\f') break; - - s->cStringLength -= i; - s->length -= i; - - memmove(s->cString, s->cString + i, s->cStringLength); - s->cString[s->cStringLength] = '\0'; - - @try { - s->cString = [self resizeMemory: s->cString - toSize: s->cStringLength + 1]; - } @catch (OFOutOfMemoryException *e) { - /* We don't really care, as we only made it smaller */ - } + } + + [self deleteCharactersInRange: of_range(0, i)]; } - (void)deleteTrailingWhitespaces { - size_t d; - char *p; - - d = 0; - for (p = s->cString + s->cStringLength - 1; p >= s->cString; p--) { - if (*p != ' ' && *p != '\t' && *p != '\n' && *p != '\r' && - *p != '\f') - break; - - *p = '\0'; - d++; - } - - s->cStringLength -= d; - s->length -= d; - - @try { - s->cString = [self resizeMemory: s->cString - toSize: s->cStringLength + 1]; - } @catch (OFOutOfMemoryException *e) { - /* We don't really care, as we only made it smaller */ - } + size_t length = [self length]; + ssize_t i; + + for (i = length - 1; i >= 0; i--) { + of_unichar_t c = [self characterAtIndex: i]; + + if (c != ' ' && c != '\t' && c != '\n' && c != '\r' && + c != '\f') + break; + } + + [self deleteCharactersInRange: of_range(i + 1, length - i - 1)]; } - (void)deleteEnclosingWhitespaces { - size_t d, i; - char *p; - - d = 0; - for (p = s->cString + s->cStringLength - 1; p >= s->cString; p--) { - if (*p != ' ' && *p != '\t' && *p != '\n' && *p != '\r' && - *p != '\f') - break; - - *p = '\0'; - d++; - } - - s->cStringLength -= d; - s->length -= d; - - for (i = 0; i < s->cStringLength; i++) - if (s->cString[i] != ' ' && s->cString[i] != '\t' && - 
s->cString[i] != '\n' && s->cString[i] != '\r' && - s->cString[i] != '\f') - break; - - s->cStringLength -= i; - s->length -= i; - - memmove(s->cString, s->cString + i, s->cStringLength); - s->cString[s->cStringLength] = '\0'; - - @try { - s->cString = [self resizeMemory: s->cString - toSize: s->cStringLength + 1]; - } @catch (OFOutOfMemoryException *e) { - /* We don't really care, as we only made it smaller */ - } + [self deleteLeadingWhitespaces]; + [self deleteTrailingWhitespaces]; } - copy { return [[OFString alloc] initWithString: self]; } - (void)makeImmutable { - isa = [OFString class]; } @end ADDED src/OFMutableString_UTF8.h Index: src/OFMutableString_UTF8.h ================================================================== --- src/OFMutableString_UTF8.h +++ src/OFMutableString_UTF8.h @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2008, 2009, 2010, 2011 + * Jonathan Schleifer + * + * All rights reserved. + * + * This file is part of ObjFW. It may be distributed under the terms of the + * Q Public License 1.0, which can be found in the file LICENSE.QPL included in + * the packaging of this file. + * + * Alternatively, it may be distributed under the terms of the GNU General + * Public License, either version 2 or 3, which can be found in the file + * LICENSE.GPLv2 or LICENSE.GPLv3 respectively included in the packaging of this + * file. + */ + +#import "OFMutableString.h" + +@interface OFMutableString_UTF8: OFMutableString +{ +@public + struct of_string_utf8_ivars *restrict s; +} +@end ADDED src/OFMutableString_UTF8.m Index: src/OFMutableString_UTF8.m ================================================================== --- src/OFMutableString_UTF8.m +++ src/OFMutableString_UTF8.m @@ -0,0 +1,688 @@ +/* + * Copyright (c) 2008, 2009, 2010, 2011 + * Jonathan Schleifer + * + * All rights reserved. + * + * This file is part of ObjFW. 
It may be distributed under the terms of the + * Q Public License 1.0, which can be found in the file LICENSE.QPL included in + * the packaging of this file. + * + * Alternatively, it may be distributed under the terms of the GNU General + * Public License, either version 2 or 3, which can be found in the file + * LICENSE.GPLv2 or LICENSE.GPLv3 respectively included in the packaging of this + * file. + */ + +#include "config.h" + +#include +#include +#include +#include + +#import "OFString.h" +#import "OFString_UTF8.h" +#import "OFMutableString_UTF8.h" +#import "OFAutoreleasePool.h" + +#import "OFInvalidArgumentException.h" +#import "OFInvalidEncodingException.h" +#import "OFInvalidFormatException.h" +#import "OFOutOfMemoryException.h" +#import "OFOutOfRangeException.h" + +#import "macros.h" + +#import "of_asprintf.h" +#import "unicode.h" + +@implementation OFMutableString_UTF8 ++ (void)initialize +{ + if (self == [OFMutableString_UTF8 class]) + [self inheritMethodsFromClass: [OFString_UTF8 class]]; +} + +- (void)_applyTable: (const of_unichar_t* const[])table + withSize: (size_t)tableSize +{ + of_unichar_t c; + of_unichar_t *unicodeString; + size_t unicodeLen, newCStringLength, cLen; + size_t i, j, d; + char *newCString; + + if (!s->UTF8) { + assert(tableSize >= 1); + + uint8_t *p = (uint8_t*)s->cString + s->cStringLength; + uint8_t t; + + while (--p >= (uint8_t*)s->cString) + if ((t = table[0][*p]) != 0) + *p = t; + + return; + } + + unicodeLen = [self length]; + unicodeString = [self allocMemoryForNItems: unicodeLen + ofSize: sizeof(of_unichar_t)]; + + i = j = 0; + newCStringLength = 0; + + while (i < s->cStringLength) { + cLen = of_string_utf8_to_unicode(s->cString + i, + s->cStringLength - i, &c); + + if (cLen == 0 || c > 0x10FFFF) { + [self freeMemory: unicodeString]; + @throw [OFInvalidEncodingException + exceptionWithClass: isa]; + } + + if (c >> 8 < tableSize) { + of_unichar_t tc = table[c >> 8][c & 0xFF]; + + if (tc) + c = tc; + } + unicodeString[j++] = 
c; + + if (c < 0x80) + newCStringLength++; + else if (c < 0x800) + newCStringLength += 2; + else if (c < 0x10000) + newCStringLength += 3; + else if (c < 0x110000) + newCStringLength += 4; + else { + [self freeMemory: unicodeString]; + @throw [OFInvalidEncodingException + exceptionWithClass: isa]; + } + + i += cLen; + } + + @try { + newCString = [self allocMemoryWithSize: newCStringLength + 1]; + } @catch (id e) { + [self freeMemory: unicodeString]; + @throw e; + } + + j = 0; + + for (i = 0; i < unicodeLen; i++) { + if ((d = of_string_unicode_to_utf8(unicodeString[i], + newCString + j)) == 0) { + [self freeMemory: unicodeString]; + [self freeMemory: newCString]; + @throw [OFInvalidEncodingException + exceptionWithClass: isa]; + } + j += d; + } + + assert(j == newCStringLength); + newCString[j] = 0; + [self freeMemory: unicodeString]; + + [self freeMemory: s->cString]; + s->cString = newCString; + s->cStringLength = newCStringLength; + + /* + * Even though cStringLength can change, length cannot, therefore no + * need to change it. 
+ */ +} + +- (void)setCharacter: (of_unichar_t)character + atIndex: (size_t)index +{ + char buffer[4]; + of_unichar_t c; + size_t length, oldLength; + + if (s->UTF8) + index = of_string_index_to_position(s->cString, index, + s->cStringLength); + /* Shortcut only if old and new character both are ASCII (< 0x80) */ + if (character < 0x80 && !(s->cString[index] & 0x80)) { + s->cString[index] = character; + return; + } + + if ((length = of_string_unicode_to_utf8(character, buffer)) == 0) + @throw [OFInvalidEncodingException exceptionWithClass: isa]; + + if ((oldLength = of_string_utf8_to_unicode(s->cString + index, + s->cStringLength - index, &c)) == 0) + @throw [OFInvalidEncodingException exceptionWithClass: isa]; + + if (length == oldLength) { + memcpy(s->cString + index, buffer, length); + return; + } + + if (length > oldLength) { + s->cString = [self resizeMemory: s->cString + toSize: s->cStringLength - + oldLength + length + 1]; + + memmove(s->cString + index + length, + s->cString + index + oldLength, + s->cStringLength - index - oldLength); + memcpy(s->cString + index, buffer, length); + + s->cStringLength -= oldLength; + s->cStringLength += length; + s->cString[s->cStringLength] = '\0'; + /* Test the code point, not bit 7: U+0100 has bit 7 clear */ + if (character >= 0x80) + s->UTF8 = YES; + + return; + } + + if (length < oldLength) { + memmove(s->cString + index + length, + s->cString + index + oldLength, + s->cStringLength - index - oldLength); + memcpy(s->cString + index, buffer, length); + + s->cStringLength -= oldLength; + s->cStringLength += length; + s->cString[s->cStringLength] = '\0'; + + @try { + s->cString = [self resizeMemory: s->cString + toSize: s->cStringLength + 1]; + } @catch (OFOutOfMemoryException *e) { + /* We don't really care, as we only made it smaller */ + } + + return; + } + + assert(0); +} + +- (void)appendUTF8String: (const char*)UTF8String +{ + size_t UTF8StringLength = strlen(UTF8String); + size_t length; + + if (UTF8StringLength >= 3 && !memcmp(UTF8String, "\xEF\xBB\xBF", 3)) { + UTF8String += 3; + UTF8StringLength -= 3; + } + + switch 
(of_string_check_utf8(UTF8String, UTF8StringLength, &length)) { + case 1: + s->UTF8 = YES; + break; + case -1: + @throw [OFInvalidEncodingException exceptionWithClass: isa]; + } + + s->cString = [self resizeMemory: s->cString + toSize: s->cStringLength + + UTF8StringLength + 1]; + memcpy(s->cString + s->cStringLength, UTF8String, UTF8StringLength + 1); + + s->cStringLength += UTF8StringLength; + s->length += length; +} + +- (void)appendUTF8String: (const char*)UTF8String + withLength: (size_t)UTF8StringLength +{ + size_t length; + + if (UTF8StringLength >= 3 && !memcmp(UTF8String, "\xEF\xBB\xBF", 3)) { + UTF8String += 3; + UTF8StringLength -= 3; + } + + switch (of_string_check_utf8(UTF8String, UTF8StringLength, &length)) { + case 1: + s->UTF8 = YES; + break; + case -1: + @throw [OFInvalidEncodingException exceptionWithClass: isa]; + } + + s->cString = [self resizeMemory: s->cString + toSize: s->cStringLength + + UTF8StringLength + 1]; + memcpy(s->cString + s->cStringLength, UTF8String, UTF8StringLength); + + s->cStringLength += UTF8StringLength; + s->length += length; + + s->cString[s->cStringLength] = 0; +} + +- (void)appendCString: (const char*)cString + withEncoding: (of_string_encoding_t)encoding +{ + return [self appendCString: cString + withEncoding: encoding + length: strlen(cString)]; +} + +- (void)appendCString: (const char*)cString + withEncoding: (of_string_encoding_t)encoding + length: (size_t)cStringLength +{ + if (encoding == OF_STRING_ENCODING_UTF_8) + [self appendUTF8String: cString + withLength: cStringLength]; + else { + OFAutoreleasePool *pool = [[OFAutoreleasePool alloc] init]; + [self appendString: + [OFString stringWithCString: cString + encoding: encoding + length: cStringLength]]; + [pool release]; + } +} + +- (void)appendString: (OFString*)string +{ + size_t UTF8StringLength; + + if (string == nil) + @throw [OFInvalidArgumentException exceptionWithClass: isa + selector: _cmd]; + + UTF8StringLength = [string UTF8StringLength]; + + s->cString 
= [self resizeMemory: s->cString + toSize: s->cStringLength + + UTF8StringLength + 1]; + memcpy(s->cString + s->cStringLength, [string UTF8String], + UTF8StringLength); + + s->cStringLength += UTF8StringLength; + s->length += [string length]; + + s->cString[s->cStringLength] = 0; + + if ([string isKindOfClass: [OFString_UTF8 class]] || + [string isKindOfClass: [OFMutableString_UTF8 class]]) { + if (((OFString_UTF8*)string)->s->UTF8) + s->UTF8 = YES; + } else + s->UTF8 = YES; +} + +- (void)appendFormat: (OFConstantString*)format + withArguments: (va_list)arguments +{ + char *UTF8String; + int UTF8StringLength; + + if (format == nil) + @throw [OFInvalidArgumentException exceptionWithClass: isa + selector: _cmd]; + + if ((UTF8StringLength = of_vasprintf(&UTF8String, [format UTF8String], + arguments)) == -1) + @throw [OFInvalidFormatException exceptionWithClass: isa]; + + @try { + [self appendUTF8String: UTF8String + withLength: UTF8StringLength]; + } @finally { + free(UTF8String); + } +} + +- (void)reverse +{ + size_t i, j; + + /* We reverse all bytes and restore UTF-8 later, if necessary */ + for (i = 0, j = s->cStringLength - 1; i < s->cStringLength / 2; + i++, j--) { + s->cString[i] ^= s->cString[j]; + s->cString[j] ^= s->cString[i]; + s->cString[i] ^= s->cString[j]; + } + + if (!s->UTF8) + return; + + for (i = 0; i < s->cStringLength; i++) { + /* ASCII */ + if (OF_LIKELY(!(s->cString[i] & 0x80))) + continue; + + /* A start byte can't happen first as we reversed everything */ + if (OF_UNLIKELY(s->cString[i] & 0x40)) + @throw [OFInvalidEncodingException + exceptionWithClass: isa]; + + /* Next byte must not be ASCII */ + if (OF_UNLIKELY(s->cStringLength < i + 1 || + !(s->cString[i + 1] & 0x80))) + @throw [OFInvalidEncodingException + exceptionWithClass: isa]; + + /* Next byte is the start byte */ + if (OF_LIKELY(s->cString[i + 1] & 0x40)) { + s->cString[i] ^= s->cString[i + 1]; + s->cString[i + 1] ^= s->cString[i]; + s->cString[i] ^= s->cString[i + 1]; + + i++; + 
continue; + } + + /* Second next byte must not be ASCII */ + if (OF_UNLIKELY(s->cStringLength < i + 2 || + !(s->cString[i + 2] & 0x80))) + @throw [OFInvalidEncodingException + exceptionWithClass: isa]; + + /* Second next byte is the start byte */ + if (OF_LIKELY(s->cString[i + 2] & 0x40)) { + s->cString[i] ^= s->cString[i + 2]; + s->cString[i + 2] ^= s->cString[i]; + s->cString[i] ^= s->cString[i + 2]; + + i += 2; + continue; + } + + /* Third next byte must not be ASCII */ + if (OF_UNLIKELY(s->cStringLength < i + 3 || + !(s->cString[i + 3] & 0x80))) + @throw [OFInvalidEncodingException + exceptionWithClass: isa]; + + /* Third next byte is the start byte */ + if (OF_LIKELY(s->cString[i + 3] & 0x40)) { + s->cString[i] ^= s->cString[i + 3]; + s->cString[i + 3] ^= s->cString[i]; + s->cString[i] ^= s->cString[i + 3]; + + s->cString[i + 1] ^= s->cString[i + 2]; + s->cString[i + 2] ^= s->cString[i + 1]; + s->cString[i + 1] ^= s->cString[i + 2]; + + i += 3; + continue; + } + + /* UTF-8 does not allow more than 4 bytes per character */ + @throw [OFInvalidEncodingException exceptionWithClass: isa]; + } +} + +- (void)insertString: (OFString*)string + atIndex: (size_t)index +{ + size_t newCStringLength; + + if (index > s->length) + @throw [OFOutOfRangeException exceptionWithClass: isa]; + + if (s->UTF8) + index = of_string_index_to_position(s->cString, index, + s->cStringLength); + + newCStringLength = s->cStringLength + [string UTF8StringLength]; + s->cString = [self resizeMemory: s->cString + toSize: newCStringLength + 1]; + + memmove(s->cString + index + [string UTF8StringLength], + s->cString + index, s->cStringLength - index); + memcpy(s->cString + index, [string UTF8String], + [string UTF8StringLength]); + s->cString[newCStringLength] = '\0'; + + s->cStringLength = newCStringLength; + s->length += [string length]; + + if ([string isKindOfClass: [OFString_UTF8 class]] || + [string isKindOfClass: [OFMutableString_UTF8 class]]) { + if (((OFString_UTF8*)string)->s->UTF8) + 
s->UTF8 = YES; + } else + s->UTF8 = YES; +} + +- (void)deleteCharactersInRange: (of_range_t)range +{ + size_t start = range.start; + size_t end = range.start + range.length; + + if (start > end) + @throw [OFInvalidArgumentException exceptionWithClass: isa + selector: _cmd]; + + if (end > s->length) + @throw [OFOutOfRangeException exceptionWithClass: isa]; + + s->length -= end - start; + + if (s->UTF8) { + start = of_string_index_to_position(s->cString, start, + s->cStringLength); + end = of_string_index_to_position(s->cString, end, + s->cStringLength); + } + + memmove(s->cString + start, s->cString + end, s->cStringLength - end); + s->cStringLength -= end - start; + s->cString[s->cStringLength] = 0; + + @try { + s->cString = [self resizeMemory: s->cString + toSize: s->cStringLength + 1]; + } @catch (OFOutOfMemoryException *e) { + /* We don't really care, as we only made it smaller */ + } +} + +- (void)replaceCharactersInRange: (of_range_t)range + withString: (OFString*)replacement +{ + size_t start = range.start; + size_t end = range.start + range.length; + size_t newCStringLength, newLength; + + if (start > end) + @throw [OFInvalidArgumentException exceptionWithClass: isa + selector: _cmd]; + + if (end > s->length) + @throw [OFOutOfRangeException exceptionWithClass: isa]; + + newLength = s->length - (end - start) + [replacement length]; + + if (s->UTF8) { + start = of_string_index_to_position(s->cString, start, + s->cStringLength); + end = of_string_index_to_position(s->cString, end, + s->cStringLength); + } + /* NOTE(review): a shrinking resize here may cut off the tail */ + newCStringLength = s->cStringLength - (end - start) + + [replacement UTF8StringLength]; + s->cString = [self resizeMemory: s->cString + toSize: newCStringLength + 1]; + /* Move the tail behind the gap that the replacement will fill */ + memmove(s->cString + start + [replacement UTF8StringLength], + s->cString + end, s->cStringLength - end); + memcpy(s->cString + start, [replacement UTF8String], + [replacement UTF8StringLength]); + s->cString[newCStringLength] = '\0'; + + s->cStringLength = newCStringLength; + s->length = 
newLength; +} + +- (void)replaceOccurrencesOfString: (OFString*)string + withString: (OFString*)replacement +{ + const char *UTF8String = [string UTF8String]; + const char *replacementUTF8String = [replacement UTF8String]; + size_t UTF8StringLength = [string UTF8StringLength]; + size_t replacementUTF8StringLength = [replacement UTF8StringLength]; + size_t i, last, newCStringLength, newLength; + char *newCString; + + if (UTF8StringLength > s->cStringLength) + return; + + newCString = NULL; + newCStringLength = 0; + newLength = s->length; + + for (i = 0, last = 0; i <= s->cStringLength - UTF8StringLength; i++) { + if (memcmp(s->cString + i, UTF8String, UTF8StringLength)) + continue; + + @try { + newCString = [self + resizeMemory: newCString + toSize: newCStringLength + i - last + + replacementUTF8StringLength + 1]; + } @catch (id e) { + [self freeMemory: newCString]; + @throw e; + } + memcpy(newCString + newCStringLength, s->cString + last, + i - last); + memcpy(newCString + newCStringLength + i - last, + replacementUTF8String, replacementUTF8StringLength); + + newCStringLength += i - last + replacementUTF8StringLength; + newLength = newLength - [string length] + [replacement length]; + + i += UTF8StringLength - 1; + last = i + 1; + } + + @try { + newCString = [self + resizeMemory: newCString + toSize: newCStringLength + + s->cStringLength - last + 1]; + } @catch (id e) { + [self freeMemory: newCString]; + @throw e; + } + memcpy(newCString + newCStringLength, s->cString + last, + s->cStringLength - last); + newCStringLength += s->cStringLength - last; + newCString[newCStringLength] = 0; + + [self freeMemory: s->cString]; + s->cString = newCString; + s->cStringLength = newCStringLength; + s->length = newLength; +} + +- (void)deleteLeadingWhitespaces +{ + size_t i; + + for (i = 0; i < s->cStringLength; i++) + if (s->cString[i] != ' ' && s->cString[i] != '\t' && + s->cString[i] != '\n' && s->cString[i] != '\r' && + s->cString[i] != '\f') + break; + + s->cStringLength -= 
i; + s->length -= i; + + memmove(s->cString, s->cString + i, s->cStringLength); + s->cString[s->cStringLength] = '\0'; + + @try { + s->cString = [self resizeMemory: s->cString + toSize: s->cStringLength + 1]; + } @catch (OFOutOfMemoryException *e) { + /* We don't really care, as we only made it smaller */ + } +} + +- (void)deleteTrailingWhitespaces +{ + size_t d; + char *p; + + d = 0; + for (p = s->cString + s->cStringLength - 1; p >= s->cString; p--) { + if (*p != ' ' && *p != '\t' && *p != '\n' && *p != '\r' && + *p != '\f') + break; + + *p = '\0'; + d++; + } + + s->cStringLength -= d; + s->length -= d; + + @try { + s->cString = [self resizeMemory: s->cString + toSize: s->cStringLength + 1]; + } @catch (OFOutOfMemoryException *e) { + /* We don't really care, as we only made it smaller */ + } +} + +- (void)deleteEnclosingWhitespaces +{ + size_t d, i; + char *p; + + d = 0; + for (p = s->cString + s->cStringLength - 1; p >= s->cString; p--) { + if (*p != ' ' && *p != '\t' && *p != '\n' && *p != '\r' && + *p != '\f') + break; + + *p = '\0'; + d++; + } + + s->cStringLength -= d; + s->length -= d; + + for (i = 0; i < s->cStringLength; i++) + if (s->cString[i] != ' ' && s->cString[i] != '\t' && + s->cString[i] != '\n' && s->cString[i] != '\r' && + s->cString[i] != '\f') + break; + + s->cStringLength -= i; + s->length -= i; + + memmove(s->cString, s->cString + i, s->cStringLength); + s->cString[s->cStringLength] = '\0'; + + @try { + s->cString = [self resizeMemory: s->cString + toSize: s->cStringLength + 1]; + } @catch (OFOutOfMemoryException *e) { + /* We don't really care, as we only made it smaller */ + } +} + +- (void)makeImmutable +{ + isa = [OFString_UTF8 class]; +} +@end Index: src/OFString.h ================================================================== --- src/OFString.h +++ src/OFString.h @@ -72,33 +72,10 @@ * you should use the corresponding methods to get the ivars, which ensures the * constant string is initialized. 
*/ @interface OFString: OFObject -{ - /* - * The ivars have to be like this because OFConstantString bases on - * OFString. - * - * The compiler generates an instance with a const char* and a size_t - * for each constant string. We change the const char* to point to our - * struct on the first call to a constant string so we can have more - * than those two ivars. - */ - struct of_string_ivars { - char *cString; - size_t cStringLength; - BOOL UTF8; - size_t length; - } *restrict s; - /* - * Unused in OFString, however, OFConstantString sets this to SIZE_MAX - * once it allocated and initialized the struct. - */ - size_t initialized; -} - #ifdef OF_HAVE_PROPERTIES @property (readonly) size_t length; #endif /** @@ -592,10 +569,20 @@ * \param index The index of the Unicode character to return * \return The Unicode character at the specified index */ - (of_unichar_t)characterAtIndex: (size_t)index; +/** + * \brief Copies the Unicode characters in the specified range to the specified + * buffer. + * + * \param buffer The buffer to store the Unicode characters + * \param range The range of the Unicode characters to copy + */ +- (void)getCharacters: (of_unichar_t*)buffer + inRange: (of_range_t)range; + /** * \brief Returns the index of the first occurrence of the string. 
* * \param string The string to search * \return The index of the first occurrence of the string or OF_INVALID_INDEX Index: src/OFString.m ================================================================== --- src/OFString.m +++ src/OFString.m @@ -15,19 +15,17 @@ */ #include "config.h" #include -#include #include #include -#include -#include #include #import "OFString.h" +#import "OFString_UTF8.h" #import "OFArray.h" #import "OFDictionary.h" #import "OFFile.h" #import "OFURL.h" #import "OFHTTPRequest.h" @@ -47,13 +45,10 @@ #import "macros.h" #import "of_asprintf.h" #import "unicode.h" -extern const uint16_t of_iso_8859_15[256]; -extern const uint16_t of_windows_1252[256]; - /* References for static linking */ void _references_to_categories_of_OFString(void) { _OFString_Hashing_reference = 1; _OFString_Serialization_reference = 1; @@ -60,25 +55,10 @@ _OFString_URLEncoding_reference = 1; _OFString_XMLEscaping_reference = 1; _OFString_XMLUnescaping_reference = 1; } -static inline int -memcasecmp(const char *first, const char *second, size_t length) -{ - size_t i; - - for (i = 0; i < length; i++) { - if (tolower((int)first[i]) > tolower((int)second[i])) - return OF_ORDERED_DESCENDING; - if (tolower((int)first[i]) < tolower((int)second[i])) - return OF_ORDERED_ASCENDING; - } - - return OF_ORDERED_SAME; -} - int of_string_check_utf8(const char *cString, size_t cStringLength, size_t *length) { size_t i, tmpLength = cStringLength; int UTF8 = 0; @@ -258,12 +238,219 @@ while (*string_ != 0) string_++; return (size_t)(string_ - string); } + +static struct { + Class isa; +} placeholder; + +@interface OFString_placeholder: OFString +@end + +@implementation OFString_placeholder +- init +{ + return (id)[[OFString_UTF8 alloc] init]; +} + +- initWithUTF8String: (const char*)UTF8String +{ + return (id)[[OFString_UTF8 alloc] initWithUTF8String: UTF8String]; +} + +- initWithUTF8String: (const char*)UTF8String + length: (size_t)UTF8StringLength +{ + return (id)[[OFString_UTF8 alloc] 
initWithUTF8String: UTF8String + length: UTF8StringLength]; +} + +- initWithCString: (const char*)cString + encoding: (of_string_encoding_t)encoding +{ + return (id)[[OFString_UTF8 alloc] initWithCString: cString + encoding: encoding]; +} + +- initWithCString: (const char*)cString + encoding: (of_string_encoding_t)encoding + length: (size_t)cStringLength +{ + return (id)[[OFString_UTF8 alloc] initWithCString: cString + encoding: encoding + length: cStringLength]; +} + +- initWithString: (OFString*)string +{ + return (id)[[OFString_UTF8 alloc] initWithString: string]; +} + +- initWithUnicodeString: (const of_unichar_t*)string +{ + return (id)[[OFString_UTF8 alloc] initWithUnicodeString: string]; +} + +- initWithUnicodeString: (const of_unichar_t*)string + byteOrder: (of_endianess_t)byteOrder +{ + return (id)[[OFString_UTF8 alloc] initWithUnicodeString: string + byteOrder: byteOrder]; +} + +- initWithUnicodeString: (const of_unichar_t*)string + length: (size_t)length +{ + return (id)[[OFString_UTF8 alloc] initWithUnicodeString: string + length: length]; +} + +- initWithUnicodeString: (const of_unichar_t*)string + byteOrder: (of_endianess_t)byteOrder + length: (size_t)length +{ + return (id)[[OFString_UTF8 alloc] initWithUnicodeString: string + byteOrder: byteOrder + length: length]; +} + +- initWithUTF16String: (const uint16_t*)string +{ + return (id)[[OFString_UTF8 alloc] initWithUTF16String: string]; +} + +- initWithUTF16String: (const uint16_t*)string + byteOrder: (of_endianess_t)byteOrder +{ + return (id)[[OFString_UTF8 alloc] initWithUTF16String: string + byteOrder: byteOrder]; +} + +- initWithUTF16String: (const uint16_t*)string + length: (size_t)length +{ + return (id)[[OFString_UTF8 alloc] initWithUTF16String: string + length: length]; +} + +- initWithUTF16String: (const uint16_t*)string + byteOrder: (of_endianess_t)byteOrder + length: (size_t)length +{ + return (id)[[OFString_UTF8 alloc] initWithUTF16String: string + byteOrder: byteOrder + length: length]; 
+} + +- initWithFormat: (OFConstantString*)format, ... +{ + id ret; + va_list arguments; + + va_start(arguments, format); + ret = [[OFString_UTF8 alloc] initWithFormat: format + arguments: arguments]; + va_end(arguments); + + return ret; +} + +- initWithFormat: (OFConstantString*)format + arguments: (va_list)arguments +{ + return (id)[[OFString_UTF8 alloc] initWithFormat: format + arguments: arguments]; +} + +- initWithPath: (OFString*)firstComponent, ... +{ + id ret; + va_list arguments; + + va_start(arguments, firstComponent); + ret = [[OFString_UTF8 alloc] initWithPath: firstComponent + arguments: arguments]; + va_end(arguments); + + return ret; +} + +- initWithPath: (OFString*)firstComponent + arguments: (va_list)arguments +{ + return (id)[[OFString_UTF8 alloc] initWithPath: firstComponent + arguments: arguments]; +} + +- initWithContentsOfFile: (OFString*)path +{ + return (id)[[OFString_UTF8 alloc] initWithContentsOfFile: path]; +} + +- initWithContentsOfFile: (OFString*)path + encoding: (of_string_encoding_t)encoding +{ + return (id)[[OFString_UTF8 alloc] initWithContentsOfFile: path + encoding: encoding]; +} + +- initWithContentsOfURL: (OFURL*)URL +{ + return (id)[[OFString_UTF8 alloc] initWithContentsOfURL: URL]; +} + +- initWithContentsOfURL: (OFURL*)URL + encoding: (of_string_encoding_t)encoding +{ + return (id)[[OFString_UTF8 alloc] initWithContentsOfURL: URL + encoding: encoding]; +} + +- initWithSerialization: (OFXMLElement*)element +{ + return (id)[[OFString_UTF8 alloc] initWithSerialization: element]; +} + +- retain +{ + return self; +} + +- autorelease +{ + return self; +} + +- (void)release +{ +} + +- (void)dealloc +{ + @throw [OFNotImplementedException exceptionWithClass: isa + selector: _cmd]; + [super dealloc]; /* Get rid of a stupid warning */ +} +@end @implementation OFString ++ (void)initialize +{ + if (self == [OFString class]) + placeholder.isa = [OFString_placeholder class]; +} + ++ alloc +{ + if (self == [OFString class]) + return 
(id)&placeholder; + + return [super alloc]; +} + + string { return [[[self alloc] init] autorelease]; } @@ -407,24 +594,18 @@ encoding: encoding] autorelease]; } - init { - self = [super init]; - - @try { - s = [self allocMemoryWithSize: sizeof(*s)]; - memset(s, 0, sizeof(*s)); - - s->cString = [self allocMemoryWithSize: 1]; - s->cString[0] = '\0'; - } @catch (id e) { + if (isa == [OFString class]) { + Class c = isa; [self release]; - @throw e; + @throw [OFNotImplementedException exceptionWithClass: c + selector: _cmd]; } - return self; + return [super init]; } - initWithUTF8String: (const char*)UTF8String { return [self initWithCString: UTF8String @@ -450,162 +631,22 @@ - initWithCString: (const char*)cString encoding: (of_string_encoding_t)encoding length: (size_t)cStringLength { - self = [super init]; - - @try { - size_t i, j; - const uint16_t *table; - - if (encoding == OF_STRING_ENCODING_UTF_8 && - cStringLength >= 3 && !memcmp(cString, "\xEF\xBB\xBF", 3)) { - cString += 3; - cStringLength -= 3; - } - - s = [self allocMemoryWithSize: sizeof(*s)]; - memset(s, 0, sizeof(*s)); - - s->cString = [self allocMemoryWithSize: cStringLength + 1]; - s->cStringLength = cStringLength; - - if (encoding == OF_STRING_ENCODING_UTF_8 || - encoding == OF_STRING_ENCODING_ASCII) { - switch (of_string_check_utf8(cString, cStringLength, - &s->length)) { - case 1: - if (encoding == OF_STRING_ENCODING_ASCII) - @throw [OFInvalidEncodingException - exceptionWithClass: isa]; - - s->UTF8 = YES; - break; - case -1: - @throw [OFInvalidEncodingException - exceptionWithClass: isa]; - } - - memcpy(s->cString, cString, cStringLength); - s->cString[cStringLength] = 0; - - return self; - } - - /* All other encodings we support are single byte encodings */ - s->length = cStringLength; - - if (encoding == OF_STRING_ENCODING_ISO_8859_1) { - for (i = j = 0; i < cStringLength; i++) { - char buffer[4]; - size_t bytes; - - if (!(cString[i] & 0x80)) { - s->cString[j++] = cString[i]; - continue; - } - - 
s->UTF8 = YES; - bytes = of_string_unicode_to_utf8( - (uint8_t)cString[i], buffer); - - if (bytes == 0) - @throw [OFInvalidEncodingException - exceptionWithClass: isa]; - - s->cStringLength += bytes - 1; - s->cString = [self - resizeMemory: s->cString - toSize: s->cStringLength + 1]; - - memcpy(s->cString + j, buffer, bytes); - j += bytes; - } - - s->cString[s->cStringLength] = 0; - - return self; - } - - switch (encoding) { - case OF_STRING_ENCODING_ISO_8859_15: - table = of_iso_8859_15; - break; - case OF_STRING_ENCODING_WINDOWS_1252: - table = of_windows_1252; - break; - default: - @throw [OFInvalidEncodingException - exceptionWithClass: isa]; - } - - for (i = j = 0; i < cStringLength; i++) { - char buffer[4]; - of_unichar_t character; - size_t characterBytes; - - if (!(cString[i] & 0x80)) { - s->cString[j++] = cString[i]; - continue; - } - - character = table[(uint8_t)cString[i]]; - - if (character == 0xFFFD) - @throw [OFInvalidEncodingException - exceptionWithClass: isa]; - - s->UTF8 = YES; - characterBytes = of_string_unicode_to_utf8(character, - buffer); - - if (characterBytes == 0) - @throw [OFInvalidEncodingException - exceptionWithClass: isa]; - - s->cStringLength += characterBytes - 1; - s->cString = [self resizeMemory: s->cString - toSize: s->cStringLength + 1]; - - memcpy(s->cString + j, buffer, characterBytes); - j += characterBytes; - } - - s->cString[s->cStringLength] = 0; - } @catch (id e) { - [self release]; - @throw e; - } - - return self; + Class c = isa; + [self release]; + @throw [OFNotImplementedException exceptionWithClass: c + selector: _cmd]; } - initWithString: (OFString*)string { - self = [super init]; - - @try { - s = [self allocMemoryWithSize: sizeof(*s)]; - memset(s, 0, sizeof(*s)); - - /* - * We need one call to make sure it's initialized (in case it's - * a constant string). 
- */ - s->cStringLength = [string UTF8StringLength]; - s->UTF8 = string->s->UTF8; - s->length = string->s->length; - - s->cString = [self allocMemoryWithSize: s->cStringLength + 1]; - memcpy(s->cString, string->s->cString, s->cStringLength + 1); - } @catch (id e) { - [self release]; - @throw e; - } - - return self; + Class c = isa; + [self release]; + @throw [OFNotImplementedException exceptionWithClass: c + selector: _cmd]; } - initWithUnicodeString: (const of_unichar_t*)string { return [self initWithUnicodeString: string @@ -631,87 +672,14 @@ - initWithUnicodeString: (const of_unichar_t*)string byteOrder: (of_endianess_t)byteOrder length: (size_t)length { - self = [super init]; - - @try { - size_t i, j = 0; - BOOL swap = NO; - - if (length > 0 && *string == 0xFEFF) { - string++; - length--; - } else if (length > 0 && *string == 0xFFFE0000) { - swap = YES; - string++; - length--; - } else if (byteOrder != OF_ENDIANESS_NATIVE) - swap = YES; - - s = [self allocMemoryWithSize: sizeof(*s)]; - memset(s, 0, sizeof(*s)); - - s->cStringLength = length; - s->cString = [self allocMemoryWithSize: (length * 4) + 1]; - s->length = length; - - for (i = 0; i < length; i++) { - char buffer[4]; - size_t characterLen = of_string_unicode_to_utf8( - (swap ? 
of_bswap32(string[i]) : string[i]), - buffer); - - switch (characterLen) { - case 1: - s->cString[j++] = buffer[0]; - break; - case 2: - s->UTF8 = YES; - s->cStringLength++; - - memcpy(s->cString + j, buffer, 2); - j += 2; - - break; - case 3: - s->UTF8 = YES; - s->cStringLength += 2; - - memcpy(s->cString + j, buffer, 3); - j += 3; - - break; - case 4: - s->UTF8 = YES; - s->cStringLength += 3; - - memcpy(s->cString + j, buffer, 4); - j += 4; - - break; - default: - @throw [OFInvalidEncodingException - exceptionWithClass: isa]; - } - } - - s->cString[j] = '\0'; - - @try { - s->cString = [self resizeMemory: s->cString - toSize: s->cStringLength + 1]; - } @catch (OFOutOfMemoryException *e) { - /* We don't care, as we only tried to make it smaller */ - } - } @catch (id e) { - [self release]; - @throw e; - } - - return self; + Class c = isa; + [self release]; + @throw [OFNotImplementedException exceptionWithClass: c + selector: _cmd]; } - initWithUTF16String: (const uint16_t*)string { return [self initWithUTF16String: string @@ -737,113 +705,14 @@ - initWithUTF16String: (const uint16_t*)string byteOrder: (of_endianess_t)byteOrder length: (size_t)length { - self = [super init]; - - @try { - size_t i, j = 0; - BOOL swap = NO; - - if (length > 0 && *string == 0xFEFF) { - string++; - length--; - } else if (length > 0 && *string == 0xFFFE) { - swap = YES; - string++; - length--; - } else if (byteOrder != OF_ENDIANESS_NATIVE) - swap = YES; - - s = [self allocMemoryWithSize: sizeof(*s)]; - memset(s, 0, sizeof(*s)); - - s->cStringLength = length; - s->cString = [self allocMemoryWithSize: (length * 4) + 1]; - s->length = length; - - for (i = 0; i < length; i++) { - char buffer[4]; - of_unichar_t character = - (swap ? 
of_bswap16(string[i]) : string[i]); - size_t characterLen; - - /* Missing high surrogate */ - if ((character & 0xFC00) == 0xDC00) - @throw [OFInvalidEncodingException - exceptionWithClass: isa]; - - if ((character & 0xFC00) == 0xD800) { - uint16_t nextCharacter; - - if (length <= i + 1) - @throw [OFInvalidEncodingException - exceptionWithClass: isa]; - - nextCharacter = (swap - ? of_bswap16(string[i + 1]) - : string[i + 1]); - character = (((character & 0x3FF) << 10) | - (nextCharacter & 0x3FF)) + 0x10000; - - i++; - s->cStringLength--; - s->length--; - } - - characterLen = of_string_unicode_to_utf8( - character, buffer); - - switch (characterLen) { - case 1: - s->cString[j++] = buffer[0]; - break; - case 2: - s->UTF8 = YES; - s->cStringLength++; - - memcpy(s->cString + j, buffer, 2); - j += 2; - - break; - case 3: - s->UTF8 = YES; - s->cStringLength += 2; - - memcpy(s->cString + j, buffer, 3); - j += 3; - - break; - case 4: - s->UTF8 = YES; - s->cStringLength += 3; - - memcpy(s->cString + j, buffer, 4); - j += 4; - - break; - default: - @throw [OFInvalidEncodingException - exceptionWithClass: isa]; - } - } - - s->cString[j] = '\0'; - - @try { - s->cString = [self resizeMemory: s->cString - toSize: s->cStringLength + 1]; - } @catch (OFOutOfMemoryException *e) { - /* We don't care, as we only tried to make it smaller */ - } - } @catch (id e) { - [self release]; - @throw e; - } - - return self; + Class c = isa; + [self release]; + @throw [OFNotImplementedException exceptionWithClass: c + selector: _cmd]; } - initWithFormat: (OFConstantString*)format, ... 
{ id ret; @@ -858,52 +727,14 @@ } - initWithFormat: (OFConstantString*)format arguments: (va_list)arguments { - self = [super init]; - - @try { - int cStringLength; - - if (format == nil) - @throw [OFInvalidArgumentException - exceptionWithClass: isa - selector: _cmd]; - - s = [self allocMemoryWithSize: sizeof(*s)]; - memset(s, 0, sizeof(*s)); - - if ((cStringLength = of_vasprintf(&s->cString, - [format UTF8String], arguments)) == -1) - @throw [OFInvalidFormatException - exceptionWithClass: isa]; - - s->cStringLength = cStringLength; - - @try { - switch (of_string_check_utf8(s->cString, - cStringLength, &s->length)) { - case 1: - s->UTF8 = YES; - break; - case -1: - @throw [OFInvalidEncodingException - exceptionWithClass: isa]; - } - - [self addMemoryToPool: s->cString]; - } @catch (id e) { - free(s->cString); - @throw e; - } - } @catch (id e) { - [self release]; - @throw e; - } - - return self; + Class c = isa; + [self release]; + @throw [OFNotImplementedException exceptionWithClass: c + selector: _cmd]; } - initWithPath: (OFString*)firstComponent, ... { id ret; @@ -918,67 +749,14 @@ } - initWithPath: (OFString*)firstComponent arguments: (va_list)arguments { - self = [super init]; - - @try { - OFString *component; - size_t i, cStringLength; - va_list argumentsCopy; - - s = [self allocMemoryWithSize: sizeof(*s)]; - memset(s, 0, sizeof(*s)); - - /* - * First needs to be a call to be sure it is initialized, in - * case it's a constant string. 
- */ - s->cStringLength = [firstComponent UTF8StringLength]; - s->UTF8 = firstComponent->s->UTF8; - s->length = firstComponent->s->length; - - /* Calculate length and see if we need UTF-8 */ - va_copy(argumentsCopy, arguments); - while ((component = va_arg(argumentsCopy, OFString*)) != nil) { - /* First needs to be a call, see above */ - s->cStringLength += 1 + [component UTF8StringLength]; - s->length += 1 + component->s->length; - - if (component->s->UTF8) - s->UTF8 = YES; - } - - s->cString = [self allocMemoryWithSize: s->cStringLength + 1]; - - cStringLength = [firstComponent UTF8StringLength]; - memcpy(s->cString, [firstComponent UTF8String], cStringLength); - i = cStringLength; - - while ((component = va_arg(arguments, OFString*)) != nil) { - /* - * We already sent each component a message, so we can - * be sure they are initialized and access them - * directly. - */ - cStringLength = component->s->cStringLength; - - s->cString[i] = OF_PATH_DELIMITER; - memcpy(s->cString + i + 1, component->s->cString, - cStringLength); - - i += 1 + cStringLength; - } - - s->cString[i] = '\0'; - } @catch (id e) { - [self release]; - @throw e; - } - - return self; + Class c = isa; + [self release]; + @throw [OFNotImplementedException exceptionWithClass: c + selector: _cmd]; } - initWithContentsOfFile: (OFString*)path { return [self initWithContentsOfFile: path @@ -1096,15 +874,26 @@ - initWithSerialization: (OFXMLElement*)element { @try { OFAutoreleasePool *pool = [[OFAutoreleasePool alloc] init]; - if (![[element name] isEqual: [self className]] || - ![[element namespace] isEqual: OF_SERIALIZATION_NS]) + if (![[element namespace] isEqual: OF_SERIALIZATION_NS]) @throw [OFInvalidArgumentException exceptionWithClass: isa selector: _cmd]; + + if ([self isKindOfClass: [OFMutableString class]]) { + if (![[element name] isEqual: @"OFMutableString"]) + @throw [OFInvalidArgumentException + exceptionWithClass: isa + selector: _cmd]; + } else { + if (![[element name] isEqual: 
@"OFString"]) + @throw [OFInvalidArgumentException + exceptionWithClass: isa + selector: _cmd]; + } self = [self initWithString: [element stringValue]]; [pool release]; } @catch (id e) { @@ -1115,72 +904,85 @@ return self; } - (const char*)UTF8String { - return s->cString; + /* TODO: Implement! */ + @throw [OFNotImplementedException exceptionWithClass: isa + selector: _cmd]; } - (const char*)cStringWithEncoding: (of_string_encoding_t)encoding { - switch (encoding) { - case OF_STRING_ENCODING_UTF_8: - return s->cString; - case OF_STRING_ENCODING_ASCII: - if (s->UTF8) - @throw [OFInvalidEncodingException - exceptionWithClass: isa]; - - return s->cString; - default: - @throw [OFNotImplementedException exceptionWithClass: isa - selector: _cmd]; - } + /* TODO: Implement! */ + @throw [OFNotImplementedException exceptionWithClass: isa + selector: _cmd]; } - (size_t)length { - return s->length; + @throw [OFNotImplementedException exceptionWithClass: isa + selector: _cmd]; } - (size_t)UTF8StringLength { - return s->cStringLength; + /* TODO: Implement! */ + @throw [OFNotImplementedException exceptionWithClass: isa + selector: _cmd]; } - (size_t)cStringLengthWithEncoding: (of_string_encoding_t)encoding { - switch (encoding) { - case OF_STRING_ENCODING_UTF_8: - return s->cStringLength; - case OF_STRING_ENCODING_ASCII: - if (s->UTF8) - @throw [OFInvalidEncodingException - exceptionWithClass: isa]; - - return s->cStringLength; - default: - @throw [OFNotImplementedException exceptionWithClass: isa - selector: _cmd]; - } + /* TODO: Implement! 
*/ + @throw [OFNotImplementedException exceptionWithClass: isa + selector: _cmd]; +} + +- (of_unichar_t)characterAtIndex: (size_t)index +{ + @throw [OFNotImplementedException exceptionWithClass: isa + selector: _cmd]; +} + +- (void)getCharacters: (of_unichar_t*)buffer + inRange: (of_range_t)range +{ + size_t i; + + for (i = 0; i < range.length; i++) + buffer[i] = [self characterAtIndex: range.start + i]; } - (BOOL)isEqual: (id)object { + OFAutoreleasePool *pool; OFString *otherString; + const of_unichar_t *unicodeString, *otherUnicodeString; + size_t length; if (![object isKindOfClass: [OFString class]]) return NO; otherString = object; + length = [self length]; - if ([otherString UTF8StringLength] != s->cStringLength || - otherString->s->length != s->length) + if ([otherString length] != length) + return NO; + + pool = [[OFAutoreleasePool alloc] init]; + + unicodeString = [self unicodeString]; + otherUnicodeString = [otherString unicodeString]; + + if (memcmp(unicodeString, otherUnicodeString, + length * sizeof(of_unichar_t))) { + [pool release]; return NO; + } - if (strcmp(s->cString, otherString->s->cString)) - return NO; + [pool release]; return YES; } - copy @@ -1193,126 +995,120 @@ return [[OFMutableString alloc] initWithString: self]; } - (of_comparison_result_t)compare: (id)object { + OFAutoreleasePool *pool; OFString *otherString; - size_t otherCStringLength, minimumCStringLength; - int compare; + const of_unichar_t *unicodeString, *otherUnicodeString; + size_t i, minimumLength; if (![object isKindOfClass: [OFString class]]) @throw [OFInvalidArgumentException exceptionWithClass: isa selector: _cmd]; otherString = object; - otherCStringLength = [otherString UTF8StringLength]; - minimumCStringLength = (s->cStringLength > otherCStringLength - ? 
otherCStringLength : s->cStringLength); - - if ((compare = memcmp(s->cString, [otherString UTF8String], - minimumCStringLength)) == 0) { - if (s->cStringLength > otherCStringLength) - return OF_ORDERED_DESCENDING; - if (s->cStringLength < otherCStringLength) - return OF_ORDERED_ASCENDING; - return OF_ORDERED_SAME; - } - - if (compare > 0) - return OF_ORDERED_DESCENDING; - else - return OF_ORDERED_ASCENDING; + minimumLength = ([self length] > [otherString length] + ? [otherString length] : [self length]); + + pool = [[OFAutoreleasePool alloc] init]; + + unicodeString = [self unicodeString]; + otherUnicodeString = [otherString unicodeString]; + + for (i = 0; i < minimumLength; i++) { + if (unicodeString[i] > otherUnicodeString[i]) { + [pool release]; + return OF_ORDERED_DESCENDING; + } + + if (unicodeString[i] < otherUnicodeString[i]) { + [pool release]; + return OF_ORDERED_ASCENDING; + } + } + + [pool release]; + + if ([self length] > [otherString length]) + return OF_ORDERED_DESCENDING; + if ([self length] < [otherString length]) + return OF_ORDERED_ASCENDING; + + return OF_ORDERED_SAME; } - (of_comparison_result_t)caseInsensitiveCompare: (OFString*)otherString { - const char *otherCString; - size_t i, j, otherCStringLength, minimumCStringLength; - int compare; - - if (![otherString isKindOfClass: [OFString class]]) - @throw [OFInvalidArgumentException exceptionWithClass: isa - selector: _cmd]; - - otherCString = [otherString UTF8String]; - otherCStringLength = [otherString UTF8StringLength]; - - if (!s->UTF8) { - minimumCStringLength = (s->cStringLength > otherCStringLength - ? 
otherCStringLength : s->cStringLength); - - if ((compare = memcasecmp(s->cString, otherCString, - minimumCStringLength)) == 0) { - if (s->cStringLength > otherCStringLength) - return OF_ORDERED_DESCENDING; - if (s->cStringLength < otherCStringLength) - return OF_ORDERED_ASCENDING; - return OF_ORDERED_SAME; - } - - if (compare > 0) - return OF_ORDERED_DESCENDING; - else - return OF_ORDERED_ASCENDING; - } - - i = j = 0; - - while (i < s->cStringLength && j < otherCStringLength) { - of_unichar_t c1, c2; - size_t l1, l2; - - l1 = of_string_utf8_to_unicode(s->cString + i, - s->cStringLength - i, &c1); - l2 = of_string_utf8_to_unicode(otherCString + j, - otherCStringLength - j, &c2); - - if (l1 == 0 || l2 == 0 || c1 > 0x10FFFF || c2 > 0x10FFFF) - @throw [OFInvalidEncodingException - exceptionWithClass: isa]; - - if (c1 >> 8 < OF_UNICODE_CASEFOLDING_TABLE_SIZE) { - of_unichar_t tc = - of_unicode_casefolding_table[c1 >> 8][c1 & 0xFF]; - - if (tc) - c1 = tc; - } - - if (c2 >> 8 < OF_UNICODE_CASEFOLDING_TABLE_SIZE) { - of_unichar_t tc = - of_unicode_casefolding_table[c2 >> 8][c2 & 0xFF]; - - if (tc) - c2 = tc; - } - - if (c1 > c2) - return OF_ORDERED_DESCENDING; - if (c1 < c2) - return OF_ORDERED_ASCENDING; - - i += l1; - j += l2; - } - - if (s->cStringLength - i > otherCStringLength - j) - return OF_ORDERED_DESCENDING; - else if (s->cStringLength - i < otherCStringLength - j) + OFAutoreleasePool *pool = [[OFAutoreleasePool alloc] init]; + const of_unichar_t *string, *otherUnicodeString; + size_t i, length, otherLength, minimumLength; + + string = [self unicodeString]; + otherUnicodeString = [otherString unicodeString]; + length = [self length]; + otherLength = [otherString length]; + + minimumLength = (length > otherLength ? 
otherLength : length); + + for (i = 0; i < minimumLength; i++) { + of_unichar_t c = string[i]; + of_unichar_t oc = otherUnicodeString[i]; + + if (c >> 8 < OF_UNICODE_CASEFOLDING_TABLE_SIZE) { + of_unichar_t tc = + of_unicode_casefolding_table[c >> 8][c & 0xFF]; + + if (tc) + c = tc; + } + + if (oc >> 8 < OF_UNICODE_CASEFOLDING_TABLE_SIZE) { + of_unichar_t tc = + of_unicode_casefolding_table[oc >> 8][oc & 0xFF]; + + if (tc) + oc = tc; + } + + if (c > oc) { + [pool release]; + return OF_ORDERED_DESCENDING; + } + if (c < oc) { + [pool release]; + return OF_ORDERED_ASCENDING; + } + } + + [pool release]; + + if (length > otherLength) + return OF_ORDERED_DESCENDING; + if (length < otherLength) return OF_ORDERED_ASCENDING; return OF_ORDERED_SAME; } - (uint32_t)hash { + const of_unichar_t *unicodeString = [self unicodeString]; + size_t i, length = [self length]; uint32_t hash; - size_t i; OF_HASH_INIT(hash); - for (i = 0; i < s->cStringLength; i++) - OF_HASH_ADD(hash, s->cString[i]); + + for (i = 0; i < length; i++) { + const of_unichar_t c = unicodeString[i]; + + OF_HASH_ADD(hash, (c & 0xFF000000) >> 24); + OF_HASH_ADD(hash, (c & 0x00FF0000) >> 16); + OF_HASH_ADD(hash, (c & 0x0000FF00) >> 8); + OF_HASH_ADD(hash, c & 0xFF); + } + OF_HASH_FINALIZE(hash); return hash; } @@ -1325,14 +1121,14 @@ { OFAutoreleasePool *pool = [[OFAutoreleasePool alloc] init]; OFXMLElement *element; OFString *className; - if ([self isKindOfClass: [OFConstantString class]]) - className = @"OFString"; + if ([self isKindOfClass: [OFMutableString class]]) + className = @"OFMutableString"; else - className = [self className]; + className = @"OFString"; element = [OFXMLElement elementWithName: className namespace: OF_SERIALIZATION_NS stringValue: self]; @@ -1341,105 +1137,119 @@ [element autorelease]; return element; } -- (of_unichar_t)characterAtIndex: (size_t)index -{ - of_unichar_t character; - - if (index >= s->length) - @throw [OFOutOfRangeException exceptionWithClass: isa]; - - if (!s->UTF8) - 
return s->cString[index]; - - index = of_string_index_to_position(s->cString, index, - s->cStringLength); - - if (!of_string_utf8_to_unicode(s->cString + index, - s->cStringLength - index, &character)) - @throw [OFInvalidEncodingException exceptionWithClass: isa]; - - return character; -} - - (size_t)indexOfFirstOccurrenceOfString: (OFString*)string { - const char *cString = [string UTF8String]; - size_t i, cStringLength = [string UTF8StringLength]; + OFAutoreleasePool *pool; + const of_unichar_t *unicodeString, *searchString; + size_t i, length, searchLength; - if (cStringLength == 0) - return 0; + if ((searchLength = [string length]) == 0) + return 0; - if (cStringLength > s->cStringLength) + if (searchLength > (length = [self length])) return OF_INVALID_INDEX; - for (i = 0; i <= s->cStringLength - cStringLength; i++) - if (!memcmp(s->cString + i, cString, cStringLength)) - return of_string_position_to_index(s->cString, i); + pool = [[OFAutoreleasePool alloc] init]; + + unicodeString = [self unicodeString]; + searchString = [string unicodeString]; + + for (i = 0; i <= length - searchLength; i++) { + if (!memcmp(unicodeString + i, searchString, + searchLength * sizeof(of_unichar_t))) { + [pool release]; + return i; + } + } + + [pool release]; return OF_INVALID_INDEX; } - (size_t)indexOfLastOccurrenceOfString: (OFString*)string { - const char *cString = [string UTF8String]; - size_t i, cStringLength = [string UTF8StringLength]; - - if (cStringLength == 0) - return of_string_position_to_index(s->cString, - s->cStringLength); - - if (cStringLength > s->cStringLength) - return OF_INVALID_INDEX; - - for (i = s->cStringLength - cStringLength;; i--) { - if (!memcmp(s->cString + i, cString, cStringLength)) - return of_string_position_to_index(s->cString, i); - - /* Did not match and we're at the last char */ - if (i == 0) - return OF_INVALID_INDEX; - } + OFAutoreleasePool *pool; + const of_unichar_t *unicodeString, *searchString; + size_t i, length, 
searchLength; + + if ((searchLength = [string length]) == 0) + return [self length]; + + if (searchLength > (length = [self length])) + return OF_INVALID_INDEX; + + pool = [[OFAutoreleasePool alloc] init]; + + unicodeString = [self unicodeString]; + searchString = [string unicodeString]; + + for (i = length - searchLength;; i--) { + if (!memcmp(unicodeString + i, searchString, + searchLength * sizeof(of_unichar_t))) { + [pool release]; + return i; + } + + /* Did not match and we're at the last character */ + if (i == 0) + break; + } + + [pool release]; + + return OF_INVALID_INDEX; } - (BOOL)containsString: (OFString*)string { - const char *cString = [string UTF8String]; - size_t i, cStringLength = string->s->cStringLength; + OFAutoreleasePool *pool; + const of_unichar_t *unicodeString, *searchString; + size_t i, length, searchLength; - if (cStringLength == 0) + if ((searchLength = [string length]) == 0) return YES; - if (cStringLength > s->cStringLength) + if (searchLength > (length = [self length])) return NO; - for (i = 0; i <= s->cStringLength - cStringLength; i++) - if (!memcmp(s->cString + i, cString, cStringLength)) + pool = [[OFAutoreleasePool alloc] init]; + + unicodeString = [self unicodeString]; + searchString = [string unicodeString]; + + for (i = 0; i <= length - searchLength; i++) { + if (!memcmp(unicodeString + i, searchString, + searchLength * sizeof(of_unichar_t))) { + [pool release]; return YES; + } + } + + [pool release]; return NO; } - (OFString*)substringWithRange: (of_range_t)range { - size_t start = range.start; - size_t end = range.start + range.length; + OFAutoreleasePool *pool; + OFString *ret; - if (end > s->length) + if (range.start + range.length > [self length]) @throw [OFOutOfRangeException exceptionWithClass: isa]; - if (s->UTF8) { - start = of_string_index_to_position(s->cString, start, - s->cStringLength); - end = of_string_index_to_position(s->cString, end, - s->cStringLength); - } - - return [OFString stringWithUTF8String: 
s->cString + start - length: end - start]; + pool = [[OFAutoreleasePool alloc] init]; + ret = [[OFString alloc] + initWithUnicodeString: [self unicodeString] + range.start + length: range.length]; + [pool release]; + + return [ret autorelease]; } - (OFString*)stringByAppendingString: (OFString*)string { OFMutableString *new; @@ -1531,58 +1341,98 @@ return new; } - (BOOL)hasPrefix: (OFString*)prefix { - size_t cStringLength = [prefix UTF8StringLength]; + OFAutoreleasePool *pool; + of_unichar_t *tmp; + const of_unichar_t *prefixString; + size_t prefixLength; + int compare; - if (cStringLength > s->cStringLength) + if ((prefixLength = [prefix length]) > [self length]) return NO; - return !memcmp(s->cString, [prefix UTF8String], cStringLength); + tmp = alloca(prefixLength * sizeof(of_unichar_t)); + [self getCharacters: tmp + inRange: of_range(0, prefixLength)]; + + pool = [[OFAutoreleasePool alloc] init]; + + prefixString = [prefix unicodeString]; + compare = memcmp(tmp, prefixString, + prefixLength * sizeof(of_unichar_t)); + + [pool release]; + + return !compare; } - (BOOL)hasSuffix: (OFString*)suffix { - size_t cStringLength = [suffix UTF8StringLength]; + OFAutoreleasePool *pool; + of_unichar_t *tmp; + const of_unichar_t *suffixString; + size_t length, suffixLength; + int compare; - if (cStringLength > s->cStringLength) + if ((suffixLength = [suffix length]) > [self length]) return NO; - return !memcmp(s->cString + (s->cStringLength - cStringLength), - [suffix UTF8String], cStringLength); + length = [self length]; + + tmp = alloca(suffixLength * sizeof(of_unichar_t)); + [self getCharacters: tmp + inRange: of_range(length - suffixLength, suffixLength)]; + + pool = [[OFAutoreleasePool alloc] init]; + + suffixString = [suffix unicodeString]; + compare = memcmp(tmp, suffixString, + suffixLength * sizeof(of_unichar_t)); + + [pool release]; + + return !compare; } - (OFArray*)componentsSeparatedByString: (OFString*)delimiter { OFAutoreleasePool *pool; - OFMutableArray 
*array; - const char *cString = [delimiter UTF8String]; - size_t cStringLength = [delimiter UTF8StringLength]; + OFMutableArray *array = [OFMutableArray array]; + const of_unichar_t *string, *delimiterString; + size_t length = [self length]; + size_t delimiterLength = [delimiter length]; size_t i, last; - array = [OFMutableArray array]; pool = [[OFAutoreleasePool alloc] init]; - if (cStringLength > s->cStringLength) { + string = [self unicodeString]; + delimiterString = [delimiter unicodeString]; + + if (delimiterLength > length) { [array addObject: [[self copy] autorelease]]; + [array makeImmutable]; + [pool release]; return array; } - for (i = 0, last = 0; i <= s->cStringLength - cStringLength; i++) { - if (memcmp(s->cString + i, cString, cStringLength)) + for (i = 0, last = 0; i <= length - delimiterLength; i++) { + if (memcmp(string + i, delimiterString, + delimiterLength * sizeof(of_unichar_t))) continue; - [array addObject: - [OFString stringWithUTF8String: s->cString + last - length: i - last]]; - i += cStringLength - 1; + [array addObject: [self substringWithRange: + of_range(last, i - last)]]; + + i += delimiterLength - 1; last = i + 1; } - [array addObject: [OFString stringWithUTF8String: s->cString + last]]; + [array addObject: + [self substringWithRange: of_range(last, length - last)]]; [array makeImmutable]; [pool release]; @@ -1591,42 +1441,43 @@ - (OFArray*)pathComponents { OFMutableArray *ret; OFAutoreleasePool *pool; - size_t i, last = 0, pathCStringLength = s->cStringLength; + const of_unichar_t *string; + size_t i, last = 0, length = [self length]; ret = [OFMutableArray array]; - if (pathCStringLength == 0) + if (length == 0) return ret; pool = [[OFAutoreleasePool alloc] init]; -#ifndef _WIN32 - if (s->cString[pathCStringLength - 1] == OF_PATH_DELIMITER) -#else - if (s->cString[pathCStringLength - 1] == '/' || - s->cString[pathCStringLength - 1] == '\\') -#endif - pathCStringLength--; - - for (i = 0; i < pathCStringLength; i++) { -#ifndef _WIN32 
- if (s->cString[i] == OF_PATH_DELIMITER) { -#else - if (s->cString[i] == '/' || s->cString[i] == '\\') { -#endif - [ret addObject: - [OFString stringWithUTF8String: s->cString + last - length: i - last]]; + string = [self unicodeString]; + +#ifndef _WIN32 + if (string[length - 1] == OF_PATH_DELIMITER) +#else + if (string[length - 1] == '/' || string[length - 1] == '\\') +#endif + length--; + + for (i = 0; i < length; i++) { +#ifndef _WIN32 + if (string[i] == OF_PATH_DELIMITER) { +#else + if (string[i] == '/' || string[i] == '\\') { +#endif + [ret addObject: [self substringWithRange: + of_range(last, i - last)]]; + last = i + 1; } } - [ret addObject: [OFString stringWithUTF8String: s->cString + last - length: i - last]]; + [ret addObject: [self substringWithRange: of_range(last, i - last)]]; [ret makeImmutable]; [pool release]; @@ -1633,180 +1484,208 @@ return ret; } - (OFString*)lastPathComponent { - size_t pathCStringLength = s->cStringLength; + OFAutoreleasePool *pool; + const of_unichar_t *string; + size_t length = [self length]; ssize_t i; - if (pathCStringLength == 0) + if (length == 0) return @""; + pool = [[OFAutoreleasePool alloc] init]; + + string = [self unicodeString]; + #ifndef _WIN32 - if (s->cString[pathCStringLength - 1] == OF_PATH_DELIMITER) + if (string[length - 1] == OF_PATH_DELIMITER) #else - if (s->cString[pathCStringLength - 1] == '/' || - s->cString[pathCStringLength - 1] == '\\') + if (string[length - 1] == '/' || string[length - 1] == '\\') #endif - pathCStringLength--; + length--; - for (i = pathCStringLength - 1; i >= 0; i--) { + for (i = length - 1; i >= 0; i--) { #ifndef _WIN32 - if (s->cString[i] == OF_PATH_DELIMITER) { + if (string[i] == OF_PATH_DELIMITER) { #else - if (s->cString[i] == '/' || s->cString[i] == '\\') { + if (string[i] == '/' || string[i] == '\\') { #endif i++; break; } } + + [pool release]; /* * Only one component, but the trailing delimiter might have been * removed, so return a new string anyway. 
*/ if (i < 0) i = 0; - return [OFString stringWithUTF8String: s->cString + i - length: pathCStringLength - i]; + return [self substringWithRange: of_range(i, length - i)]; } - (OFString*)stringByDeletingLastPathComponent { - size_t i, pathCStringLength = s->cStringLength; + OFAutoreleasePool *pool; + const of_unichar_t *string; + size_t i, length = [self length]; - if (pathCStringLength == 0) + if (length == 0) return @""; -#ifndef _WIN32 - if (s->cString[pathCStringLength - 1] == OF_PATH_DELIMITER) -#else - if (s->cString[pathCStringLength - 1] == '/' || - s->cString[pathCStringLength - 1] == '\\') -#endif - pathCStringLength--; - - if (pathCStringLength == 0) - return [OFString stringWithUTF8String: s->cString - length: 1]; - - for (i = pathCStringLength - 1; i >= 1; i--) -#ifndef _WIN32 - if (s->cString[i] == OF_PATH_DELIMITER) -#else - if (s->cString[i] == '/' || s->cString[i] == '\\') -#endif - return [OFString stringWithUTF8String: s->cString - length: i]; - -#ifndef _WIN32 - if (s->cString[0] == OF_PATH_DELIMITER) -#else - if (s->cString[0] == '/' || s->cString[0] == '\\') -#endif - return [OFString stringWithUTF8String: s->cString - length: 1]; + pool = [[OFAutoreleasePool alloc] init]; + + string = [self unicodeString]; + +#ifndef _WIN32 + if (string[length - 1] == OF_PATH_DELIMITER) +#else + if (string[length - 1] == '/' || string[length - 1] == '\\') +#endif + length--; + + if (length == 0) { + [pool release]; + return [self substringWithRange: of_range(0, 1)]; + } + + for (i = length - 1; i >= 1; i--) { +#ifndef _WIN32 + if (string[i] == OF_PATH_DELIMITER) { +#else + if (string[i] == '/' || string[i] == '\\') { +#endif + [pool release]; + return [self substringWithRange: of_range(0, i)]; + } + } + +#ifndef _WIN32 + if (string[0] == OF_PATH_DELIMITER) { +#else + if (string[0] == '/' || string[0] == '\\') { +#endif + [pool release]; + return [self substringWithRange: of_range(0, 1)]; + } + + [pool release]; return @"."; } - (intmax_t)decimalValue { - 
const char *cString = s->cString; - size_t cStringLength = s->cStringLength; + OFAutoreleasePool *pool = [[OFAutoreleasePool alloc] init]; + const of_unichar_t *string = [self unicodeString]; + size_t length = [self length]; int i = 0; intmax_t value = 0; BOOL expectWhitespace = NO; - while (*cString == ' ' || *cString == '\t' || *cString == '\n' || - *cString == '\r' || *cString == '\f') { - cString++; - cStringLength--; + while (*string == ' ' || *string == '\t' || *string == '\n' || + *string == '\r' || *string == '\f') { + string++; + length--; + } + + if (length == 0) { + [pool release]; + return 0; } - if (cString[0] == '-' || cString[0] == '+') + if (string[0] == '-' || string[0] == '+') i++; - for (; i < cStringLength; i++) { + for (; i < length; i++) { if (expectWhitespace) { - if (cString[i] != ' ' && cString[i] != '\t' && - cString[i] != '\n' && cString[i] != '\r' && - cString[i] != '\f') + if (string[i] != ' ' && string[i] != '\t' && + string[i] != '\n' && string[i] != '\r' && + string[i] != '\f') @throw [OFInvalidFormatException exceptionWithClass: isa]; continue; } - if (cString[i] >= '0' && cString[i] <= '9') { + if (string[i] >= '0' && string[i] <= '9') { if (INTMAX_MAX / 10 < value || - INTMAX_MAX - value * 10 < cString[i] - '0') + INTMAX_MAX - value * 10 < string[i] - '0') @throw [OFOutOfRangeException exceptionWithClass: isa]; - value = (value * 10) + (cString[i] - '0'); - } else if (cString[i] == ' ' || cString[i] == '\t' || - cString[i] == '\n' || cString[i] == '\r' || - cString[i] == '\f') + value = (value * 10) + (string[i] - '0'); + } else if (string[i] == ' ' || string[i] == '\t' || + string[i] == '\n' || string[i] == '\r' || + string[i] == '\f') expectWhitespace = YES; else @throw [OFInvalidFormatException exceptionWithClass: isa]; } - if (cString[0] == '-') + if (string[0] == '-') value *= -1; + + [pool release]; return value; } - (uintmax_t)hexadecimalValue { - const char *cString = s->cString; - size_t cStringLength = s->cStringLength; 
+ OFAutoreleasePool *pool = [[OFAutoreleasePool alloc] init]; + const of_unichar_t *string = [self unicodeString]; + size_t length = [self length]; int i = 0; uintmax_t value = 0; BOOL expectWhitespace = NO, foundValue = NO; - while (*cString == ' ' || *cString == '\t' || *cString == '\n' || - *cString == '\r' || *cString == '\f') { - cString++; - cStringLength--; - } - - if (cStringLength == 0) - return 0; - - if (cStringLength >= 2 && cString[0] == '0' && cString[1] == 'x') - i = 2; - else if (cStringLength >= 1 && (cString[0] == 'x' || cString[0] == '$')) - i = 1; - - for (; i < cStringLength; i++) { + while (*string == ' ' || *string == '\t' || *string == '\n' || + *string == '\r' || *string == '\f') { + string++; + length--; + } + + if (length == 0) { + [pool release]; + return 0; + } + + if (length >= 2 && string[0] == '0' && string[1] == 'x') + i = 2; + else if (length >= 1 && (string[0] == 'x' || string[0] == '$')) + i = 1; + + for (; i < length; i++) { uintmax_t newValue; if (expectWhitespace) { - if (cString[i] != ' ' && cString[i] != '\t' && - cString[i] != '\n' && cString[i] != '\r' && - cString[i] != '\f') + if (string[i] != ' ' && string[i] != '\t' && + string[i] != '\n' && string[i] != '\r' && + string[i] != '\f') @throw [OFInvalidFormatException exceptionWithClass: isa]; continue; } - if (cString[i] >= '0' && cString[i] <= '9') { - newValue = (value << 4) | (cString[i] - '0'); - foundValue = YES; - } else if (cString[i] >= 'A' && cString[i] <= 'F') { - newValue = (value << 4) | (cString[i] - 'A' + 10); - foundValue = YES; - } else if (cString[i] >= 'a' && cString[i] <= 'f') { - newValue = (value << 4) | (cString[i] - 'a' + 10); - foundValue = YES; - } else if (cString[i] == 'h' || cString[i] == ' ' || - cString[i] == '\t' || cString[i] == '\n' || - cString[i] == '\r' || cString[i] == '\f') { + if (string[i] >= '0' && string[i] <= '9') { + newValue = (value << 4) | (string[i] - '0'); + foundValue = YES; + } else if (string[i] >= 'A' && string[i] <= 
'F') { + newValue = (value << 4) | (string[i] - 'A' + 10); + foundValue = YES; + } else if (string[i] >= 'a' && string[i] <= 'f') { + newValue = (value << 4) | (string[i] - 'a' + 10); + foundValue = YES; + } else if (string[i] == 'h' || string[i] == ' ' || + string[i] == '\t' || string[i] == '\n' || + string[i] == '\r' || string[i] == '\f') { expectWhitespace = YES; continue; } else @throw [OFInvalidFormatException exceptionWithClass: isa]; @@ -1818,16 +1697,19 @@ } if (!foundValue) @throw [OFInvalidFormatException exceptionWithClass: isa]; + [pool release]; + return value; } - (float)floatValue { - const char *cString = s->cString; + OFAutoreleasePool *pool = [[OFAutoreleasePool alloc] init]; + const char *cString = [self UTF8String]; char *endPointer = NULL; float value; while (*cString == ' ' || *cString == '\t' || *cString == '\n' || *cString == '\r' || *cString == '\f') @@ -1842,16 +1724,19 @@ *endPointer != '\n' && *endPointer != '\r' && *endPointer != '\f') @throw [OFInvalidFormatException exceptionWithClass: isa]; + [pool release]; + return value; } - (double)doubleValue { - const char *cString = s->cString; + OFAutoreleasePool *pool = [[OFAutoreleasePool alloc] init]; + const char *cString = [self UTF8String]; char *endPointer = NULL; double value; while (*cString == ' ' || *cString == '\t' || *cString == '\n' || *cString == '\r' || *cString == '\f') @@ -1866,77 +1751,58 @@ *endPointer != '\n' && *endPointer != '\r' && *endPointer != '\f') @throw [OFInvalidFormatException exceptionWithClass: isa]; + [pool release]; + return value; } - (const of_unichar_t*)unicodeString { OFObject *object = [[[OFObject alloc] init] autorelease]; + size_t length = [self length]; of_unichar_t *ret; - size_t i, j; - ret = [object allocMemoryForNItems: s->length + 1 + ret = [object allocMemoryForNItems: length + 1 ofSize: sizeof(of_unichar_t)]; - - i = 0; - j = 0; - - while (i < s->cStringLength) { - of_unichar_t c; - size_t cLen; - - cLen = of_string_utf8_to_unicode(s->cString 
+ i, - s->cStringLength - i, &c); - - if (cLen == 0 || c > 0x10FFFF) - @throw [OFInvalidEncodingException - exceptionWithClass: isa]; - - ret[j++] = c; - i += cLen; - } - - ret[j] = 0; + [self getCharacters: ret + inRange: of_range(0, length)]; + ret[length] = 0; return ret; } - (const uint16_t*)UTF16String { OFObject *object = [[[OFObject alloc] init] autorelease]; + OFAutoreleasePool *pool = [[OFAutoreleasePool alloc] init]; + const of_unichar_t *unicodeString = [self unicodeString]; + size_t length = [self length]; uint16_t *ret; size_t i, j; /* Allocate memory for the worst case */ - ret = [object allocMemoryForNItems: s->length * 2 + 1 + ret = [object allocMemoryForNItems: length * 2 + 1 ofSize: sizeof(uint16_t)]; - i = 0; j = 0; - while (i < s->cStringLength) { - of_unichar_t c; - size_t cLen; - - cLen = of_string_utf8_to_unicode(s->cString + i, - s->cStringLength - i, &c); - - if (cLen == 0 || c > 0x10FFFF) + for (i = 0; i < length; i++) { + of_unichar_t c = unicodeString[i]; + + if (c > 0x10FFFF) @throw [OFInvalidEncodingException exceptionWithClass: isa]; if (c > 0xFFFF) { c -= 0x10000; ret[j++] = of_bswap16_if_le(0xD800 | (c >> 10)); ret[j++] = of_bswap16_if_le(0xDC00 | (c & 0x3FF)); } else ret[j++] = of_bswap16_if_le(c); - - i += cLen; } ret[j] = 0; @try { @@ -1944,10 +1810,12 @@ toNItems: j + 1 ofSize: sizeof(uint16_t)]; } @catch (OFOutOfMemoryException *e) { /* We don't care, as we only tried to make it smaller */ } + + [pool release]; return ret; } - (void)writeToFile: (OFString*)path @@ -1963,41 +1831,39 @@ } #ifdef OF_HAVE_BLOCKS - (void)enumerateLinesUsingBlock: (of_string_line_enumeration_block_t)block { - OFAutoreleasePool *pool = [[OFAutoreleasePool alloc] init]; - const char *cString = s->cString; - const char *last = cString; + OFAutoreleasePool *pool = [[OFAutoreleasePool alloc] init], *pool2; + const of_unichar_t *string = [self unicodeString]; + size_t i, last = 0, length = [self length]; BOOL stop = NO, lastCarriageReturn = NO; - while 
(!stop && *cString != 0) { - if (lastCarriageReturn && *cString == '\n') { - lastCarriageReturn = NO; + pool2 = [[OFAutoreleasePool alloc] init]; - cString++; + for (i = 0; i < length && !stop; i++) { + if (lastCarriageReturn && string[i] == '\n') { + lastCarriageReturn = NO; last++; continue; } - if (*cString == '\n' || *cString == '\r') { - block([OFString - stringWithUTF8String: last - length: cString - last], &stop); - last = cString + 1; + if (string[i] == '\n' || string[i] == '\r') { + block([self substringWithRange: + of_range(last, i - last)], &stop); + last = i + 1; - [pool releaseObjects]; + [pool2 releaseObjects]; } - lastCarriageReturn = (*cString == '\r'); - cString++; + lastCarriageReturn = (string[i] == '\r'); } if (!stop) - block([OFString stringWithUTF8String: last - length: cString - last], &stop); + block([self substringWithRange: of_range(last, i - last)], + &stop); [pool release]; } #endif @end ADDED src/OFString_UTF8.h Index: src/OFString_UTF8.h ================================================================== --- src/OFString_UTF8.h +++ src/OFString_UTF8.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2008, 2009, 2010, 2011 + * Jonathan Schleifer + * + * All rights reserved. + * + * This file is part of ObjFW. It may be distributed under the terms of the + * Q Public License 1.0, which can be found in the file LICENSE.QPL included in + * the packaging of this file. + * + * Alternatively, it may be distributed under the terms of the GNU General + * Public License, either version 2 or 3, which can be found in the file + * LICENSE.GPLv2 or LICENSE.GPLv3 respectively included in the packaging of this + * file. 
+ */ + +#import "OFString.h" + +@interface OFString_UTF8: OFString +{ +@public + struct of_string_utf8_ivars { + char *cString; + size_t cStringLength; + BOOL UTF8; + size_t length; + } *restrict s; +} +@end ADDED src/OFString_UTF8.m Index: src/OFString_UTF8.m ================================================================== --- src/OFString_UTF8.m +++ src/OFString_UTF8.m @@ -0,0 +1,1084 @@ +/* + * Copyright (c) 2008, 2009, 2010, 2011 + * Jonathan Schleifer + * + * All rights reserved. + * + * This file is part of ObjFW. It may be distributed under the terms of the + * Q Public License 1.0, which can be found in the file LICENSE.QPL included in + * the packaging of this file. + * + * Alternatively, it may be distributed under the terms of the GNU General + * Public License, either version 2 or 3, which can be found in the file + * LICENSE.GPLv2 or LICENSE.GPLv3 respectively included in the packaging of this + * file. + */ + +#include "config.h" + +#include +#include +#include +#include + +#import "OFString_UTF8.h" +#import "OFMutableString_UTF8.h" +#import "OFArray.h" +#import "OFAutoreleasePool.h" + +#import "OFInitializationFailedException.h" +#import "OFInvalidArgumentException.h" +#import "OFInvalidEncodingException.h" +#import "OFInvalidFormatException.h" +#import "OFNotImplementedException.h" +#import "OFOutOfMemoryException.h" +#import "OFOutOfRangeException.h" + +#import "macros.h" +#import "of_asprintf.h" +#import "unicode.h" + +extern const uint16_t of_iso_8859_15[256]; +extern const uint16_t of_windows_1252[256]; + +static inline int +memcasecmp(const char *first, const char *second, size_t length) +{ + size_t i; + + for (i = 0; i < length; i++) { + if (tolower((int)first[i]) > tolower((int)second[i])) + return OF_ORDERED_DESCENDING; + if (tolower((int)first[i]) < tolower((int)second[i])) + return OF_ORDERED_ASCENDING; + } + + return OF_ORDERED_SAME; +} + +@implementation OFString_UTF8 +- init +{ + self = [super init]; + + @try { + s = [self 
allocMemoryWithSize: sizeof(*s)]; + memset(s, 0, sizeof(*s)); + + s->cString = [self allocMemoryWithSize: 1]; + s->cString[0] = '\0'; + } @catch (id e) { + [self release]; + @throw e; + } + + return self; +} + +- initWithCString: (const char*)cString + encoding: (of_string_encoding_t)encoding + length: (size_t)cStringLength +{ + self = [super init]; + + @try { + size_t i, j; + const uint16_t *table; + + if (encoding == OF_STRING_ENCODING_UTF_8 && + cStringLength >= 3 && !memcmp(cString, "\xEF\xBB\xBF", 3)) { + cString += 3; + cStringLength -= 3; + } + + s = [self allocMemoryWithSize: sizeof(*s)]; + memset(s, 0, sizeof(*s)); + + s->cString = [self allocMemoryWithSize: cStringLength + 1]; + s->cStringLength = cStringLength; + + if (encoding == OF_STRING_ENCODING_UTF_8 || + encoding == OF_STRING_ENCODING_ASCII) { + switch (of_string_check_utf8(cString, cStringLength, + &s->length)) { + case 1: + if (encoding == OF_STRING_ENCODING_ASCII) + @throw [OFInvalidEncodingException + exceptionWithClass: isa]; + + s->UTF8 = YES; + break; + case -1: + @throw [OFInvalidEncodingException + exceptionWithClass: isa]; + } + + memcpy(s->cString, cString, cStringLength); + s->cString[cStringLength] = 0; + + return self; + } + + /* All other encodings we support are single byte encodings */ + s->length = cStringLength; + + if (encoding == OF_STRING_ENCODING_ISO_8859_1) { + for (i = j = 0; i < cStringLength; i++) { + char buffer[4]; + size_t bytes; + + if (!(cString[i] & 0x80)) { + s->cString[j++] = cString[i]; + continue; + } + + s->UTF8 = YES; + bytes = of_string_unicode_to_utf8( + (uint8_t)cString[i], buffer); + + if (bytes == 0) + @throw [OFInvalidEncodingException + exceptionWithClass: isa]; + + s->cStringLength += bytes - 1; + s->cString = [self + resizeMemory: s->cString + toSize: s->cStringLength + 1]; + + memcpy(s->cString + j, buffer, bytes); + j += bytes; + } + + s->cString[s->cStringLength] = 0; + + return self; + } + + switch (encoding) { + case 
OF_STRING_ENCODING_ISO_8859_15: + table = of_iso_8859_15; + break; + case OF_STRING_ENCODING_WINDOWS_1252: + table = of_windows_1252; + break; + default: + @throw [OFInvalidEncodingException + exceptionWithClass: isa]; + } + + for (i = j = 0; i < cStringLength; i++) { + char buffer[4]; + of_unichar_t character; + size_t characterBytes; + + if (!(cString[i] & 0x80)) { + s->cString[j++] = cString[i]; + continue; + } + + character = table[(uint8_t)cString[i]]; + + if (character == 0xFFFD) + @throw [OFInvalidEncodingException + exceptionWithClass: isa]; + + s->UTF8 = YES; + characterBytes = of_string_unicode_to_utf8(character, + buffer); + + if (characterBytes == 0) + @throw [OFInvalidEncodingException + exceptionWithClass: isa]; + + s->cStringLength += characterBytes - 1; + s->cString = [self resizeMemory: s->cString + toSize: s->cStringLength + 1]; + + memcpy(s->cString + j, buffer, characterBytes); + j += characterBytes; + } + + s->cString[s->cStringLength] = 0; + } @catch (id e) { + [self release]; + @throw e; + } + + return self; +} + +- initWithString: (OFString*)string +{ + self = [super init]; + + @try { + s = [self allocMemoryWithSize: sizeof(*s)]; + memset(s, 0, sizeof(*s)); + + s->cStringLength = [string UTF8StringLength]; + + if ([string isKindOfClass: [OFString_UTF8 class]] || + [string isKindOfClass: [OFMutableString_UTF8 class]]) + s->UTF8 = ((OFString_UTF8*)string)->s->UTF8; + else + s->UTF8 = YES; + + s->length = [string length]; + + s->cString = [self allocMemoryWithSize: s->cStringLength + 1]; + memcpy(s->cString, [string UTF8String], s->cStringLength + 1); + } @catch (id e) { + [self release]; + @throw e; + } + + return self; +} + +- initWithUnicodeString: (const of_unichar_t*)string + byteOrder: (of_endianess_t)byteOrder + length: (size_t)length +{ + self = [super init]; + + @try { + size_t i, j = 0; + BOOL swap = NO; + + if (length > 0 && *string == 0xFEFF) { + string++; + length--; + } else if (length > 0 && *string == 0xFFFE0000) { + swap = YES; 
+ string++; + length--; + } else if (byteOrder != OF_ENDIANESS_NATIVE) + swap = YES; + + s = [self allocMemoryWithSize: sizeof(*s)]; + memset(s, 0, sizeof(*s)); + + s->cStringLength = length; + s->cString = [self allocMemoryWithSize: (length * 4) + 1]; + s->length = length; + + for (i = 0; i < length; i++) { + char buffer[4]; + size_t characterLen = of_string_unicode_to_utf8( + (swap ? of_bswap32(string[i]) : string[i]), + buffer); + + switch (characterLen) { + case 1: + s->cString[j++] = buffer[0]; + break; + case 2: + s->UTF8 = YES; + s->cStringLength++; + + memcpy(s->cString + j, buffer, 2); + j += 2; + + break; + case 3: + s->UTF8 = YES; + s->cStringLength += 2; + + memcpy(s->cString + j, buffer, 3); + j += 3; + + break; + case 4: + s->UTF8 = YES; + s->cStringLength += 3; + + memcpy(s->cString + j, buffer, 4); + j += 4; + + break; + default: + @throw [OFInvalidEncodingException + exceptionWithClass: isa]; + } + } + + s->cString[j] = '\0'; + + @try { + s->cString = [self resizeMemory: s->cString + toSize: s->cStringLength + 1]; + } @catch (OFOutOfMemoryException *e) { + /* We don't care, as we only tried to make it smaller */ + } + } @catch (id e) { + [self release]; + @throw e; + } + + return self; +} + +- initWithUTF16String: (const uint16_t*)string + byteOrder: (of_endianess_t)byteOrder + length: (size_t)length +{ + self = [super init]; + + @try { + size_t i, j = 0; + BOOL swap = NO; + + if (length > 0 && *string == 0xFEFF) { + string++; + length--; + } else if (length > 0 && *string == 0xFFFE) { + swap = YES; + string++; + length--; + } else if (byteOrder != OF_ENDIANESS_NATIVE) + swap = YES; + + s = [self allocMemoryWithSize: sizeof(*s)]; + memset(s, 0, sizeof(*s)); + + s->cStringLength = length; + s->cString = [self allocMemoryWithSize: (length * 4) + 1]; + s->length = length; + + for (i = 0; i < length; i++) { + char buffer[4]; + of_unichar_t character = + (swap ? 
of_bswap16(string[i]) : string[i]); + size_t characterLen; + + /* Missing high surrogate */ + if ((character & 0xFC00) == 0xDC00) + @throw [OFInvalidEncodingException + exceptionWithClass: isa]; + + if ((character & 0xFC00) == 0xD800) { + uint16_t nextCharacter; + + if (length <= i + 1) + @throw [OFInvalidEncodingException + exceptionWithClass: isa]; + + nextCharacter = (swap + ? of_bswap16(string[i + 1]) + : string[i + 1]); + character = (((character & 0x3FF) << 10) | + (nextCharacter & 0x3FF)) + 0x10000; + + i++; + s->cStringLength--; + s->length--; + } + + characterLen = of_string_unicode_to_utf8( + character, buffer); + + switch (characterLen) { + case 1: + s->cString[j++] = buffer[0]; + break; + case 2: + s->UTF8 = YES; + s->cStringLength++; + + memcpy(s->cString + j, buffer, 2); + j += 2; + + break; + case 3: + s->UTF8 = YES; + s->cStringLength += 2; + + memcpy(s->cString + j, buffer, 3); + j += 3; + + break; + case 4: + s->UTF8 = YES; + s->cStringLength += 3; + + memcpy(s->cString + j, buffer, 4); + j += 4; + + break; + default: + @throw [OFInvalidEncodingException + exceptionWithClass: isa]; + } + } + + s->cString[j] = '\0'; + + @try { + s->cString = [self resizeMemory: s->cString + toSize: s->cStringLength + 1]; + } @catch (OFOutOfMemoryException *e) { + /* We don't care, as we only tried to make it smaller */ + } + } @catch (id e) { + [self release]; + @throw e; + } + + return self; +} + +- initWithFormat: (OFConstantString*)format + arguments: (va_list)arguments +{ + self = [super init]; + + @try { + int cStringLength; + + if (format == nil) + @throw [OFInvalidArgumentException + exceptionWithClass: isa + selector: _cmd]; + + s = [self allocMemoryWithSize: sizeof(*s)]; + memset(s, 0, sizeof(*s)); + + if ((cStringLength = of_vasprintf(&s->cString, + [format UTF8String], arguments)) == -1) + @throw [OFInvalidFormatException + exceptionWithClass: isa]; + + s->cStringLength = cStringLength; + + @try { + switch (of_string_check_utf8(s->cString, + 
cStringLength, &s->length)) { + case 1: + s->UTF8 = YES; + break; + case -1: + @throw [OFInvalidEncodingException + exceptionWithClass: isa]; + } + + [self addMemoryToPool: s->cString]; + } @catch (id e) { + free(s->cString); + @throw e; + } + } @catch (id e) { + [self release]; + @throw e; + } + + return self; +} + +- initWithPath: (OFString*)firstComponent + arguments: (va_list)arguments +{ + self = [super init]; + + @try { + OFString *component; + size_t i, cStringLength; + va_list argumentsCopy; + + s = [self allocMemoryWithSize: sizeof(*s)]; + memset(s, 0, sizeof(*s)); + + s->cStringLength = [firstComponent UTF8StringLength]; + + if ([firstComponent isKindOfClass: [OFString_UTF8 class]] || + [firstComponent isKindOfClass: + [OFMutableString_UTF8 class]]) + s->UTF8 = ((OFString_UTF8*)firstComponent)->s->UTF8; + else + s->UTF8 = YES; + + s->length = [firstComponent length]; + + /* Calculate length and see if we need UTF-8 */ + va_copy(argumentsCopy, arguments); + while ((component = va_arg(argumentsCopy, OFString*)) != nil) { + s->cStringLength += 1 + [component UTF8StringLength]; + s->length += 1 + [component length]; + + if ([component isKindOfClass: [OFString_UTF8 class]] || + [component isKindOfClass: + [OFMutableString_UTF8 class]]) + s->UTF8 = ((OFString_UTF8*)component)->s->UTF8; + else + s->UTF8 = YES; + } + + s->cString = [self allocMemoryWithSize: s->cStringLength + 1]; + + cStringLength = [firstComponent UTF8StringLength]; + memcpy(s->cString, [firstComponent UTF8String], cStringLength); + i = cStringLength; + + while ((component = va_arg(arguments, OFString*)) != nil) { + cStringLength = [component UTF8StringLength]; + + s->cString[i] = OF_PATH_DELIMITER; + memcpy(s->cString + i + 1, [component UTF8String], + cStringLength); + + i += 1 + cStringLength; + } + + s->cString[i] = '\0'; + } @catch (id e) { + [self release]; + @throw e; + } + + return self; +} + +- (const char*)UTF8String +{ + return s->cString; +} + +- (const char*)cStringWithEncoding: 
(of_string_encoding_t)encoding +{ + switch (encoding) { + case OF_STRING_ENCODING_UTF_8: + return s->cString; + case OF_STRING_ENCODING_ASCII: + if (s->UTF8) + @throw [OFInvalidEncodingException + exceptionWithClass: isa]; + + return s->cString; + default: + @throw [OFNotImplementedException exceptionWithClass: isa + selector: _cmd]; + } +} + +- (size_t)length +{ + return s->length; +} + +- (size_t)UTF8StringLength +{ + return s->cStringLength; +} + +- (size_t)cStringLengthWithEncoding: (of_string_encoding_t)encoding +{ + switch (encoding) { + case OF_STRING_ENCODING_UTF_8: + return s->cStringLength; + case OF_STRING_ENCODING_ASCII: + if (s->UTF8) + @throw [OFInvalidEncodingException + exceptionWithClass: isa]; + + return s->cStringLength; + default: + @throw [OFNotImplementedException exceptionWithClass: isa + selector: _cmd]; + } +} + +- (BOOL)isEqual: (id)object +{ + OFString_UTF8 *otherString; + + if (![object isKindOfClass: [OFString class]]) + return NO; + + otherString = object; + + if ([otherString UTF8StringLength] != s->cStringLength || + [otherString length] != s->length) + return NO; + + if (strcmp(s->cString, [otherString UTF8String])) + return NO; + + return YES; +} + +- (of_comparison_result_t)compare: (id)object +{ + OFString *otherString; + size_t otherCStringLength, minimumCStringLength; + int compare; + + if (![object isKindOfClass: [OFString class]]) + @throw [OFInvalidArgumentException exceptionWithClass: isa + selector: _cmd]; + + otherString = object; + otherCStringLength = [otherString UTF8StringLength]; + minimumCStringLength = (s->cStringLength > otherCStringLength + ? 
otherCStringLength : s->cStringLength); + + if ((compare = memcmp(s->cString, [otherString UTF8String], + minimumCStringLength)) == 0) { + if (s->cStringLength > otherCStringLength) + return OF_ORDERED_DESCENDING; + if (s->cStringLength < otherCStringLength) + return OF_ORDERED_ASCENDING; + return OF_ORDERED_SAME; + } + + if (compare > 0) + return OF_ORDERED_DESCENDING; + else + return OF_ORDERED_ASCENDING; +} + +- (of_comparison_result_t)caseInsensitiveCompare: (OFString*)otherString +{ + const char *otherCString; + size_t i, j, otherCStringLength, minimumCStringLength; + int compare; + + if (![otherString isKindOfClass: [OFString class]]) + @throw [OFInvalidArgumentException exceptionWithClass: isa + selector: _cmd]; + + otherCString = [otherString UTF8String]; + otherCStringLength = [otherString UTF8StringLength]; + + if (!s->UTF8) { + minimumCStringLength = (s->cStringLength > otherCStringLength + ? otherCStringLength : s->cStringLength); + + if ((compare = memcasecmp(s->cString, otherCString, + minimumCStringLength)) == 0) { + if (s->cStringLength > otherCStringLength) + return OF_ORDERED_DESCENDING; + if (s->cStringLength < otherCStringLength) + return OF_ORDERED_ASCENDING; + return OF_ORDERED_SAME; + } + + if (compare > 0) + return OF_ORDERED_DESCENDING; + else + return OF_ORDERED_ASCENDING; + } + + i = j = 0; + + while (i < s->cStringLength && j < otherCStringLength) { + of_unichar_t c1, c2; + size_t l1, l2; + + l1 = of_string_utf8_to_unicode(s->cString + i, + s->cStringLength - i, &c1); + l2 = of_string_utf8_to_unicode(otherCString + j, + otherCStringLength - j, &c2); + + if (l1 == 0 || l2 == 0 || c1 > 0x10FFFF || c2 > 0x10FFFF) + @throw [OFInvalidEncodingException + exceptionWithClass: isa]; + + if (c1 >> 8 < OF_UNICODE_CASEFOLDING_TABLE_SIZE) { + of_unichar_t tc = + of_unicode_casefolding_table[c1 >> 8][c1 & 0xFF]; + + if (tc) + c1 = tc; + } + + if (c2 >> 8 < OF_UNICODE_CASEFOLDING_TABLE_SIZE) { + of_unichar_t tc = + of_unicode_casefolding_table[c2 >> 
8][c2 & 0xFF]; + + if (tc) + c2 = tc; + } + + if (c1 > c2) + return OF_ORDERED_DESCENDING; + if (c1 < c2) + return OF_ORDERED_ASCENDING; + + i += l1; + j += l2; + } + + if (s->cStringLength - i > otherCStringLength - j) + return OF_ORDERED_DESCENDING; + else if (s->cStringLength - i < otherCStringLength - j) + return OF_ORDERED_ASCENDING; + + return OF_ORDERED_SAME; +} + +/* TODO +- (uint32_t)hash +{ + IMPLEMENT +} +*/ + +- (of_unichar_t)characterAtIndex: (size_t)index +{ + of_unichar_t character; + + if (index >= s->length) + @throw [OFOutOfRangeException exceptionWithClass: isa]; + + if (!s->UTF8) + return s->cString[index]; + + index = of_string_index_to_position(s->cString, index, + s->cStringLength); + + if (!of_string_utf8_to_unicode(s->cString + index, + s->cStringLength - index, &character)) + @throw [OFInvalidEncodingException exceptionWithClass: isa]; + + return character; +} + +- (void)getCharacters: (of_unichar_t*)buffer + inRange: (of_range_t)range +{ + /* TODO: Could be slightly optimized */ + OFAutoreleasePool *pool = [[OFAutoreleasePool alloc] init]; + const of_unichar_t *unicodeString = [self unicodeString]; + + memcpy(buffer, unicodeString + range.start, + range.length * sizeof(of_unichar_t)); + + [pool release]; +} + +- (size_t)indexOfFirstOccurrenceOfString: (OFString*)string +{ + const char *cString = [string UTF8String]; + size_t i, cStringLength = [string UTF8StringLength]; + + if (cStringLength == 0) + return 0; + + if (cStringLength > s->cStringLength) + return OF_INVALID_INDEX; + + for (i = 0; i <= s->cStringLength - cStringLength; i++) + if (!memcmp(s->cString + i, cString, cStringLength)) + return of_string_position_to_index(s->cString, i); + + return OF_INVALID_INDEX; +} + +- (size_t)indexOfLastOccurrenceOfString: (OFString*)string +{ + const char *cString = [string UTF8String]; + size_t i, cStringLength = [string UTF8StringLength]; + + if (cStringLength == 0) + return of_string_position_to_index(s->cString, + s->cStringLength); + + if 
(cStringLength > s->cStringLength) + return OF_INVALID_INDEX; + + for (i = s->cStringLength - cStringLength;; i--) { + if (!memcmp(s->cString + i, cString, cStringLength)) + return of_string_position_to_index(s->cString, i); + + /* Did not match and we're at the last char */ + if (i == 0) + return OF_INVALID_INDEX; + } +} + +- (BOOL)containsString: (OFString*)string +{ + const char *cString = [string UTF8String]; + size_t i, cStringLength = [string UTF8StringLength]; + + if (cStringLength == 0) + return YES; + + if (cStringLength > s->cStringLength) + return NO; + + for (i = 0; i <= s->cStringLength - cStringLength; i++) + if (!memcmp(s->cString + i, cString, cStringLength)) + return YES; + + return NO; +} + +- (OFString*)substringWithRange: (of_range_t)range +{ + size_t start = range.start; + size_t end = range.start + range.length; + + if (end > s->length) + @throw [OFOutOfRangeException exceptionWithClass: isa]; + + if (s->UTF8) { + start = of_string_index_to_position(s->cString, start, + s->cStringLength); + end = of_string_index_to_position(s->cString, end, + s->cStringLength); + } + + return [OFString stringWithUTF8String: s->cString + start + length: end - start]; +} + +- (BOOL)hasPrefix: (OFString*)prefix +{ + size_t cStringLength = [prefix UTF8StringLength]; + + if (cStringLength > s->cStringLength) + return NO; + + return !memcmp(s->cString, [prefix UTF8String], cStringLength); +} + +- (BOOL)hasSuffix: (OFString*)suffix +{ + size_t cStringLength = [suffix UTF8StringLength]; + + if (cStringLength > s->cStringLength) + return NO; + + return !memcmp(s->cString + (s->cStringLength - cStringLength), + [suffix UTF8String], cStringLength); +} + +- (OFArray*)componentsSeparatedByString: (OFString*)delimiter +{ + OFAutoreleasePool *pool; + OFMutableArray *array; + const char *cString = [delimiter UTF8String]; + size_t cStringLength = [delimiter UTF8StringLength]; + size_t i, last; + + array = [OFMutableArray array]; + pool = [[OFAutoreleasePool alloc] init]; + + 
if (cStringLength > s->cStringLength) { + [array addObject: [[self copy] autorelease]]; + [pool release]; + + return array; + } + + for (i = 0, last = 0; i <= s->cStringLength - cStringLength; i++) { + if (memcmp(s->cString + i, cString, cStringLength)) + continue; + + [array addObject: + [OFString stringWithUTF8String: s->cString + last + length: i - last]]; + i += cStringLength - 1; + last = i + 1; + } + [array addObject: [OFString stringWithUTF8String: s->cString + last]]; + + [array makeImmutable]; + + [pool release]; + + return array; +} + +- (OFArray*)pathComponents +{ + OFMutableArray *ret; + OFAutoreleasePool *pool; + size_t i, last = 0, pathCStringLength = s->cStringLength; + + ret = [OFMutableArray array]; + + if (pathCStringLength == 0) + return ret; + + pool = [[OFAutoreleasePool alloc] init]; + +#ifndef _WIN32 + if (s->cString[pathCStringLength - 1] == OF_PATH_DELIMITER) +#else + if (s->cString[pathCStringLength - 1] == '/' || + s->cString[pathCStringLength - 1] == '\\') +#endif + pathCStringLength--; + + for (i = 0; i < pathCStringLength; i++) { +#ifndef _WIN32 + if (s->cString[i] == OF_PATH_DELIMITER) { +#else + if (s->cString[i] == '/' || s->cString[i] == '\\') { +#endif + [ret addObject: + [OFString stringWithUTF8String: s->cString + last + length: i - last]]; + last = i + 1; + } + } + + [ret addObject: [OFString stringWithUTF8String: s->cString + last + length: i - last]]; + + [ret makeImmutable]; + + [pool release]; + + return ret; +} + +- (OFString*)lastPathComponent +{ + size_t pathCStringLength = s->cStringLength; + ssize_t i; + + if (pathCStringLength == 0) + return @""; + +#ifndef _WIN32 + if (s->cString[pathCStringLength - 1] == OF_PATH_DELIMITER) +#else + if (s->cString[pathCStringLength - 1] == '/' || + s->cString[pathCStringLength - 1] == '\\') +#endif + pathCStringLength--; + + for (i = pathCStringLength - 1; i >= 0; i--) { +#ifndef _WIN32 + if (s->cString[i] == OF_PATH_DELIMITER) { +#else + if (s->cString[i] == '/' || s->cString[i] == 
'\\') { +#endif + i++; + break; + } + } + + /* + * Only one component, but the trailing delimiter might have been + * removed, so return a new string anyway. + */ + if (i < 0) + i = 0; + + return [OFString stringWithUTF8String: s->cString + i + length: pathCStringLength - i]; +} + +- (OFString*)stringByDeletingLastPathComponent +{ + size_t i, pathCStringLength = s->cStringLength; + + if (pathCStringLength == 0) + return @""; + +#ifndef _WIN32 + if (s->cString[pathCStringLength - 1] == OF_PATH_DELIMITER) +#else + if (s->cString[pathCStringLength - 1] == '/' || + s->cString[pathCStringLength - 1] == '\\') +#endif + pathCStringLength--; + + if (pathCStringLength == 0) + return [OFString stringWithUTF8String: s->cString + length: 1]; + + for (i = pathCStringLength - 1; i >= 1; i--) +#ifndef _WIN32 + if (s->cString[i] == OF_PATH_DELIMITER) +#else + if (s->cString[i] == '/' || s->cString[i] == '\\') +#endif + return [OFString stringWithUTF8String: s->cString + length: i]; + +#ifndef _WIN32 + if (s->cString[0] == OF_PATH_DELIMITER) +#else + if (s->cString[0] == '/' || s->cString[0] == '\\') +#endif + return [OFString stringWithUTF8String: s->cString + length: 1]; + + return @"."; +} + +- (const of_unichar_t*)unicodeString +{ + OFObject *object = [[[OFObject alloc] init] autorelease]; + of_unichar_t *ret; + size_t i, j; + + ret = [object allocMemoryForNItems: s->length + 1 + ofSize: sizeof(of_unichar_t)]; + + i = 0; + j = 0; + + while (i < s->cStringLength) { + of_unichar_t c; + size_t cLen; + + cLen = of_string_utf8_to_unicode(s->cString + i, + s->cStringLength - i, &c); + + if (cLen == 0 || c > 0x10FFFF) + @throw [OFInvalidEncodingException + exceptionWithClass: isa]; + + ret[j++] = c; + i += cLen; + } + + ret[j] = 0; + + return ret; +} + +#ifdef OF_HAVE_BLOCKS +- (void)enumerateLinesUsingBlock: (of_string_line_enumeration_block_t)block +{ + OFAutoreleasePool *pool = [[OFAutoreleasePool alloc] init]; + const char *cString = s->cString; + const char *last = cString; + 
BOOL stop = NO, lastCarriageReturn = NO; + /* scan byte by byte until the block sets stop or a 0 byte is reached */ + while (!stop && *cString != 0) { + if (lastCarriageReturn && *cString == '\n') { /* line feed directly after a carriage return: that line was already reported, so only step past it */ + lastCarriageReturn = NO; + + cString++; + last++; + + continue; + } + + if (*cString == '\n' || *cString == '\r') { + block([OFString + stringWithUTF8String: last + length: cString - last], &stop); + last = cString + 1; + + [pool releaseObjects]; + } + + lastCarriageReturn = (*cString == '\r'); + cString++; + } + /* unless stopped, report the text after the last line break as a final line; it is empty when the string ends with a line break */ + if (!stop) + block([OFString stringWithUTF8String: last + length: cString - last], &stop); + + [pool release]; +} +#endif +@end Index: tests/OFSet.m ================================================================== --- tests/OFSet.m +++ tests/OFSet.m @@ -49,11 +49,11 @@ TEST(@"-[hash]", [set1 hash] == [set2 hash]) TEST(@"-[description]", [[set1 description] - isEqual: @"{(\n\tbar,\n\tbaz,\n\tfoo,\n\tx\n)}"] && + isEqual: @"{(\n\tbaz,\n\tbar,\n\tx,\n\tfoo\n)}"] && [[set1 description] isEqual: [set2 description]]) TEST(@"-[copy]", [set1 isEqual: [[set1 copy] autorelease]]) TEST(@"-[mutableCopy]", @@ -96,23 +96,23 @@ i = 0; for (OFString *s in set1) { switch (i) { case 0: - if (![s isEqual: @"bar"]) + if (![s isEqual: @"baz"]) ok = NO; break; case 1: - if (![s isEqual: @"baz"]) + if (![s isEqual: @"bar"]) ok = NO; break; case 2: - if (![s isEqual: @"foo"]) + if (![s isEqual: @"x"]) ok = NO; break; case 3: - if (![s isEqual: @"x"]) + if (![s isEqual: @"foo"]) ok = NO; break; } i++; Index: tests/OFStringTests.m ================================================================== --- tests/OFStringTests.m +++ tests/OFStringTests.m @@ -115,11 +115,11 @@ R([s[1] appendUTF8String: "1𝄞"]) && R([s[1] appendString: @"3"]) && R([s[0] appendString: s[1]]) && [s[0] isEqual: @"täs€1𝄞3"]) TEST(@"-[length]", [s[0] length] == 7) TEST(@"-[UTF8StringLength]", [s[0] UTF8StringLength] == 13) - TEST(@"-[hash]", [s[0] hash] == 0xD576830E) + TEST(@"-[hash]", [s[0] hash] == 0x324B6743) TEST(@"-[characterAtIndex:]", [s[0] characterAtIndex: 0] == 't' && 
[s[0] characterAtIndex: 1] == 0xE4 && [s[0] characterAtIndex: 3] == 0x20AC && [s[0] characterAtIndex: 5] == 0x1D11E) @@ -412,10 +412,25 @@ EXPECT_EXCEPTION(@"Detect invalid encoding in -[stringByURLDecoding] " @"#1", OFInvalidEncodingException, [@"foo%bar" stringByURLDecoding]) EXPECT_EXCEPTION(@"Detect invalid encoding in -[stringByURLDecoding] " @"#2", OFInvalidEncodingException, [@"foo%FFbar" stringByURLDecoding]) + + TEST(@"-[setCharacter:atIndex:]", + (s[0] = [OFMutableString stringWithString: @"abäde"]) && + R([s[0] setCharacter: 0xF6 + atIndex: 2]) && + [s[0] isEqual: @"aböde"] && + R([s[0] setCharacter: 'c' + atIndex: 2]) && + [s[0] isEqual: @"abcde"] && + R([s[0] setCharacter: 0x20AC + atIndex: 3]) && + [s[0] isEqual: @"abc€e"] && + R([s[0] setCharacter: 'x' + atIndex: 1]) && + [s[0] isEqual: @"axc€e"]) TEST(@"-[deleteCharactersInRange:]", (s[0] = [OFMutableString stringWithString: @"𝄞öööbä€"]) && R([s[0] deleteCharactersInRange: of_range(1, 3)]) && [s[0] isEqual: @"𝄞bä€"] && Index: tests/serialization.xml ================================================================== --- tests/serialization.xml +++ tests/serialization.xml @@ -1,22 +1,9 @@ - - Qu"xbar -test - 1234 - 0x1.34a456d5cfaadp+10 - asd - 0x1.34a456d5cfaadp+10 - - - - Hello - - Hello Wo ld! How are you? https://webkeks.org/ @@ -60,35 +47,48 @@ - foo bar + foo + + bar + foo - - bar - list - Blub + + Qu"xbar +test + 1234 + 0x1.34a456d5cfaadp+10 + asd + 0x1.34a456d5cfaadp+10 + - B"la + Hello MDEyMzQ1Njc4OTo7PEFCQ0RFRkdISklLTE1OT1BRUlNUVVZXWFla data + + Blub + + + B"la +