Index: src/OFString.h
==================================================================
--- src/OFString.h
+++ src/OFString.h
@@ -39,10 +39,11 @@
 extern int of_string_check_utf8(const char*, size_t);
 extern size_t of_string_unicode_to_utf8(of_unichar_t, char*);
 extern size_t of_string_utf8_to_unicode(const char*, size_t, of_unichar_t*);
 extern size_t of_string_position_to_index(const char*, size_t);
 extern size_t of_string_index_to_position(const char*, size_t, size_t);
+extern size_t of_unicode_string_length(const of_unichar_t*);
 #ifdef __cplusplus
 }
 #endif
 
 @class OFArray;
@@ -111,10 +112,18 @@
  * \param string A string to initialize the OFString with
  * \return A new autoreleased OFString
  */
 + stringWithString: (OFString*)string;
 
+/**
+ * Creates a new OFString from a unicode string.
+ *
+ * \param string The zero-terminated unicode string; a leading BOM is skipped
+ * \return A new autoreleased OFString
+ */
++ stringWithUnicodeString: (of_unichar_t*)string;
+
 /**
  * Creates a new OFString from a format string.
  * See printf for the format syntax.
  *
  * \param format A string used as format to initialize the OFString
@@ -223,10 +232,18 @@
  * \param string A string to initialize the OFString with
  * \return An initialized OFString
  */
 - initWithString: (OFString*)string;
 
+/**
+ * Initializes an already allocated OFString with a unicode string.
+ *
+ * \param string The zero-terminated unicode string; a leading BOM is skipped
+ * \return An initialized OFString
+ */
+- initWithUnicodeString: (of_unichar_t*)string;
+
 /**
  * Initializes an already allocated OFString with a format string.
  * See printf for the format syntax.
  *
  * \param format A string used as format to initialize the OFString

Index: src/OFString.m
==================================================================
--- src/OFString.m
+++ src/OFString.m
@@ -241,10 +241,24 @@
 	if (++index > length)
 		return OF_INVALID_INDEX;
 
 	return index;
 }
+
+/*
+ * Returns the number of characters (not bytes) before the terminating 0.
+ */
+size_t
+of_unicode_string_length(const of_unichar_t *string)
+{
+	const of_unichar_t *string_ = string;
+
+	while (*string_ != 0)
+		string_++;
+
+	return (size_t)(string_ - string);
+}
 
 @implementation OFString
 + string
 {
 	return [[[self alloc] init] autorelease];
@@ -280,10 +294,15 @@
 
 + stringWithString: (OFString*)string
 {
 	return [[[self alloc] initWithString: string] autorelease];
 }
+
++ stringWithUnicodeString: (of_unichar_t*)string
+{
+	return [[[self alloc] initWithUnicodeString: string] autorelease];
+}
 
 + stringWithFormat: (OFString*)format, ...
 {
 	id ret;
 	va_list arguments;
@@ -519,10 +538,94 @@
 		}
 	} @catch (id e) {
 		[self release];
 		@throw e;
 	}
+
+	return self;
+}
+
+- initWithUnicodeString: (of_unichar_t*)string_
+{
+	self = [super init];
+
+	@try {
+		char buffer[4];
+		size_t i = 0;
+		BOOL swap = NO;
+
+		/* Skip a BOM in native byte order. */
+		if (*string_ == 0xFEFF)
+			string_++;
+
+		/* A byte-swapped BOM: every character must be swapped. */
+		if (*string_ == 0xFFFE0000) {
+			swap = YES;
+			string_++;
+		}
+
+		/*
+		 * Assume 1 byte per character at first; the buffer is grown
+		 * below whenever a character needs more than 1 byte.
+		 */
+		length = of_unicode_string_length(string_);
+		string = [self allocMemoryWithSize: length + 1];
+
+		while (*string_ != '\0') {
+			size_t characterLen;
+
+			if (swap)
+				characterLen = of_string_unicode_to_utf8(
+				    of_bswap32(*string_), buffer);
+			else
+				characterLen = of_string_unicode_to_utf8(
+				    *string_, buffer);
+
+			switch (characterLen) {
+			case 1:
+				string[i++] = buffer[0];
+				break;
+			case 2:
+				length++;
+				string = [self resizeMemory: string
+						     toSize: length + 1];
+
+				memcpy(string + i, buffer, 2);
+				i += 2;
+
+				break;
+			case 3:
+				length += 2;
+				string = [self resizeMemory: string
+						     toSize: length + 1];
+
+				memcpy(string + i, buffer, 3);
+				i += 3;
+
+				break;
+			case 4:
+				length += 3;
+				string = [self resizeMemory: string
+						     toSize: length + 1];
+
+				memcpy(string + i, buffer, 4);
+				i += 4;
+
+				break;
+			default:
+				@throw [OFInvalidEncodingException
+				    newWithClass: isa];
+			}
+
+			string_++;
+		}
+
+		string[i] = '\0';
+	} @catch (id e) {
+		[self release];
+		@throw e;
+	}
 
 	return self;
 }
 
 - initWithFormat: (OFString*)format, ...

Index: tests/OFStringTests.m
==================================================================
--- tests/OFStringTests.m
+++ tests/OFStringTests.m
@@ -35,11 +35,15 @@
 static OFString *module = @"OFString";
 static OFString* whitespace[] = {
 	@" \r \t\n\t \tasd \t \t\t\r\n",
 	@" \t\t \t\t \t \t"
 };
-static of_unichar_t ucstr[] = { 'f', 0xF6, 0xF6, 'b', 0xE4, 'r', 0 };
+static of_unichar_t ucstr[] = { 'f', 0xF6, 0xF6, 'b', 0xE4, 'r', 0x1F03A, 0 };
+static of_unichar_t sucstr[] = {
+	0xFFFE0000, 0x66000000, 0xF6000000, 0xF6000000, 0x62000000, 0xE4000000,
+	0x72000000, 0x3AF00100, 0
+};
 
 @interface EntityHandler: OFObject
 @end
 
 @implementation EntityHandler
@@ -126,13 +130,19 @@
 	TEST(@"-[lowercaseString]", R([s[0] upper]) &&
 	    [[s[0] lowercaseString] isEqual: @"3𝄞1€sät"])
 
 	TEST(@"+[stringWithCString:length:]",
 	    (s[0] = [OFMutableString stringWithCString: "\xEF\xBB\xBF" "foobar"
 						length: 6]) &&
 	    [s[0] isEqual: @"foo"])
 
+	TEST(@"+[stringWithUnicodeString:]",
+	    (s[1] = [OFString stringWithUnicodeString: ucstr]) &&
+	    [s[1] isEqual: @"fööbär🀺"] &&
+	    (s[1] = [OFString stringWithUnicodeString: sucstr]) &&
+	    [s[1] isEqual: @"fööbär🀺"])
+
 	TEST(@"+[stringWithContentsOfFile:encoding]", (s[1] = [OFString
 	    stringWithContentsOfFile: @"testfile.txt"
 			    encoding: OF_STRING_ENCODING_ISO_8859_1]) &&
 	    [s[1] isEqual: @"testäöü"])
 
@@ -357,12 +367,12 @@
 	    OFOutOfRangeException,
 	    [@"0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF"
 	     @"0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF"
 	    hexadecimalValue])
 
-	TEST(@"-[unicodeString]", (ua = [@"fööbär" unicodeString]) &&
-	    !memcmp(ua, ucstr, 7 * sizeof(of_unichar_t)) && R(free(ua)))
+	TEST(@"-[unicodeString]", (ua = [@"fööbär🀺" unicodeString]) &&
+	    !memcmp(ua, ucstr, 8 * sizeof(of_unichar_t)) && R(free(ua)))
 
 	TEST(@"-[MD5Hash]", [[@"asdfoobar" MD5Hash]
 	    isEqual: @"184dce2ec49b5422c7cfd8728864db4c"])
 
 	TEST(@"-[SHA1Hash]", [[@"asdfoobar" SHA1Hash]