Index: src/OFString.h ================================================================== --- src/OFString.h +++ src/OFString.h @@ -160,20 +160,39 @@ + (instancetype)stringWithCharacters: (const of_unichar_t*)characters length: (size_t)length byteOrder: (of_byte_order_t)byteOrder; /*! - * @brief Creates a new OFString from a big endian UTF-16 encoded string with - * the specified length. + * @brief Creates a new OFString from a UTF-16 encoded string. + * + * @param string The zero terminated UTF-16 string + * @return A new autoreleased OFString + */ ++ (instancetype)stringWithUTF16String: (const uint16_t*)string; + +/*! + * @brief Creates a new OFString from a UTF-16 encoded string with the specified + * length. * * @param string The UTF-16 string * @param length The length of the unicode string * @return A new autoreleased OFString */ + (instancetype)stringWithUTF16String: (const uint16_t*)string length: (size_t)length; +/*! + * @brief Creates a new OFString from a UTF-16 encoded string, assuming the + * specified byte order if no BOM is found. + * + * @param string The zero terminated UTF-16 string + * @param byteOrder The byte order to assume if there is no BOM + * @return A new autoreleased OFString + */ ++ (instancetype)stringWithUTF16String: (const uint16_t*)string + byteOrder: (of_byte_order_t)byteOrder; + /*! * @brief Creates a new OFString from a UTF-16 encoded string with the * specified length, assuming the specified byte order if no BOM is * found. * @@ -339,10 +358,18 @@ */ - initWithCharacters: (const of_unichar_t*)characters length: (size_t)length byteOrder: (of_byte_order_t)byteOrder; +/*! + * @brief Initializes an already allocated OFString with a UTF-16 string. + * + * @param string The zero terminated UTF-16 string + * @return An initialized OFString + */ +- initWithUTF16String: (const uint16_t*)string; + /*! * @brief Initializes an already allocated OFString with a UTF-16 string with * the specified length. 
* * @param string The UTF-16 string @@ -350,10 +377,21 @@ * @return An initialized OFString */ - initWithUTF16String: (const uint16_t*)string length: (size_t)length; +/*! + * @brief Initializes an already allocated OFString with a UTF-16 string, + * assuming the specified byte order if no BOM is found. + * + * @param string The zero terminated UTF-16 string + * @param byteOrder The byte order to assume if there is no BOM + * @return An initialized OFString + */ +- initWithUTF16String: (const uint16_t*)string + byteOrder: (of_byte_order_t)byteOrder; + /*! * @brief Initializes an already allocated OFString with a UTF-16 string with * the specified length, assuming the specified byte order if no BOM is * found. * @@ -900,8 +938,9 @@ #ifdef __cplusplus extern "C" { #endif extern size_t of_string_utf8_encode(of_unichar_t, char*); extern size_t of_string_utf8_decode(const char*, size_t, of_unichar_t*); +extern size_t of_string_utf16_length(const uint16_t*); #ifdef __cplusplus } #endif Index: src/OFString.m ================================================================== --- src/OFString.m +++ src/OFString.m @@ -139,10 +139,21 @@ return 4; } return 0; } + +size_t +of_string_utf16_length(const uint16_t *string) +{ + size_t length = 0; + + while (*string++ != 0) + length++; + + return length; +} static OFString* standardize_path(OFArray *components, OFString *currentDirectory, OFString *parentDirectory, OFString *joinString) { @@ -298,17 +309,29 @@ { return (id)[[OFString_UTF8 alloc] initWithCharacters: string length: length byteOrder: byteOrder]; } + +- initWithUTF16String: (const uint16_t*)string +{ + return (id)[[OFString_UTF8 alloc] initWithUTF16String: string]; +} - initWithUTF16String: (const uint16_t*)string length: (size_t)length { return (id)[[OFString_UTF8 alloc] initWithUTF16String: string length: length]; } + +- initWithUTF16String: (const uint16_t*)string + byteOrder: (of_byte_order_t)byteOrder +{ + return (id)[[OFString_UTF8 alloc] initWithUTF16String: string + byteOrder: 
byteOrder]; +} - initWithUTF16String: (const uint16_t*)string length: (size_t)length byteOrder: (of_byte_order_t)byteOrder { @@ -485,17 +508,29 @@ { return [[[self alloc] initWithCharacters: string length: length byteOrder: byteOrder] autorelease]; } + ++ (instancetype)stringWithUTF16String: (const uint16_t*)string +{ + return [[[self alloc] initWithUTF16String: string] autorelease]; +} + (instancetype)stringWithUTF16String: (const uint16_t*)string length: (size_t)length { return [[[self alloc] initWithUTF16String: string length: length] autorelease]; } + ++ (instancetype)stringWithUTF16String: (const uint16_t*)string + byteOrder: (of_byte_order_t)byteOrder +{ + return [[[self alloc] initWithUTF16String: string + byteOrder: byteOrder] autorelease]; +} + (instancetype)stringWithUTF16String: (const uint16_t*)string length: (size_t)length byteOrder: (of_byte_order_t)byteOrder { @@ -640,18 +675,33 @@ } @catch (id e) { [self release]; @throw e; } } + +- initWithUTF16String: (const uint16_t*)string +{ + return [self initWithUTF16String: string + length: of_string_utf16_length(string) + byteOrder: OF_BYTE_ORDER_NATIVE]; +} - initWithUTF16String: (const uint16_t*)string length: (size_t)length { return [self initWithUTF16String: string length: length byteOrder: OF_BYTE_ORDER_NATIVE]; } + +- initWithUTF16String: (const uint16_t*)string + byteOrder: (of_byte_order_t)byteOrder +{ + return [self initWithUTF16String: string + length: of_string_utf16_length(string) + byteOrder: byteOrder]; +} - initWithUTF16String: (const uint16_t*)string length: (size_t)length byteOrder: (of_byte_order_t)byteOrder { @@ -2011,11 +2061,11 @@ size_t i, j; BOOL swap = (byteOrder != OF_BYTE_ORDER_NATIVE); /* Allocate memory for the worst case */ ret = [object allocMemoryWithSize: sizeof(uint16_t) - count: length * 2]; + count: (length + 1) * 2]; j = 0; for (i = 0; i < length; i++) { of_unichar_t c = characters[i]; @@ -2038,15 +2088,16 @@ ret[j++] = 0xDC00 | (c & 0x3FF); } else ret[j++] = c; } } + 
ret[j] = 0; @try { ret = [object resizeMemory: ret size: sizeof(uint16_t) - count: j]; + count: j + 1]; } @catch (OFOutOfMemoryException *e) { /* We don't care, as we only tried to make it smaller */ } objc_autoreleasePoolPop(pool); Index: tests/OFStringTests.m ================================================================== --- tests/OFStringTests.m +++ tests/OFStringTests.m @@ -45,14 +45,15 @@ static of_unichar_t sucstr[] = { 0xFFFE0000, 0x66000000, 0xF6000000, 0xF6000000, 0x62000000, 0xE4000000, 0x72000000, 0x3AF00100 }; static uint16_t utf16str[] = { - 0xFEFF, 'f', 0xF6, 0xF6, 'b', 0xE4, 'r', 0xD83C, 0xDC3A + 0xFEFF, 'f', 0xF6, 0xF6, 'b', 0xE4, 'r', 0xD83C, 0xDC3A, 0 }; static uint16_t sutf16str[] = { - 0xFFFE, 0x6600, 0xF600, 0xF600, 0x6200, 0xE400, 0x7200, 0x3CD8, 0x3ADC + 0xFFFE, 0x6600, 0xF600, 0xF600, 0x6200, 0xE400, 0x7200, 0x3CD8, 0x3ADC, + 0 }; @interface EntityHandler: OFObject @end @@ -164,18 +165,14 @@ (is = [OFString stringWithCharacters: sucstr length: sizeof(sucstr) / sizeof(*sucstr)]) && [is isEqual: @"fööbär🀺"]) - TEST(@"+[stringWithUTF16String:length:]", - (is = [OFString stringWithUTF16String: utf16str - length: sizeof(utf16str) / - sizeof(*utf16str)]) && + TEST(@"+[stringWithUTF16String:]", + (is = [OFString stringWithUTF16String: utf16str]) && [is isEqual: @"fööbär🀺"] && - (is = [OFString stringWithUTF16String: sutf16str - length: sizeof(sutf16str) / - sizeof(*sutf16str)]) && + (is = [OFString stringWithUTF16String: sutf16str]) && [is isEqual: @"fööbär🀺"]) TEST(@"+[stringWithContentsOfFile:encoding]", (is = [OFString stringWithContentsOfFile: @"testfile.txt" encoding: OF_STRING_ENCODING_ISO_8859_1]) &&