Index: src/OFMutableString_UTF8.m ================================================================== --- src/OFMutableString_UTF8.m +++ src/OFMutableString_UTF8.m @@ -534,21 +534,20 @@ size_t end = range.location + range.length; if (range.length > SIZE_MAX - range.location || end > s->length) @throw [OFOutOfRangeException exceptionWithClass: [self class]]; - s->hashed = NO; - s->length -= end - start; - if (s->isUTF8) { start = of_string_utf8_get_position(s->cString, start, s->cStringLength); end = of_string_utf8_get_position(s->cString, end, s->cStringLength); } memmove(s->cString + start, s->cString + end, s->cStringLength - end); + s->hashed = NO; + s->length -= range.length; s->cStringLength -= end - start; s->cString[s->cStringLength] = 0; @try { s->cString = [self resizeMemory: s->cString @@ -566,11 +565,11 @@ size_t newCStringLength, newLength; if (range.length > SIZE_MAX - range.location || end > s->length) @throw [OFOutOfRangeException exceptionWithClass: [self class]]; - newLength = s->length - (end - start) + [replacement length]; + newLength = s->length - range.length + [replacement length]; if (s->isUTF8) { start = of_string_utf8_get_position(s->cString, start, s->cStringLength); end = of_string_utf8_get_position(s->cString, end, Index: src/OFString.h ================================================================== --- src/OFString.h +++ src/OFString.h @@ -54,24 +54,10 @@ #ifdef OF_HAVE_BLOCKS typedef void (^of_string_line_enumeration_block_t)(OFString *line, BOOL *stop); #endif -#ifdef __cplusplus -extern "C" { -#endif -extern int of_string_utf8_check(const char*, size_t, size_t*); -extern size_t of_string_utf8_encode(of_unichar_t, char*); -extern size_t of_string_utf8_decode(const char*, size_t, of_unichar_t*); -extern size_t of_string_utf8_get_index(const char*, size_t); -extern size_t of_string_utf8_get_position(const char*, size_t, size_t); -extern size_t of_unicode_string_length(const of_unichar_t*); -extern size_t of_utf16_string_length(const uint16_t*); -#ifdef __cplusplus -} -#endif - @class OFArray; @class OFURL; /*! * @brief A class for handling strings. @@ -964,5 +950,16 @@ #ifndef NSINTEGER_DEFINED /* Required for string boxing literals to work */ @compatibility_alias NSString OFString; #endif + +#ifdef __cplusplus +extern "C" { +#endif +extern size_t of_string_utf8_encode(of_unichar_t, char*); +extern size_t of_string_utf8_decode(const char*, size_t, of_unichar_t*); +extern size_t of_string_utf16_length(const uint16_t*); +extern size_t of_string_unicode_length(const of_unichar_t*); +#ifdef __cplusplus +} +#endif Index: src/OFString.m ================================================================== --- src/OFString.m +++ src/OFString.m @@ -17,10 +17,12 @@ #include "config.h" #include #include #include + +#include #include #import "OFString.h" #import "OFString_UTF8.h" @@ -54,10 +56,115 @@ * However, the MinGW version __strtod seems to be ok. */ #ifdef _WIN32 # define strtod __strtod #endif + +/* References for static linking */ +void _references_to_categories_of_OFString(void) +{ + _OFString_Hashing_reference = 1; + _OFString_JSONValue_reference = 1; + _OFString_Serialization_reference = 1; + _OFString_URLEncoding_reference = 1; + _OFString_XMLEscaping_reference = 1; + _OFString_XMLUnescaping_reference = 1; +} + +void _reference_to_OFConstantString(void) +{ + [OFConstantString class]; +} + +size_t +of_string_utf8_encode(of_unichar_t character, char *buffer) +{ + size_t i = 0; + + if (character < 0x80) { + buffer[i] = character; + return 1; + } else if (character < 0x800) { + buffer[i++] = 0xC0 | (character >> 6); + buffer[i] = 0x80 | (character & 0x3F); + return 2; + } else if (character < 0x10000) { + buffer[i++] = 0xE0 | (character >> 12); + buffer[i++] = 0x80 | (character >> 6 & 0x3F); + buffer[i] = 0x80 | (character & 0x3F); + return 3; + } else if (character < 0x110000) { + buffer[i++] = 0xF0 | (character >> 18); + buffer[i++] = 0x80 | (character >> 12 & 0x3F); + buffer[i++] = 0x80 | (character >> 6 & 0x3F); + buffer[i] = 0x80 | (character & 0x3F); + return 4; + } + + return 0; +} + +size_t +of_string_utf8_decode(const char *buffer_, size_t length, of_unichar_t *ret) +{ + const uint8_t *buffer = (const uint8_t*)buffer_; + + if (!(*buffer & 0x80)) { + *ret = buffer[0]; + return 1; + } + + if ((*buffer & 0xE0) == 0xC0) { + if OF_UNLIKELY (length < 2) + return 0; + + *ret = ((buffer[0] & 0x1F) << 6) | (buffer[1] & 0x3F); + return 2; + } + + if ((*buffer & 0xF0) == 0xE0) { + if OF_UNLIKELY (length < 3) + return 0; + + *ret = ((buffer[0] & 0x0F) << 12) | ((buffer[1] & 0x3F) << 6) | + (buffer[2] & 0x3F); + return 3; + } + + if ((*buffer & 0xF8) == 0xF0) { + if OF_UNLIKELY (length < 4) + return 0; + + *ret = ((buffer[0] & 0x07) << 18) | ((buffer[1] & 0x3F) << 12) | + ((buffer[2] & 0x3F) << 6) | (buffer[3] & 0x3F); + return 4; + } + + return 0; +} + +size_t +of_string_unicode_length(const of_unichar_t *string) +{ + const of_unichar_t *string_ = string; + + while (*string_ != 0) + string_++; + + return (size_t)(string_ - string); +} + +size_t +of_string_utf16_length(const uint16_t *string) +{ + const uint16_t *string_ = string; + + while (*string_ != 0) + string_++; + + return (size_t)(string_ - string); +} static OFString* standardize_path(OFArray *components, OFString *currentDirectory, OFString *parentDirectory, OFString *joinString) { @@ -101,49 +208,10 @@ objc_autoreleasePoolPop(pool); return [ret autorelease]; } - -/* References for static linking */ -void _references_to_categories_of_OFString(void) -{ - _OFString_Hashing_reference = 1; - _OFString_JSONValue_reference = 1; - _OFString_Serialization_reference = 1; - _OFString_URLEncoding_reference = 1; - _OFString_XMLEscaping_reference = 1; - _OFString_XMLUnescaping_reference = 1; -} - -void _reference_to_OFConstantString(void) -{ - [OFConstantString class]; -} - -size_t -of_unicode_string_length(const of_unichar_t *string) -{ - const of_unichar_t *string_ = string; - - while (*string_ != 0) - string_++; - - return (size_t)(string_ - string); -} - -size_t -of_utf16_string_length(const uint16_t *string) -{ - const uint16_t *string_ = string; - - while (*string_ != 0) - string_++; - - return (size_t)(string_ - string); -} - static struct { Class isa; } placeholder; @interface OFString_placeholder: OFString @@ -626,19 +694,19 @@ - initWithUnicodeString: (const of_unichar_t*)string { return [self initWithUnicodeString: string byteOrder: OF_BYTE_ORDER_NATIVE - length: of_unicode_string_length(string)]; + length: of_string_unicode_length(string)]; } - initWithUnicodeString: (const of_unichar_t*)string byteOrder: (of_byte_order_t)byteOrder { return [self initWithUnicodeString: string byteOrder: byteOrder - length: of_unicode_string_length(string)]; + length: of_string_unicode_length(string)]; } - initWithUnicodeString: (const of_unichar_t*)string length: (size_t)length { @@ -662,19 +730,19 @@ - initWithUTF16String: (const uint16_t*)string { return [self initWithUTF16String: string byteOrder: OF_BYTE_ORDER_BIG_ENDIAN - length: of_utf16_string_length(string)]; + length: of_string_utf16_length(string)]; } - initWithUTF16String: (const uint16_t*)string byteOrder: (of_byte_order_t)byteOrder { return [self initWithUTF16String: string byteOrder: byteOrder - length: of_utf16_string_length(string)]; + length: of_string_utf16_length(string)]; } - initWithUTF16String: (const uint16_t*)string length: (size_t)length { @@ -944,10 +1012,12 @@ exceptionWithClass: [self class]]; } } cString[j] = '\0'; + + assert(j == cStringLength); @try { cString = [object resizeMemory: cString size: cStringLength + 1]; } @catch (OFOutOfMemoryException *e) { Index: src/OFString_UTF8.h ================================================================== --- src/OFString_UTF8.h +++ src/OFString_UTF8.h @@ -40,5 +40,15 @@ - OF_initWithUTF8String: (const char*)UTF8String length: (size_t)UTF8StringLength storage: (char*)storage; @end + +#ifdef __cplusplus +extern "C" { +#endif +extern int of_string_utf8_check(const char*, size_t, size_t*); +extern size_t of_string_utf8_get_index(const char*, size_t); +extern size_t of_string_utf8_get_position(const char*, size_t, size_t); +#ifdef __cplusplus +} +#endif Index: src/OFString_UTF8.m ================================================================== --- src/OFString_UTF8.m +++ src/OFString_UTF8.m @@ -122,77 +122,10 @@ *length = tmpLength; return isUTF8; } -size_t -of_string_utf8_encode(of_unichar_t character, char *buffer) -{ - size_t i = 0; - - if (character < 0x80) { - buffer[i] = character; - return 1; - } else if (character < 0x800) { - buffer[i++] = 0xC0 | (character >> 6); - buffer[i] = 0x80 | (character & 0x3F); - return 2; - } else if (character < 0x10000) { - buffer[i++] = 0xE0 | (character >> 12); - buffer[i++] = 0x80 | (character >> 6 & 0x3F); - buffer[i] = 0x80 | (character & 0x3F); - return 3; - } else if (character < 0x110000) { - buffer[i++] = 0xF0 | (character >> 18); - buffer[i++] = 0x80 | (character >> 12 & 0x3F); - buffer[i++] = 0x80 | (character >> 6 & 0x3F); - buffer[i] = 0x80 | (character & 0x3F); - return 4; - } - - return 0; -} - -size_t -of_string_utf8_decode(const char *buffer_, size_t length, of_unichar_t *ret) -{ - const uint8_t *buffer = (const uint8_t*)buffer_; - - if (!(*buffer & 0x80)) { - *ret = buffer[0]; - return 1; - } - - if ((*buffer & 0xE0) == 0xC0) { - if OF_UNLIKELY (length < 2) - return 0; - - *ret = ((buffer[0] & 0x1F) << 6) | (buffer[1] & 0x3F); - return 2; - } - - if ((*buffer & 0xF0) == 0xE0) { - if OF_UNLIKELY (length < 3) - return 0; - - *ret = ((buffer[0] & 0x0F) << 12) | ((buffer[1] & 0x3F) << 6) | - (buffer[2] & 0x3F); - return 3; - } - - if ((*buffer & 0xF8) == 0xF0) { - if OF_UNLIKELY (length < 4) - return 0; - - *ret = ((buffer[0] & 0x07) << 18) | ((buffer[1] & 0x3F) << 12) | - ((buffer[2] & 0x3F) << 6) | (buffer[3] & 0x3F); - return 4; - } - - return 0; -} - size_t of_string_utf8_get_index(const char *string, size_t position) { size_t i, index = position; @@ -1322,12 +1255,11 @@ size_t i, j; ret = [object allocMemoryWithSize: sizeof(of_unichar_t) count: s->length + 1]; - i = 0; - j = 0; + i = j = 0; while (i < s->cStringLength) { of_unichar_t c; size_t cLen;