Add encoding-aware C string initializers to OFString.

Summary of the change set below:
  * OFString.h: declares enum of_string_encoding (UTF-8 and ISO 8859-1) and
    the stringWithCString:encoding:[length:] / initWithCString:encoding:[length:]
    methods.
  * OFString.m: initWithCString:encoding:length: becomes the initializer that
    does the work; the UTF-8-only initializers forward to it. ISO 8859-1 input
    is copied byte by byte, growing the buffer by one byte for every input byte
    >= 0x80 and storing two UTF-8 bytes for it.
  * OFConstString.m: the new initializers throw OFNotImplementedException.
  * tests/OFString/OFString.m: one ISO 8859-1 test, NUM_TESTS 68 -> 69.

Review notes:
  * Indentation was reconstructed (tabs, aligned colons); apply with
    "patch -l" if the whitespace of the context lines does not match.
  * The two "#include" context lines in the first OFString.h hunk have lost
    their header names; restore them from the target file before applying.
  * initWithCString: keeps accepting a NULL string, as the code it replaces
    did, instead of passing NULL to strlen().

Index: src/OFConstString.m
==================================================================
--- src/OFConstString.m
+++ src/OFConstString.m
@@ -38,10 +38,25 @@
 	@throw [OFNotImplementedException newWithClass: isa
 						      selector: _cmd];
 }
 
 - initWithCString: (const char*)str
+{
+	@throw [OFNotImplementedException newWithClass: isa
+						      selector: _cmd];
+}
+
+- initWithCString: (const char*)str
+	 encoding: (enum of_string_encoding)encoding
+{
+	@throw [OFNotImplementedException newWithClass: isa
+						      selector: _cmd];
+}
+
+- initWithCString: (const char*)str
+	 encoding: (enum of_string_encoding)encoding
+	   length: (size_t)len
 {
 	@throw [OFNotImplementedException newWithClass: isa
 						      selector: _cmd];
 }
 
Index: src/OFString.h
==================================================================
--- src/OFString.h
+++ src/OFString.h
@@ -12,10 +12,15 @@
 #include
 #include
 
 #import "OFObject.h"
 #import "OFArray.h"
+
+enum of_string_encoding {
+	OF_STRING_ENCODING_UTF_8,
+	OF_STRING_ENCODING_ISO_8859_1
+};
 
 extern int of_string_check_utf8(const char*, size_t);
 extern size_t of_string_unicode_to_utf8(uint32_t, char*);
 
 /**
@@ -46,10 +51,33 @@
  * \param str A UTF-8 encoded C string to initialize the OFString with
  * \return A new autoreleased OFString
  */
 + stringWithCString: (const char*)str;
 
+/**
+ * Creates a new OFString from a C string with the specified encoding.
+ *
+ * \param str A C string to initialize the OFString with
+ * \param encoding The encoding of the C string
+ * \return A new autoreleased OFString
+ */
++ stringWithCString: (const char*)str
+	   encoding: (enum of_string_encoding)encoding;
+
+/**
+ * Creates a new OFString from a C string with the specified encoding and
+ * length.
+ *
+ * \param str A C string to initialize the OFString with
+ * \param encoding The encoding of the C string
+ * \param len The length of the string
+ * \return A new autoreleased OFString
+ */
++ stringWithCString: (const char*)str
+	   encoding: (enum of_string_encoding)encoding
+	     length: (size_t)len;
+
 /**
  * Creates a new OFString from a UTF-8 encoded C string with the specified
  * length.
  *
  * \param str A UTF-8 encoded C string to initialize the OFString with
@@ -89,10 +117,34 @@
  * \param str A UTF-8 encoded C string to initialize the OFString with
  * \return An initialized OFString
  */
 - initWithCString: (const char*)str;
 
+/**
+ * Initializes an already allocated OFString from a C string with the specified
+ * encoding.
+ *
+ * \param str A C string to initialize the OFString with
+ * \param encoding The encoding of the C string
+ * \return An initialized OFString
+ */
+- initWithCString: (const char*)str
+	 encoding: (enum of_string_encoding)encoding;
+
+/**
+ * Initializes an already allocated OFString from a C string with the specified
+ * encoding and length.
+ *
+ * \param str A C string to initialize the OFString with
+ * \param encoding The encoding of the C string
+ * \param len The length of the string
+ * \return An initialized OFString
+ */
+- initWithCString: (const char*)str
+	 encoding: (enum of_string_encoding)encoding
+	   length: (size_t)len;
+
 /**
  * Initializes an already allocated OFString from a UTF-8 encoded C string with
  * the specified length.
  *
  * \param str A UTF-8 encoded C string to initialize the OFString with
Index: src/OFString.m
==================================================================
--- src/OFString.m
+++ src/OFString.m
@@ -145,10 +145,26 @@
 
 + stringWithCString: (const char*)str
 {
 	return [[[self alloc] initWithCString: str] autorelease];
 }
+
++ stringWithCString: (const char*)str
+	   encoding: (enum of_string_encoding)encoding
+{
+	return [[[self alloc] initWithCString: str
+				     encoding: encoding] autorelease];
+}
+
++ stringWithCString: (const char*)str
+	   encoding: (enum of_string_encoding)encoding
+	     length: (size_t)len
+{
+	return [[[self alloc] initWithCString: str
+				     encoding: encoding
+				       length: len] autorelease];
+}
 
 + stringWithCString: (const char*)str
 	     length: (size_t)len
 {
 	return [[[self alloc] initWithCString: str
@@ -182,48 +198,33 @@
 	return self;
 }
 
 - initWithCString: (const char*)str
 {
-	Class c;
-
-	self = [super init];
-
-	if (str != NULL) {
-		length = strlen(str);
-
-		switch (of_string_check_utf8(str, length)) {
-		case 1:
-			is_utf8 = YES;
-			break;
-		case -1:
-			c = isa;
-			[super dealloc];
-			@throw [OFInvalidEncodingException newWithClass: c];
-		}
-
-		@try {
-			string = [self allocMemoryWithSize: length + 1];
-		} @catch (OFException *e) {
-			/*
-			 * We can't use [super dealloc] on OS X here.
-			 * Compiler bug? Anyway, [self dealloc] will do here as
-			 * we don't reimplement dealloc.
-			 */
-			[self dealloc];
-			@throw e;
-		}
-		memcpy(string, str, length + 1);
-	}
-
-	return self;
+	/* NULL was accepted before: only run [super init], never strlen(). */
+	if (str == NULL)
+		return [super init];
+
+	return [self initWithCString: str
+			    encoding: OF_STRING_ENCODING_UTF_8
+			      length: strlen(str)];
+}
+
+- initWithCString: (const char*)str
+	 encoding: (enum of_string_encoding)encoding
+{
+	return [self initWithCString: str
+			    encoding: encoding
+			      length: strlen(str)];
 }
 
 - initWithCString: (const char*)str
+	 encoding: (enum of_string_encoding)encoding
 	   length: (size_t)len
 {
 	Class c;
+	size_t i, j;
 
 	self = [super init];
 
 	if (len > strlen(str)) {
 		c = isa;
@@ -231,36 +232,100 @@
 		@throw [OFOutOfRangeException newWithClass: c];
 	}
 
 	length = len;
 
-	switch (of_string_check_utf8(str, length)) {
-	case 1:
-		is_utf8 = YES;
-		break;
-	case -1:
-		c = isa;
-		[super dealloc];
-		@throw [OFInvalidEncodingException newWithClass: c];
-	}
-
 	@try {
 		string = [self allocMemoryWithSize: length + 1];
 	} @catch (OFException *e) {
 		/*
 		 * We can't use [super dealloc] on OS X here.
-		 * Compiler bug? Anyway, [self dealloc] will do here as
-		 * we don't reimplement dealloc.
+		 * Compiler bug? Anyway, [self dealloc] will do here as we
+		 * don't reimplement dealloc.
 		 */
 		[self dealloc];
 		@throw e;
 	}
-	memcpy(string, str, length);
-	string[length] = 0;
+
+	switch (encoding) {
+	case OF_STRING_ENCODING_UTF_8:
+		switch (of_string_check_utf8(str, length)) {
+		case 1:
+			is_utf8 = YES;
+			break;
+		case -1:
+			c = isa;
+			[super dealloc];
+			@throw [OFInvalidEncodingException newWithClass: c];
+		}
+
+		memcpy(string, str, length);
+		string[length] = 0;
+
+		break;
+	case OF_STRING_ENCODING_ISO_8859_1:
+		for (i = j = 0; i < length; i++) {
+			if ((uint8_t)str[i] < 0x80)
+				string[j++] = str[i];
+			else {
+				/*
+				 * ISO 8859-1 can only have 2 bytes when encoded
+				 * as UTF-8, nevertheless, let's be on the safe
+				 * side.
+				 */
+				char buf[4];
+
+				is_utf8 = YES;
+
+				if (of_string_unicode_to_utf8(
+				    (uint8_t)str[i], buf) == 0) {
+					c = isa;
+					[super dealloc];
+					@throw [OFInvalidEncodingException
+					    newWithClass: c];
+				}
+
+				length++;
+				@try {
+					string = [self resizeMemory: string
+							     toSize: length +
+								     1];
+				} @catch (OFException *e) {
+					/*
+					 * We can't use [super dealloc] on OS X
+					 * here. Compiler bug? Anyway,
+					 * [self dealloc] will do here as we
+					 * don't reimplement dealloc.
+					 */
+					[self dealloc];
+					@throw e;
+				}
+
+				string[j++] = buf[0];
+				string[j++] = buf[1];
+			}
+		}
+
+		string[length] = 0;
+
+		break;
+	default:
+		c = isa;
+		[super dealloc];
+		@throw [OFInvalidEncodingException newWithClass: c];
+	}
 
 	return self;
 }
+
+- initWithCString: (const char*)str
+	   length: (size_t)len
+{
+	return [self initWithCString: str
+			    encoding: OF_STRING_ENCODING_UTF_8
+			      length: len];
+}
 
 - initWithFormat: (OFString*)fmt, ...
 {
 	id ret;
 	va_list args;
Index: tests/OFString/OFString.m
==================================================================
--- tests/OFString/OFString.m
+++ tests/OFString/OFString.m
@@ -22,11 +22,11 @@
 #define ZD "%zd"
 #else
 #define ZD "%u"
 #endif
 
-#define NUM_TESTS 68
+#define NUM_TESTS 69
 #define SUCCESS								\
 	printf("\r\033[1;%dmTests successful: " ZD "/%d\033[0m",	\
 	    (i == NUM_TESTS - 1 ? 32 : 33), i + 1, NUM_TESTS);		\
 	fflush(stdout);
 #define FAIL								\
@@ -113,10 +113,15 @@
 
 	s1 = [OFMutableString stringWithCString: "äöü€𝄞"];
 	CHECK([[s1 reverse] isEqual: @"𝄞€üöä"])
 	[s1 dealloc];
 
+	/* ISO-8859-1 tests */
+	CHECK([[OFString stringWithCString: "\xE4\xF6\xFC"
+				   encoding: OF_STRING_ENCODING_ISO_8859_1]
+	    isEqual: @"äöü"])
+
 	/* Format tests */
 	s1 = [OFMutableString stringWithFormat: @"%s: %d", "test", 123];
 	CHECK([s1 isEqual: @"test: 123"])
 
 	[s1 appendWithFormat: @"%02X", 15];