Index: src/OFString.m ================================================================== --- src/OFString.m +++ src/OFString.m @@ -1009,15 +1009,11 @@ memcpy(cString + j, buffer, len); j += len; break; default: - if (lossy) - cString[j++] = '?'; - else - @throw [OFInvalidEncodingException - exception]; + @throw [OFInvalidEncodingException exception]; break; } } Index: src/iso_8859_15.m ================================================================== --- src/iso_8859_15.m +++ src/iso_8859_15.m @@ -44,20 +44,19 @@ size_t i; for (i = 0; i < length; i++) { of_unichar_t c = input[i]; - if OF_UNLIKELY (c == 0xA4 || c == 0xA6 || c == 0xA8 || - c == 0xB4 || c == 0xB8 || c == 0xBC || c == 0xBD || - c == 0xBE || c > 0xFFFF) { - if (lossy) - output[i] = '?'; - else - return false; - } - if OF_UNLIKELY (c > 0xFF) { + if OF_UNLIKELY (c > 0xFFFF) { + if (lossy) { + output[i] = '?'; + continue; + } else + return false; + } + switch ((of_char16_t)c) { case 0x20AC: output[i] = 0xA4; break; case 0x160: @@ -87,11 +86,30 @@ else return false; break; } - } else - output[i] = (uint8_t)c; + } else { + switch (c) { + case 0xA4: + case 0xA6: + case 0xA8: + case 0xB4: + case 0xB8: + case 0xBC: + case 0xBD: + case 0xBE: + if (lossy) + output[i] = '?'; + else + return false; + + break; + default: + output[i] = (uint8_t)c; + break; + } + } } return true; } Index: src/windows_1252.m ================================================================== --- src/windows_1252.m +++ src/windows_1252.m @@ -44,18 +44,19 @@ size_t i; for (i = 0; i < length; i++) { of_unichar_t c = input[i]; - if OF_UNLIKELY ((c >= 0x80 && c <= 0x9F) || c > 0xFFFF) { - if (lossy) - output[i] = '?'; - else - return false; - } - if OF_UNLIKELY (c > 0xFF) { + if OF_UNLIKELY (c > 0xFFFF) { + if (lossy) { + output[i] = '?'; + continue; + } else + return false; + } + switch ((of_char16_t)c) { case 0x20AC: output[i] = 0x80; break; case 0x201A: @@ -142,11 +143,18 @@ else return false; break; } - } else - output[i] = (uint8_t)c; + } else { + if OF_UNLIKELY (c >= 0x80 && c <= 0x9F) { + if (lossy) + output[i] = '?'; + else + return false; + } else + output[i] = (uint8_t)c; + } } return true; } Index: tests/OFStringTests.m ================================================================== --- tests/OFStringTests.m +++ tests/OFStringTests.m @@ -19,10 +19,11 @@ #include #include #include #import "OFString.h" +#import "OFMutableString_UTF8.h" #import "OFArray.h" #import "OFURL.h" #import "OFAutoreleasePool.h" #import "OFInvalidArgumentException.h" @@ -196,81 +197,81 @@ EXPECT_EXCEPTION(@"Detection of invalid UTF-8 encoding #2", OFInvalidEncodingException, [OFString stringWithUTF8String: "\xF0\x80\x80\xC0"]) TEST(@"-[reverse] on UTF-8 strings", - (s[0] = [OFMutableString stringWithUTF8String: "äöü€𝄞"]) && + (s[0] = [OFMutableString_UTF8 stringWithUTF8String: "äöü€𝄞"]) && R([s[0] reverse]) && [s[0] isEqual: @"𝄞€üöä"]) - TEST(@"Conversion of ISO 8859-1 to UTF-8", + TEST(@"Conversion of ISO 8859-1 to Unicode", [[OFString stringWithCString: "\xE4\xF6\xFC" encoding: OF_STRING_ENCODING_ISO_8859_1] isEqual: @"äöü"]) - TEST(@"Conversion of ISO 8859-15 to UTF-8", + TEST(@"Conversion of ISO 8859-15 to Unicode", [[OFString stringWithCString: "\xA4\xA6\xA8\xB4\xB8\xBC\xBD\xBE" encoding: OF_STRING_ENCODING_ISO_8859_15] isEqual: @"€ŠšŽžŒœŸ"]) - TEST(@"Conversion of Windows 1252 to UTF-8", + TEST(@"Conversion of Windows 1252 to Unicode", [[OFString stringWithCString: "\x80\x82\x83\x84\x85\x86\x87\x88" "\x89\x8A\x8B\x8C\x8E\x91\x92\x93" "\x94\x95\x96\x97\x98\x99\x9A\x9B" "\x9C\x9E\x9F" encoding: OF_STRING_ENCODING_WINDOWS_1252] isEqual: @"€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ"]) - TEST(@"Conversion of Codepage 437 to UTF-8", + TEST(@"Conversion of Codepage 437 to Unicode", [[OFString stringWithCString: "\xB0\xB1\xB2\xDB" encoding: OF_STRING_ENCODING_CODEPAGE_437] isEqual: @"░▒▓█"]) - TEST(@"Conversion of UTF-8 to ASCII #1", + TEST(@"Conversion of Unicode to ASCII #1", !strcmp([@"This is a test" cStringWithEncoding: OF_STRING_ENCODING_ASCII], "This is a test")) - EXPECT_EXCEPTION(@"Conversion of UTF-8 to ASCII #2", + EXPECT_EXCEPTION(@"Conversion of Unicode to ASCII #2", OFInvalidEncodingException, [@"This is a tést" cStringWithEncoding: OF_STRING_ENCODING_ASCII]) - TEST(@"Conversion of UTF-8 to ISO-8859-1 #1", + TEST(@"Conversion of Unicode to ISO-8859-1 #1", !strcmp([@"This is ä test" cStringWithEncoding: OF_STRING_ENCODING_ISO_8859_1], "This is \xE4 test")) - EXPECT_EXCEPTION(@"Conversion of UTF-8 to ISO-8859-1 #2", + EXPECT_EXCEPTION(@"Conversion of Unicode to ISO-8859-1 #2", OFInvalidEncodingException, [@"This is ä t€st" cStringWithEncoding: OF_STRING_ENCODING_ISO_8859_1]) - TEST(@"Conversion of UTF-8 to ISO-8859-15 #1", + TEST(@"Conversion of Unicode to ISO-8859-15 #1", !strcmp([@"This is ä t€st" cStringWithEncoding: OF_STRING_ENCODING_ISO_8859_15], "This is \xE4 t\xA4st")) - EXPECT_EXCEPTION(@"Conversion of UTF-8 to ISO-8859-15 #2", + EXPECT_EXCEPTION(@"Conversion of Unicode to ISO-8859-15 #2", OFInvalidEncodingException, [@"This is ä t€st…" cStringWithEncoding: OF_STRING_ENCODING_ISO_8859_15]) - TEST(@"Conversion of UTF-8 to Windows-1252 #1", + TEST(@"Conversion of Unicode to Windows-1252 #1", !strcmp([@"This is ä t€st…" cStringWithEncoding: OF_STRING_ENCODING_WINDOWS_1252], "This is \xE4 t\x80st\x85")) - EXPECT_EXCEPTION(@"Conversion of UTF-8 to Windows-1252 #2", + EXPECT_EXCEPTION(@"Conversion of Unicode to Windows-1252 #2", OFInvalidEncodingException, [@"This is ä t€st…‼" cStringWithEncoding: OF_STRING_ENCODING_WINDOWS_1252]) - TEST(@"Lossy conversion of UTF-8 to ASCII", + TEST(@"Lossy conversion of Unicode to ASCII", !strcmp([@"This is a tést" lossyCStringWithEncoding: OF_STRING_ENCODING_ASCII], "This is a t?st")) - TEST(@"Lossy conversion of UTF-8 to ISO-8859-1", + TEST(@"Lossy conversion of Unicode to ISO-8859-1", !strcmp([@"This is ä t€st" lossyCStringWithEncoding: OF_STRING_ENCODING_ISO_8859_1], "This is \xE4 t?st")) - TEST(@"Lossy conversion of UTF-8 to ISO-8859-15", + TEST(@"Lossy conversion of Unicode to ISO-8859-15", !strcmp([@"This is ä t€st…" lossyCStringWithEncoding: OF_STRING_ENCODING_ISO_8859_15], "This is \xE4 t\xA4st?")) - TEST(@"Lossy conversion of UTF-8 to Windows-1252", + TEST(@"Lossy conversion of Unicode to Windows-1252", !strcmp([@"This is ä t€st…‼" lossyCStringWithEncoding: OF_STRING_ENCODING_WINDOWS_1252], "This is \xE4 t\x80st\x85?")) TEST(@"+[stringWithFormat:]", [(s[0] = [OFMutableString stringWithFormat: @"%@:%d", @"test", 123])