Index: src/OFString.m ================================================================== --- src/OFString.m +++ src/OFString.m @@ -72,10 +72,14 @@ encoding: (of_string_encoding_t)encoding lossy: (bool)lossy; - (const char*)OF_cStringWithEncoding: (of_string_encoding_t)encoding lossy: (bool)lossy; @end + +extern bool of_unicode_to_iso_8859_15(const of_unichar_t*, char*, size_t, bool); +extern bool of_unicode_to_windows_1252(const of_unichar_t*, char*, size_t, + bool); /* References for static linking */ void _references_to_categories_of_OFString(void) { _OFString_Hashing_reference = 1; @@ -1058,187 +1062,26 @@ return length; case OF_STRING_ENCODING_ISO_8859_15: if (length + 1 > maxLength) @throw [OFOutOfRangeException exception]; - for (i = 0; i < length; i++) { - of_unichar_t c = characters[i]; - - switch (c) { - case 0xA4: - case 0xA6: - case 0xA8: - case 0xB4: - case 0xB8: - case 0xBC: - case 0xBD: - case 0xBE: - if (lossy) - cString[i] = '?'; - else - @throw [OFInvalidEncodingException - exception]; - - break; - } - - if OF_UNLIKELY (c > 0xFF) { - switch (c) { - case 0x20AC: - cString[i] = 0xA4; - break; - case 0x160: - cString[i] = 0xA6; - break; - case 0x161: - cString[i] = 0xA8; - break; - case 0x17D: - cString[i] = 0xB4; - break; - case 0x17E: - cString[i] = 0xB8; - break; - case 0x152: - cString[i] = 0xBC; - break; - case 0x153: - cString[i] = 0xBD; - break; - case 0x178: - cString[i] = 0xBE; - break; - default: - if (lossy) - cString[i] = '?'; - else - @throw - [OFInvalidEncodingException - exception]; - - break; - } - } else - cString[i] = (uint8_t)c; - } - - cString[i] = '\0'; + if (!of_unicode_to_iso_8859_15(characters, cString, length, + lossy)) + @throw [OFInvalidEncodingException exception]; + + cString[length] = '\0'; return length; case OF_STRING_ENCODING_WINDOWS_1252: if (length + 1 > maxLength) @throw [OFOutOfRangeException exception]; - for (i = 0; i < length; i++) { - of_unichar_t c = characters[i]; - - if OF_UNLIKELY (c >= 0x80 && c <= 0x9F) { 
- if (lossy) - cString[i] = '?'; - else - @throw [OFInvalidEncodingException - exception]; - } - - if OF_UNLIKELY (c > 0xFF) { - switch (c) { - case 0x20AC: - cString[i] = 0x80; - break; - case 0x201A: - cString[i] = 0x82; - break; - case 0x192: - cString[i] = 0x83; - break; - case 0x201E: - cString[i] = 0x84; - break; - case 0x2026: - cString[i] = 0x85; - break; - case 0x2020: - cString[i] = 0x86; - break; - case 0x2021: - cString[i] = 0x87; - break; - case 0x2C6: - cString[i] = 0x88; - break; - case 0x2030: - cString[i] = 0x89; - break; - case 0x160: - cString[i] = 0x8A; - break; - case 0x2039: - cString[i] = 0x8B; - break; - case 0x152: - cString[i] = 0x8C; - break; - case 0x17D: - cString[i] = 0x8E; - break; - case 0x2018: - cString[i] = 0x91; - break; - case 0x2019: - cString[i] = 0x92; - break; - case 0x201C: - cString[i] = 0x93; - break; - case 0x201D: - cString[i] = 0x94; - break; - case 0x2022: - cString[i] = 0x95; - break; - case 0x2013: - cString[i] = 0x96; - break; - case 0x2014: - cString[i] = 0x97; - break; - case 0x2DC: - cString[i] = 0x98; - break; - case 0x2122: - cString[i] = 0x99; - break; - case 0x161: - cString[i] = 0x9A; - break; - case 0x203A: - cString[i] = 0x9B; - break; - case 0x153: - cString[i] = 0x9C; - break; - case 0x17E: - cString[i] = 0x9E; - break; - case 0x178: - cString[i] = 0x9F; - break; - default: - if (lossy) - cString[i] = '?'; - else - @throw - [OFInvalidEncodingException - exception]; - - break; - } - } else - cString[i] = (uint8_t)c; - } - - cString[i] = '\0'; + if (!of_unicode_to_windows_1252(characters, cString, length, + lossy)) + @throw [OFInvalidEncodingException exception]; + + cString[length] = '\0'; return length; default: @throw [OFNotImplementedException exceptionWithSelector: _cmd object: self]; Index: src/iso_8859_15.m ================================================================== --- src/iso_8859_15.m +++ src/iso_8859_15.m @@ -13,10 +13,12 @@ * LICENSE.GPLv2 or LICENSE.GPLv3 respectively included in the 
packaging of this
  * file.
  */
 
 #import "OFString.h"
+
+#import "macros.h"
 
 const of_char16_t of_iso_8859_15[128] = {
 	0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
 	0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
 	0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
@@ -32,5 +34,84 @@
 	0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
 	0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
 	0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
 	0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF
 };
+
+/*
+ * Encodes length Unicode code points from input as ISO 8859-15, writing one
+ * byte per code point to output. No terminating NUL is written.
+ *
+ * A code point without an ISO 8859-15 representation is written as '?' if
+ * lossy is true; otherwise false is returned immediately and output may be
+ * partially written. Returns true once every code point has been converted.
+ */
+bool
+of_unicode_to_iso_8859_15(const of_unichar_t *input, char *output,
+    size_t length, bool lossy)
+{
+	size_t i;
+
+	for (i = 0; i < length; i++) {
+		of_unichar_t c = input[i];
+
+		/*
+		 * ISO 8859-15 reuses these Latin-1 byte values for the
+		 * characters in the switch below, so the code points
+		 * themselves cannot be encoded. Code points above U+FFFF
+		 * must also be rejected here, before the switch truncates
+		 * c to 16 bits.
+		 */
+		if OF_UNLIKELY (c == 0xA4 || c == 0xA6 || c == 0xA8 ||
+		    c == 0xB4 || c == 0xB8 || c == 0xBC || c == 0xBD ||
+		    c == 0xBE || c > 0xFFFF) {
+			if (!lossy)
+				return false;
+
+			/*
+			 * Skip the rest of the iteration so the replacement
+			 * is not overwritten by the code below.
+			 */
+			output[i] = '?';
+			continue;
+		}
+
+		if OF_UNLIKELY (c > 0xFF) {
+			switch ((of_char16_t)c) {
+			case 0x20AC:
+				output[i] = 0xA4;
+				break;
+			case 0x160:
+				output[i] = 0xA6;
+				break;
+			case 0x161:
+				output[i] = 0xA8;
+				break;
+			case 0x17D:
+				output[i] = 0xB4;
+				break;
+			case 0x17E:
+				output[i] = 0xB8;
+				break;
+			case 0x152:
+				output[i] = 0xBC;
+				break;
+			case 0x153:
+				output[i] = 0xBD;
+				break;
+			case 0x178:
+				output[i] = 0xBE;
+				break;
+			default:
+				if (lossy)
+					output[i] = '?';
+				else
+					return false;
+
+				break;
+			}
+		} else
+			output[i] = (uint8_t)c;
+	}
+
+	return true;
+}
Index: src/windows_1252.m
==================================================================
--- src/windows_1252.m
+++ src/windows_1252.m
@@ -13,10 +13,12 @@
  * LICENSE.GPLv2 or LICENSE.GPLv3 respectively included in the packaging of this
  * file.
*/
 
 #import "OFString.h"
+
+#import "macros.h"
 
 const of_char16_t of_windows_1252[128] = {
 	0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
 	0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD,
 	0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
@@ -32,5 +34,138 @@
 	0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
 	0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
 	0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
 	0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF
 };
+
+/*
+ * Encodes length Unicode code points from input as Windows-1252, writing one
+ * byte per code point to output. No terminating NUL is written.
+ *
+ * A code point without a Windows-1252 representation is written as '?' if
+ * lossy is true; otherwise false is returned immediately and output may be
+ * partially written. Returns true once every code point has been converted.
+ */
+bool
+of_unicode_to_windows_1252(const of_unichar_t *input, char *output,
+    size_t length, bool lossy)
+{
+	size_t i;
+
+	for (i = 0; i < length; i++) {
+		of_unichar_t c = input[i];
+
+		/*
+		 * Windows-1252 uses the byte values 0x80-0x9F for the
+		 * characters in the switch below, so U+0080-U+009F themselves
+		 * cannot be encoded. Code points above U+FFFF must also be
+		 * rejected here, before the switch truncates c to 16 bits.
+		 */
+		if OF_UNLIKELY ((c >= 0x80 && c <= 0x9F) || c > 0xFFFF) {
+			if (!lossy)
+				return false;
+
+			/*
+			 * Skip the rest of the iteration so the replacement
+			 * is not overwritten by the code below.
+			 */
+			output[i] = '?';
+			continue;
+		}
+
+		if OF_UNLIKELY (c > 0xFF) {
+			switch ((of_char16_t)c) {
+			case 0x20AC:
+				output[i] = 0x80;
+				break;
+			case 0x201A:
+				output[i] = 0x82;
+				break;
+			case 0x192:
+				output[i] = 0x83;
+				break;
+			case 0x201E:
+				output[i] = 0x84;
+				break;
+			case 0x2026:
+				output[i] = 0x85;
+				break;
+			case 0x2020:
+				output[i] = 0x86;
+				break;
+			case 0x2021:
+				output[i] = 0x87;
+				break;
+			case 0x2C6:
+				output[i] = 0x88;
+				break;
+			case 0x2030:
+				output[i] = 0x89;
+				break;
+			case 0x160:
+				output[i] = 0x8A;
+				break;
+			case 0x2039:
+				output[i] = 0x8B;
+				break;
+			case 0x152:
+				output[i] = 0x8C;
+				break;
+			case 0x17D:
+				output[i] = 0x8E;
+				break;
+			case 0x2018:
+				output[i] = 0x91;
+				break;
+			case 0x2019:
+				output[i] = 0x92;
+				break;
+			case 0x201C:
+				output[i] = 0x93;
+				break;
+			case 0x201D:
+				output[i] = 0x94;
+				break;
+			case 0x2022:
+				output[i] = 0x95;
+				break;
+			case 0x2013:
+				output[i] = 0x96;
+				break;
+			case 0x2014:
+				output[i] = 0x97;
+				break;
+			case 0x2DC:
+				output[i] = 0x98;
+				break;
+			case 0x2122:
+				output[i] = 0x99;
+				break;
+			case 0x161:
+				output[i] = 0x9A;
+				break;
+			case 0x203A:
+				output[i] = 0x9B;
+				break;
+			case 0x153:
+				output[i] = 0x9C;
+				break;
+			case 0x17E:
+				output[i] = 0x9E;
+				break;
+			case 0x178:
+				output[i] = 0x9F;
+				break;
+			default:
+				if (lossy)
+					output[i] = '?';
+				else
+					return false;
+
+				break;
+			}
+		} else
+			output[i] = (uint8_t)c;
+	}
+
+	return true;
+}