Index: src/Makefile ================================================================== --- src/Makefile +++ src/Makefile @@ -156,10 +156,11 @@ codepage_437.m \ codepage_850.m \ ${FOUNDATION_COMPAT_M} \ ${INSTANCE_M} \ iso_8859_15.m \ + mac_roman.m \ ${UNICODE_M} \ windows_1252.m SRCS_FILES += OFSettings_INIFile.m SRCS_SOCKETS += ${OFKERNELEVENTOBSERVER_EPOLL_M} \ ${OFKERNELEVENTOBSERVER_KQUEUE_M} \ Index: src/OFHTTPResponse.m ================================================================== --- src/OFHTTPResponse.m +++ src/OFHTTPResponse.m @@ -117,10 +117,12 @@ encoding = OF_STRING_ENCODING_ISO_8859_1; if ([contentType hasSuffix: @"charset=iso-8859-15"]) encoding = OF_STRING_ENCODING_ISO_8859_15; if ([contentType hasSuffix: @"charset=windows-1252"]) encoding = OF_STRING_ENCODING_WINDOWS_1252; + if ([contentType hasSuffix: @"charset=macintosh"]) + encoding = OF_STRING_ENCODING_MAC_ROMAN; } if (encoding == OF_STRING_ENCODING_AUTODETECT) encoding = OF_STRING_ENCODING_UTF_8; Index: src/OFString.h ================================================================== --- src/OFString.h +++ src/OFString.h @@ -64,10 +64,12 @@ OF_STRING_ENCODING_WINDOWS_1252, /*! Codepage 437 */ OF_STRING_ENCODING_CODEPAGE_437, /*! Codepage 850 */ OF_STRING_ENCODING_CODEPAGE_850, + /*! Mac OS Roman */ + OF_STRING_ENCODING_MAC_ROMAN, /*! 
Try to automatically detect the encoding */ OF_STRING_ENCODING_AUTODETECT = 0xFF } of_string_encoding_t; enum { Index: src/OFString.m ================================================================== --- src/OFString.m +++ src/OFString.m @@ -87,18 +87,20 @@ lossy: (bool)lossy; - (OFString*)OF_JSONRepresentationWithOptions: (int)options depth: (size_t)depth; @end -extern bool of_unicode_to_iso_8859_15(const of_unichar_t*, uint8_t*, size_t, - bool); -extern bool of_unicode_to_windows_1252(const of_unichar_t*, uint8_t*, size_t, - bool); -extern bool of_unicode_to_codepage_437(const of_unichar_t*, uint8_t*, size_t, - bool); -extern bool of_unicode_to_codepage_850(const of_unichar_t*, uint8_t*, size_t, - bool); +extern bool of_unicode_to_iso_8859_15(const of_unichar_t*, unsigned char*, + size_t, bool); +extern bool of_unicode_to_windows_1252(const of_unichar_t*, unsigned char*, + size_t, bool); +extern bool of_unicode_to_codepage_437(const of_unichar_t*, unsigned char*, + size_t, bool); +extern bool of_unicode_to_codepage_850(const of_unichar_t*, unsigned char*, + size_t, bool); +extern bool of_unicode_to_mac_roman(const of_unichar_t*, unsigned char*, + size_t, bool); /* References for static linking */ void _references_to_categories_of_OFString(void) { @@ -955,10 +957,12 @@ encoding = OF_STRING_ENCODING_ISO_8859_1; if ([contentType hasSuffix: @"charset=iso-8859-15"]) encoding = OF_STRING_ENCODING_ISO_8859_15; if ([contentType hasSuffix: @"charset=windows-1252"]) encoding = OF_STRING_ENCODING_WINDOWS_1252; + if ([contentType hasSuffix: @"charset=macintosh"]) + encoding = OF_STRING_ENCODING_MAC_ROMAN; } if (encoding == OF_STRING_ENCODING_AUTODETECT) encoding = OF_STRING_ENCODING_UTF_8; @@ -1066,11 +1070,11 @@ cString[i] = '?'; else @throw [OFInvalidEncodingException exception]; } else - cString[i] = (char)characters[i]; + cString[i] = (unsigned char)characters[i]; } cString[i] = '\0'; return length; @@ -1084,55 +1088,66 @@ cString[i] = '?'; else @throw 
[OFInvalidEncodingException exception]; } else - cString[i] = (uint8_t)characters[i]; + cString[i] = (unsigned char)characters[i]; } cString[i] = '\0'; return length; case OF_STRING_ENCODING_ISO_8859_15: if (length + 1 > maxLength) @throw [OFOutOfRangeException exception]; - if (!of_unicode_to_iso_8859_15(characters, (uint8_t*)cString, - length, lossy)) + if (!of_unicode_to_iso_8859_15(characters, + (unsigned char*)cString, length, lossy)) @throw [OFInvalidEncodingException exception]; cString[length] = '\0'; return length; case OF_STRING_ENCODING_WINDOWS_1252: if (length + 1 > maxLength) @throw [OFOutOfRangeException exception]; - if (!of_unicode_to_windows_1252(characters, (uint8_t*)cString, - length, lossy)) + if (!of_unicode_to_windows_1252(characters, + (unsigned char*)cString, length, lossy)) @throw [OFInvalidEncodingException exception]; cString[length] = '\0'; return length; case OF_STRING_ENCODING_CODEPAGE_437: if (length + 1 > maxLength) @throw [OFOutOfRangeException exception]; - if (!of_unicode_to_codepage_437(characters, (uint8_t*)cString, - length, lossy)) + if (!of_unicode_to_codepage_437(characters, + (unsigned char*)cString, length, lossy)) @throw [OFInvalidEncodingException exception]; cString[length] = '\0'; return length; case OF_STRING_ENCODING_CODEPAGE_850: if (length + 1 > maxLength) @throw [OFOutOfRangeException exception]; - if (!of_unicode_to_codepage_850(characters, (uint8_t*)cString, - length, lossy)) + if (!of_unicode_to_codepage_850(characters, + (unsigned char*)cString, length, lossy)) + @throw [OFInvalidEncodingException exception]; + + cString[length] = '\0'; + + return length; + case OF_STRING_ENCODING_MAC_ROMAN: + if (length + 1 > maxLength) + @throw [OFOutOfRangeException exception]; + + if (!of_unicode_to_mac_roman(characters, + (unsigned char*)cString, length, lossy)) @throw [OFInvalidEncodingException exception]; cString[length] = '\0'; return length; @@ -1192,10 +1207,11 @@ case OF_STRING_ENCODING_ISO_8859_1: case 
OF_STRING_ENCODING_ISO_8859_15: case OF_STRING_ENCODING_WINDOWS_1252: case OF_STRING_ENCODING_CODEPAGE_437: case OF_STRING_ENCODING_CODEPAGE_850: + case OF_STRING_ENCODING_MAC_ROMAN: cString = [object allocMemoryWithSize: length + 1]; [self OF_getCString: cString maxLength: length + 1 encoding: encoding @@ -1257,10 +1273,11 @@ case OF_STRING_ENCODING_ISO_8859_1: case OF_STRING_ENCODING_ISO_8859_15: case OF_STRING_ENCODING_WINDOWS_1252: case OF_STRING_ENCODING_CODEPAGE_437: case OF_STRING_ENCODING_CODEPAGE_850: + case OF_STRING_ENCODING_MAC_ROMAN: return [self length]; default: @throw [OFInvalidEncodingException exception]; } } Index: src/OFString_UTF8.m ================================================================== --- src/OFString_UTF8.m +++ src/OFString_UTF8.m @@ -40,10 +40,11 @@ extern const of_char16_t of_iso_8859_15[128]; extern const of_char16_t of_windows_1252[128]; extern const of_char16_t of_codepage_437[128]; extern const of_char16_t of_codepage_850[128]; +extern const of_char16_t of_mac_roman[128]; static inline int memcasecmp(const char *first, const char *second, size_t length) { for (size_t i = 0; i < length; i++) { @@ -296,10 +297,13 @@ case OF_STRING_ENCODING_CODEPAGE_437: table = of_codepage_437; break; case OF_STRING_ENCODING_CODEPAGE_850: table = of_codepage_850; + break; + case OF_STRING_ENCODING_MAC_ROMAN: + table = of_mac_roman; break; default: @throw [OFInvalidEncodingException exception]; } Index: src/OFXMLParser.m ================================================================== --- src/OFXMLParser.m +++ src/OFXMLParser.m @@ -473,10 +473,13 @@ _encoding = OF_STRING_ENCODING_ISO_8859_15; else if ([value isEqual: @"windows-1252"]) _encoding = OF_STRING_ENCODING_WINDOWS_1252; + else if ([value isEqual: @"macintosh"]) + _encoding = + OF_STRING_ENCODING_MAC_ROMAN; else return false; } last = i + 1; Index: src/codepage_437.m ================================================================== --- src/codepage_437.m +++ src/codepage_437.m @@ 
-36,11 +36,11 @@ 0x2261, 0x00B1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00F7, 0x2248, 0x00B0, 0x2219, 0x00B7, 0x221A, 0x207F, 0x00B2, 0x25A0, 0x00A0 }; bool -of_unicode_to_codepage_437(const of_unichar_t *input, uint8_t *output, +of_unicode_to_codepage_437(const of_unichar_t *input, unsigned char *output, size_t length, bool lossy) { for (size_t i = 0; i < length; i++) { of_unichar_t c = input[i]; Index: src/codepage_850.m ================================================================== --- src/codepage_850.m +++ src/codepage_850.m @@ -36,11 +36,11 @@ 0x00AD, 0x00B1, 0x2017, 0x00BE, 0x00B6, 0x00A7, 0x00F7, 0x00B8, 0x00B0, 0x00A8, 0x00B7, 0x00B9, 0x00B3, 0x00B2, 0x25A0, 0x00A0 }; bool -of_unicode_to_codepage_850(const of_unichar_t *input, uint8_t *output, +of_unicode_to_codepage_850(const of_unichar_t *input, unsigned char *output, size_t length, bool lossy) { for (size_t i = 0; i < length; i++) { of_unichar_t c = input[i]; Index: src/iso_8859_15.m ================================================================== --- src/iso_8859_15.m +++ src/iso_8859_15.m @@ -36,11 +36,11 @@ 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF }; bool -of_unicode_to_iso_8859_15(const of_unichar_t *input, uint8_t *output, +of_unicode_to_iso_8859_15(const of_unichar_t *input, unsigned char *output, size_t length, bool lossy) { for (size_t i = 0; i < length; i++) { of_unichar_t c = input[i]; ADDED src/mac_roman.m Index: src/mac_roman.m ================================================================== --- src/mac_roman.m +++ src/mac_roman.m @@ -0,0 +1,454 @@ +/* + * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017 + * Jonathan Schleifer + * + * All rights reserved. + * + * This file is part of ObjFW. It may be distributed under the terms of the + * Q Public License 1.0, which can be found in the file LICENSE.QPL included in + * the packaging of this file. 
+ *
+ * Alternatively, it may be distributed under the terms of the GNU General
+ * Public License, either version 2 or 3, which can be found in the file
+ * LICENSE.GPLv2 or LICENSE.GPLv3 respectively included in the packaging of this
+ * file.
+ */
+
+#include "config.h"
+
+#import "OFString.h"
+/* Maps Mac OS Roman bytes 0x80-0xFF (index = byte - 0x80) to Unicode. */
+const of_char16_t of_mac_roman[128] = {
+	0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1,
+	0x00E0, 0x00E2, 0x00E4, 0x00E3, 0x00E5, 0x00E7, 0x00E9, 0x00E8,
+	0x00EA, 0x00EB, 0x00ED, 0x00EC, 0x00EE, 0x00EF, 0x00F1, 0x00F3,
+	0x00F2, 0x00F4, 0x00F6, 0x00F5, 0x00FA, 0x00F9, 0x00FB, 0x00FC,
+	0x2020, 0x00B0, 0x00A2, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x00DF,
+	0x00AE, 0x00A9, 0x2122, 0x00B4, 0x00A8, 0x2260, 0x00C6, 0x00D8,
+	0x221E, 0x00B1, 0x2264, 0x2265, 0x00A5, 0x00B5, 0x2202, 0x2211,
+	0x220F, 0x03C0, 0x222B, 0x00AA, 0x00BA, 0x03A9, 0x00E6, 0x00F8,
+	0x00BF, 0x00A1, 0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB,
+	0x00BB, 0x2026, 0x00A0, 0x00C0, 0x00C3, 0x00D5, 0x0152, 0x0153,
+	0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x25CA,
+	0x00FF, 0x0178, 0x2044, 0x20AC, 0x2039, 0x203A, 0xFB01, 0xFB02,
+	0x2021, 0x00B7, 0x201A, 0x201E, 0x2030, 0x00C2, 0x00CA, 0x00C1,
+	0x00CB, 0x00C8, 0x00CD, 0x00CE, 0x00CF, 0x00CC, 0x00D3, 0x00D4,
+	0xF8FF, 0x00D2, 0x00DA, 0x00DB, 0x00D9, 0x0131, 0x02C6, 0x02DC,
+	0x00AF, 0x02D8, 0x02D9, 0x02DA, 0x00B8, 0x02DD, 0x02DB, 0x02C7
+};
+/* Unicode -> Mac OS Roman; unmappable input: '?' if lossy, else false. */
+bool
+of_unicode_to_mac_roman(const of_unichar_t *input, unsigned char *output,
+    size_t length, bool lossy)
+{
+	for (size_t i = 0; i < length; i++) {
+		of_unichar_t c = input[i];
+
+		if OF_UNLIKELY (c > 0x7F) {
+			if OF_UNLIKELY (c > 0xFFFF) {
+				if (lossy) {
+					output[i] = '?';
+					continue;
+				} else
+					return false;
+			}
+			/* Reverse lookup of the of_mac_roman table. */
+			switch ((of_char16_t)c) {
+			case 0xC4:
+				output[i] = 0x80;
+				break;
+			case 0xC5:
+				output[i] = 0x81;
+				break;
+			case 0xC7:
+				output[i] = 0x82;
+				break;
+			case 0xC9:
+				output[i] = 0x83;
+				break;
+			case 0xD1:
+				output[i] = 0x84;
+				break;
+			case 0xD6:
+				output[i] = 0x85;
+				break;
+			case 0xDC:
+				output[i] = 0x86;
+				break;
+			case 0xE1:
+				output[i] = 0x87;
+				break;
+			case 0xE0:
+				output[i] = 0x88;
+				break;
+			case 0xE2:
+				output[i] = 0x89;
+				break;
+			case 0xE4:
+				output[i] = 0x8A;
+				break;
+			case 0xE3:
+				output[i] = 0x8B;
+				break;
+			case 0xE5:
+				output[i] = 0x8C;
+				break;
+			case 0xE7:
+				output[i] = 0x8D;
+				break;
+			case 0xE9:
+				output[i] = 0x8E;
+				break;
+			case 0xE8:
+				output[i] = 0x8F;
+				break;
+			case 0xEA:
+				output[i] = 0x90;
+				break;
+			case 0xEB:
+				output[i] = 0x91;
+				break;
+			case 0xED:
+				output[i] = 0x92;
+				break;
+			case 0xEC:
+				output[i] = 0x93;
+				break;
+			case 0xEE:
+				output[i] = 0x94;
+				break;
+			case 0xEF:
+				output[i] = 0x95;
+				break;
+			case 0xF1:
+				output[i] = 0x96;
+				break;
+			case 0xF3:
+				output[i] = 0x97;
+				break;
+			case 0xF2:
+				output[i] = 0x98;
+				break;
+			case 0xF4:
+				output[i] = 0x99;
+				break;
+			case 0xF6:
+				output[i] = 0x9A;
+				break;
+			case 0xF5:
+				output[i] = 0x9B;
+				break;
+			case 0xFA:
+				output[i] = 0x9C;
+				break;
+			case 0xF9:
+				output[i] = 0x9D;
+				break;
+			case 0xFB:
+				output[i] = 0x9E;
+				break;
+			case 0xFC:
+				output[i] = 0x9F;
+				break;
+			case 0x2020:
+				output[i] = 0xA0;
+				break;
+			case 0xB0:
+				output[i] = 0xA1;
+				break;
+			case 0xA2:
+				output[i] = 0xA2;
+				break;
+			case 0xA3:
+				output[i] = 0xA3;
+				break;
+			case 0xA7:
+				output[i] = 0xA4;
+				break;
+			case 0x2022:
+				output[i] = 0xA5;
+				break;
+			case 0xB6:
+				output[i] = 0xA6;
+				break;
+			case 0xDF:
+				output[i] = 0xA7;
+				break;
+			case 0xAE:
+				output[i] = 0xA8;
+				break;
+			case 0xA9:
+				output[i] = 0xA9;
+				break;
+			case 0x2122:
+				output[i] = 0xAA;
+				break;
+			case 0xB4:
+				output[i] = 0xAB;
+				break;
+			case 0xA8:
+				output[i] = 0xAC;
+				break;
+			case 0x2260:
+				output[i] = 0xAD;
+				break;
+			case 0xC6:
+				output[i] = 0xAE;
+				break;
+			case 0xD8:
+				output[i] = 0xAF;
+				break;
+			case 0x221E:
+				output[i] = 0xB0;
+				break;
+			case 0xB1:
+				output[i] = 0xB1;
+				break;
+			case 0x2264:
+				output[i] = 0xB2;
+				break;
+			case 0x2265:
+				output[i] = 0xB3;
+				break;
+			case 0xA5:
+				output[i] = 0xB4;
+				break;
+			case 0xB5:
+				output[i] = 0xB5;
+				break;
+			case 0x2202:
+				output[i] = 0xB6;
+				break;
+			case 0x2211:
+				output[i] = 0xB7;
+				break;
+			case 0x220F:
+				output[i] = 0xB8;
+				break;
+			case 0x3C0:
+				output[i] = 0xB9;
+				break;
+			case 0x222B:
+				output[i] = 0xBA;
+				break;
+			case 0xAA:
+				output[i] = 0xBB;
+				break;
+			case 0xBA:
+				output[i] = 0xBC;
+				break;
+			case 0x3A9:
+				output[i] = 0xBD;
+				break;
+			case 0xE6:
+				output[i] = 0xBE;
+				break;
+			case 0xF8:
+				output[i] = 0xBF;
+				break;
+			case 0xBF:
+				output[i] = 0xC0;
+				break;
+			case 0xA1:
+				output[i] = 0xC1;
+				break;
+			case 0xAC:
+				output[i] = 0xC2;
+				break;
+			case 0x221A:
+				output[i] = 0xC3;
+				break;
+			case 0x192:
+				output[i] = 0xC4;
+				break;
+			case 0x2248:
+				output[i] = 0xC5;
+				break;
+			case 0x2206:
+				output[i] = 0xC6;
+				break;
+			case 0xAB:
+				output[i] = 0xC7;
+				break;
+			case 0xBB:
+				output[i] = 0xC8;
+				break;
+			case 0x2026:
+				output[i] = 0xC9;
+				break;
+			case 0xA0:
+				output[i] = 0xCA;
+				break;
+			case 0xC0:
+				output[i] = 0xCB;
+				break;
+			case 0xC3:
+				output[i] = 0xCC;
+				break;
+			case 0xD5:
+				output[i] = 0xCD;
+				break;
+			case 0x152:
+				output[i] = 0xCE;
+				break;
+			case 0x153:
+				output[i] = 0xCF;
+				break;
+			case 0x2013:
+				output[i] = 0xD0;
+				break;
+			case 0x2014:
+				output[i] = 0xD1;
+				break;
+			case 0x201C:
+				output[i] = 0xD2;
+				break;
+			case 0x201D:
+				output[i] = 0xD3;
+				break;
+			case 0x2018:
+				output[i] = 0xD4;
+				break;
+			case 0x2019:
+				output[i] = 0xD5;
+				break;
+			case 0xF7:
+				output[i] = 0xD6;
+				break;
+			case 0x25CA:
+				output[i] = 0xD7;
+				break;
+			case 0xFF:
+				output[i] = 0xD8;
+				break;
+			case 0x178:
+				output[i] = 0xD9;
+				break;
+			case 0x2044:
+				output[i] = 0xDA;
+				break;
+			case 0x20AC:
+				output[i] = 0xDB;
+				break;
+			case 0x2039:
+				output[i] = 0xDC;
+				break;
+			case 0x203A:
+				output[i] = 0xDD;
+				break;
+			case 0xFB01:
+				output[i] = 0xDE;
+				break;
+			case 0xFB02:
+				output[i] = 0xDF;
+				break;
+			case 0x2021:
+				output[i] = 0xE0;
+				break;
+			case 0xB7:
+				output[i] = 0xE1;
+				break;
+			case 0x201A:
+				output[i] = 0xE2;
+				break;
+			case 0x201E:
+				output[i] = 0xE3;
+				break;
+			case 0x2030:
+				output[i] = 0xE4;
+				break;
+			case 0xC2:
+				output[i] = 0xE5;
+				break;
+			case 0xCA:
+				output[i] = 0xE6;
+				break;
+			case 0xC1:
+				output[i] = 0xE7;
+				break;
+			case 0xCB:
+				output[i] = 0xE8;
+				break;
+			case 0xC8:
+				output[i] = 0xE9;
+				break;
+			case 0xCD:
+				output[i] = 0xEA;
+				break;
+			case 0xCE:
+				output[i] = 0xEB;
+				break;
+			case 0xCF:
+				output[i] = 0xEC;
+				break;
+			case 0xCC:
+				output[i] = 0xED;
+				break;
+			case 0xD3:
+				output[i] = 0xEE;
+				break;
+			case 0xD4:
+				output[i] = 0xEF;
+				break;
+			case 0xF8FF:
+				output[i] = 0xF0;
+				break;
+			case 0xD2:
+				output[i] = 0xF1;
+				break;
+			case 0xDA:
+				output[i] = 0xF2;
+				break;
+			case 0xDB:
+				output[i] = 0xF3;
+				break;
+			case 0xD9:
+				output[i] = 0xF4;
+				break;
+			case 0x131:
+				output[i] = 0xF5;
+				break;
+			case 0x2C6:
+				output[i] = 0xF6;
+				break;
+			case 0x2DC:
+				output[i] = 0xF7;
+				break;
+			case 0xAF:
+				output[i] = 0xF8;
+				break;
+			case 0x2D8:
+				output[i] = 0xF9;
+				break;
+			case 0x2D9:
+				output[i] = 0xFA;
+				break;
+			case 0x2DA:
+				output[i] = 0xFB;
+				break;
+			case 0xB8:
+				output[i] = 0xFC;
+				break;
+			case 0x2DD:
+				output[i] = 0xFD;
+				break;
+			case 0x2DB:
+				output[i] = 0xFE;
+				break;
+			case 0x2C7:
+				output[i] = 0xFF;
+				break;
+			default:
+				if (lossy)
+					output[i] = '?';
+				else
+					return false;
+
+				break;
+			}
+		} else
+			output[i] = (unsigned char)c;
+	}
+
+	return true;
+}
Index: src/windows_1252.m
==================================================================
--- src/windows_1252.m
+++ src/windows_1252.m
@@ -36,11 +36,11 @@
 	0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
 	0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF
 };
 
 bool
-of_unicode_to_windows_1252(const of_unichar_t *input, uint8_t *output,
+of_unicode_to_windows_1252(const of_unichar_t *input, unsigned char *output,
     size_t length, bool lossy)
 {
 	for (size_t i = 0; i < length; i++) {
 		of_unichar_t c = input[i];
 