Index: ObjFW.xcodeproj/project.pbxproj ================================================================== --- ObjFW.xcodeproj/project.pbxproj +++ ObjFW.xcodeproj/project.pbxproj @@ -966,10 +966,12 @@ 4BF33B4813380D2D0059CEF7 /* testfile.bin in CopyFiles */ = {isa = PBXBuildFile; fileRef = 4BF33B4213380CD40059CEF7 /* testfile.bin */; }; 4BF3A2291E25EA48002EA46F /* windows_1251.m in Sources */ = {isa = PBXBuildFile; fileRef = 4BF3A2281E25EA48002EA46F /* windows_1251.m */; }; 4BF3A22A1E25EA48002EA46F /* windows_1251.m in Sources */ = {isa = PBXBuildFile; fileRef = 4BF3A2281E25EA48002EA46F /* windows_1251.m */; }; 4BF48CE118A95F83000E8D04 /* OFBigDataArray.h in Headers */ = {isa = PBXBuildFile; fileRef = 4BF48CDF18A95F83000E8D04 /* OFBigDataArray.h */; settings = {ATTRIBUTES = (Public, ); }; }; 4BF48CE218A95F83000E8D04 /* OFBigDataArray.m in Sources */ = {isa = PBXBuildFile; fileRef = 4BF48CE018A95F83000E8D04 /* OFBigDataArray.m */; }; + 4BF5CB901E2DC1D800CF7584 /* iso_8859_2.m in Sources */ = {isa = PBXBuildFile; fileRef = 4BF5CB8F1E2DC1D800CF7584 /* iso_8859_2.m */; }; + 4BF5CB911E2DC1D800CF7584 /* iso_8859_2.m in Sources */ = {isa = PBXBuildFile; fileRef = 4BF5CB8F1E2DC1D800CF7584 /* iso_8859_2.m */; }; 4BF69CE61BD44F8B00DFFC1B /* platform.h in Headers */ = {isa = PBXBuildFile; fileRef = 4BF69CE51BD44F8B00DFFC1B /* platform.h */; settings = {ATTRIBUTES = (Public, ); }; }; 4BFF3714177E17C100192782 /* OFRemoveItemFailedException.h in Headers */ = {isa = PBXBuildFile; fileRef = 4BFF3710177E17C100192782 /* OFRemoveItemFailedException.h */; settings = {ATTRIBUTES = (Public, ); }; }; 4BFF3715177E17C100192782 /* OFRemoveItemFailedException.m in Sources */ = {isa = PBXBuildFile; fileRef = 4BFF3711177E17C100192782 /* OFRemoveItemFailedException.m */; }; /* End PBXBuildFile section */ @@ -1591,10 +1593,11 @@ 4BF33B4213380CD40059CEF7 /* testfile.bin */ = {isa = PBXFileReference; lastKnownFileType = archive.macbinary; name = testfile.bin; path = tests/testfile.bin; sourceTree =
"<group>"; }; 4BF33B4313380CD40059CEF7 /* testfile.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = testfile.txt; path = tests/testfile.txt; sourceTree = "<group>"; }; 4BF3A2281E25EA48002EA46F /* windows_1251.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = windows_1251.m; path = src/windows_1251.m; sourceTree = "<group>"; }; 4BF48CDF18A95F83000E8D04 /* OFBigDataArray.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = OFBigDataArray.h; path = src/OFBigDataArray.h; sourceTree = "<group>"; }; 4BF48CE018A95F83000E8D04 /* OFBigDataArray.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = OFBigDataArray.m; path = src/OFBigDataArray.m; sourceTree = "<group>"; }; + 4BF5CB8F1E2DC1D800CF7584 /* iso_8859_2.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = iso_8859_2.m; path = src/iso_8859_2.m; sourceTree = "<group>"; }; 4BF69CE51BD44F8B00DFFC1B /* platform.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = platform.h; path = src/platform.h; sourceTree = "<group>"; }; 4BFBDD1610A0724800051AFB /* unicode.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = unicode.m; path = src/unicode.m; sourceTree = "<group>"; }; 4BFF3710177E17C100192782 /* OFRemoveItemFailedException.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = OFRemoveItemFailedException.h; path = src/exceptions/OFRemoveItemFailedException.h; sourceTree = "<group>"; }; 4BFF3711177E17C100192782 /* OFRemoveItemFailedException.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = OFRemoveItemFailedException.m; path = src/exceptions/OFRemoveItemFailedException.m; sourceTree = "<group>"; }; /* End PBXFileReference section */ @@ -2161,10 +2164,11 @@ 4B6736111E2B2F6F00681F2C /* codepage_858.m */, 
4B881D391CCBFE0700E2F7D8 /* crc32.h */, 4B881D3A1CCBFE0700E2F7D8 /* crc32.m */, 4B837D7816829C5F007A3E83 /* instance.h */, 4B6AF96F10A8D40E0003FB0A /* iso_8859_15.m */, + 4BF5CB8F1E2DC1D800CF7584 /* iso_8859_2.m */, 4BE17AD812FD744C002CEB0B /* foundation-compat.m */, 4BBA36C511406AB700CBA3AC /* macros.h */, 4B5EBFB81E25A937004FE6A2 /* mac_roman.m */, 4BF1BCBF11C9663F0025511F /* objfw-defs.h.in */, 4BB50DCF12F863C700C9393F /* of_asprintf.h */, @@ -3196,12 +3200,13 @@ 4B2C21891DA292BE00735907 /* base64.m in Sources */, 4B2C218A1DA292BE00735907 /* codepage_437.m in Sources */, 4B5EBFB71E25A8CF004FE6A2 /* codepage_850.m in Sources */, 4B6736131E2B2F6F00681F2C /* codepage_858.m in Sources */, 4B2C218B1DA292BE00735907 /* crc32.m in Sources */, - 4B2C218C1DA292BE00735907 /* iso_8859_15.m in Sources */, 4B2C218D1DA292BE00735907 /* foundation-compat.m in Sources */, + 4B2C218C1DA292BE00735907 /* iso_8859_15.m in Sources */, + 4BF5CB911E2DC1D800CF7584 /* iso_8859_2.m in Sources */, 4B5EBFBA1E25A937004FE6A2 /* mac_roman.m in Sources */, 4B2C218E1DA292BE00735907 /* of_asprintf.m in Sources */, 4B2C218F1DA292BE00735907 /* of_strptime.m in Sources */, 4B2C21901DA292BE00735907 /* pbkdf2.m in Sources */, 4B2C21911DA292BE00735907 /* resolver.m in Sources */, @@ -3389,12 +3394,13 @@ 4B3D23B31337FC0D00DD29B8 /* base64.m in Sources */, 4BB52CC717B8EA7F00B7EBF5 /* codepage_437.m in Sources */, 4B5EBFB61E25A8CF004FE6A2 /* codepage_850.m in Sources */, 4B6736121E2B2F6F00681F2C /* codepage_858.m in Sources */, 4B881D3D1CCBFE2A00E2F7D8 /* crc32.m in Sources */, - 4B3D23B41337FC0D00DD29B8 /* iso_8859_15.m in Sources */, 4B3D23B51337FC0D00DD29B8 /* foundation-compat.m in Sources */, + 4B3D23B41337FC0D00DD29B8 /* iso_8859_15.m in Sources */, + 4BF5CB901E2DC1D800CF7584 /* iso_8859_2.m in Sources */, 4B5EBFB91E25A937004FE6A2 /* mac_roman.m in Sources */, 4B3D23EE1337FFD000DD29B8 /* of_asprintf.m in Sources */, 4BA355BA14879BDD00442EF4 /* of_strptime.m in Sources */, 4BD306351D46CEE300E2F372 /* 
pbkdf2.m in Sources */, 4B7769EE1895C07D00D12284 /* resolver.m in Sources */, Index: src/Makefile ================================================================== --- src/Makefile +++ src/Makefile @@ -157,10 +157,11 @@ codepage_850.m \ codepage_858.m \ ${FOUNDATION_COMPAT_M} \ ${INSTANCE_M} \ iso_8859_15.m \ + iso_8859_2.m \ mac_roman.m \ ${UNICODE_M} \ windows_1251.m \ windows_1252.m SRCS_FILES += OFSettings_INIFile.m Index: src/OFHTTPResponse.m ================================================================== --- src/OFHTTPResponse.m +++ src/OFHTTPResponse.m @@ -114,10 +114,13 @@ if ([contentType hasSuffix: @"charset=utf-8"]) encoding = OF_STRING_ENCODING_UTF_8; else if ([contentType hasSuffix: @"charset=iso-8859-1"] || [contentType hasSuffix: @"charset=iso_8859-1"]) encoding = OF_STRING_ENCODING_ISO_8859_1; + else if ([contentType hasSuffix: @"charset=iso-8859-2"] || + [contentType hasSuffix: @"charset=iso_8859-2"]) + encoding = OF_STRING_ENCODING_ISO_8859_2; else if ([contentType hasSuffix: @"charset=iso-8859-15"] || [contentType hasSuffix: @"charset=iso_8859-15"]) encoding = OF_STRING_ENCODING_ISO_8859_15; else if ([contentType hasSuffix: @"charset=windows-1251"] || [contentType hasSuffix: @"charset=cp1251"] || Index: src/OFLocalization.m ================================================================== --- src/OFLocalization.m +++ src/OFLocalization.m @@ -104,10 +104,14 @@ _encoding = OF_STRING_ENCODING_ASCII; else if (strcmp(tmp, "iso8859-1") == 0 || strcmp(tmp, "iso-8859-1") == 0 || strcmp(tmp, "iso_8859-1") == 0) _encoding = OF_STRING_ENCODING_ISO_8859_1; + else if (strcmp(tmp, "iso8859-2") == 0 || + strcmp(tmp, "iso-8859-2") == 0 || + strcmp(tmp, "iso_8859-2") == 0) + _encoding = OF_STRING_ENCODING_ISO_8859_2; else if (strcmp(tmp, "iso8859-15") == 0 || strcmp(tmp, "iso-8859-15") == 0 || strcmp(tmp, "iso_8859-15") == 0) _encoding = OF_STRING_ENCODING_ISO_8859_15; /* Windows and DJGPP use a codepage */ Index: src/OFString.h 
================================================================== --- src/OFString.h +++ src/OFString.h @@ -56,10 +56,12 @@ OF_STRING_ENCODING_UTF_8, /*! ASCII */ OF_STRING_ENCODING_ASCII, /*! ISO 8859-1 */ OF_STRING_ENCODING_ISO_8859_1, + /*! ISO 8859-2 */ + OF_STRING_ENCODING_ISO_8859_2, /*! ISO 8859-15 */ OF_STRING_ENCODING_ISO_8859_15, /*! Windows-1251 */ OF_STRING_ENCODING_WINDOWS_1251, /*! Windows-1252 */ Index: src/OFString.m ================================================================== --- src/OFString.m +++ src/OFString.m @@ -87,10 +87,12 @@ lossy: (bool)lossy; - (OFString*)OF_JSONRepresentationWithOptions: (int)options depth: (size_t)depth; @end +extern bool of_unicode_to_iso_8859_2(const of_unichar_t*, unsigned char*, + size_t, bool); extern bool of_unicode_to_iso_8859_15(const of_unichar_t*, unsigned char*, size_t, bool); extern bool of_unicode_to_windows_1251(const of_unichar_t*, unsigned char*, size_t, bool); extern bool of_unicode_to_windows_1252(const of_unichar_t*, unsigned char*, @@ -1062,10 +1064,21 @@ cString[i] = (unsigned char)characters[i]; } cString[i] = '\0'; + return length; + case OF_STRING_ENCODING_ISO_8859_2: + if (length + 1 > maxLength) + @throw [OFOutOfRangeException exception]; + + if (!of_unicode_to_iso_8859_2(characters, + (unsigned char*)cString, length, lossy)) + @throw [OFInvalidEncodingException exception]; + + cString[length] = '\0'; + return length; case OF_STRING_ENCODING_ISO_8859_15: if (length + 1 > maxLength) @throw [OFOutOfRangeException exception]; @@ -1194,10 +1207,11 @@ } break; case OF_STRING_ENCODING_ASCII: case OF_STRING_ENCODING_ISO_8859_1: + case OF_STRING_ENCODING_ISO_8859_2: case OF_STRING_ENCODING_ISO_8859_15: case OF_STRING_ENCODING_WINDOWS_1251: case OF_STRING_ENCODING_WINDOWS_1252: case OF_STRING_ENCODING_CODEPAGE_437: case OF_STRING_ENCODING_CODEPAGE_850: @@ -1262,10 +1276,11 @@ } return UTF8StringLength; case OF_STRING_ENCODING_ASCII: case OF_STRING_ENCODING_ISO_8859_1: + case 
OF_STRING_ENCODING_ISO_8859_2: case OF_STRING_ENCODING_ISO_8859_15: case OF_STRING_ENCODING_WINDOWS_1251: case OF_STRING_ENCODING_WINDOWS_1252: case OF_STRING_ENCODING_CODEPAGE_437: case OF_STRING_ENCODING_CODEPAGE_850: Index: src/OFString_UTF8.m ================================================================== --- src/OFString_UTF8.m +++ src/OFString_UTF8.m @@ -36,10 +36,11 @@ #import "OFOutOfRangeException.h" #import "of_asprintf.h" #import "unicode.h" +extern const of_char16_t of_iso_8859_2[128]; extern const of_char16_t of_iso_8859_15[128]; extern const of_char16_t of_windows_1251[128]; extern const of_char16_t of_windows_1252[128]; extern const of_char16_t of_codepage_437[128]; extern const of_char16_t of_codepage_850[128]; @@ -288,10 +289,13 @@ return self; } switch (encoding) { + case OF_STRING_ENCODING_ISO_8859_2: + table = of_iso_8859_2; + break; case OF_STRING_ENCODING_ISO_8859_15: table = of_iso_8859_15; break; case OF_STRING_ENCODING_WINDOWS_1251: table = of_windows_1251; Index: src/OFXMLParser.m ================================================================== --- src/OFXMLParser.m +++ src/OFXMLParser.m @@ -468,10 +468,14 @@ _encoding = OF_STRING_ENCODING_UTF_8; else if ([value isEqual: @"iso-8859-1"] || [value isEqual: @"iso_8859-1"]) _encoding = OF_STRING_ENCODING_ISO_8859_1; + else if ([value isEqual: @"iso-8859-2"] || + [value isEqual: @"iso_8859-2"]) + _encoding = + OF_STRING_ENCODING_ISO_8859_2; else if ([value isEqual: @"iso-8859-15"] || [value isEqual: @"iso_8859-15"]) _encoding = OF_STRING_ENCODING_ISO_8859_15; else if ([value isEqual: @"windows-1251"] ||
ADDED src/iso_8859_2.m
Index: src/iso_8859_2.m
==================================================================
--- src/iso_8859_2.m
+++ src/iso_8859_2.m
@@ -0,0 +1,385 @@
+/*
+ * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017
+ *   Jonathan Schleifer
+ *
+ * All rights reserved.
+ *
+ * This file is part of ObjFW. It may be distributed under the terms of the
+ * Q Public License 1.0, which can be found in the file LICENSE.QPL included in
+ * the packaging of this file.
+ *
+ * Alternatively, it may be distributed under the terms of the GNU General
+ * Public License, either version 2 or 3, which can be found in the file
+ * LICENSE.GPLv2 or LICENSE.GPLv3 respectively included in the packaging of this
+ * file.
+ */
+
+#include "config.h"
+
+#import "OFString.h"
+
+/*
+ * Unicode code points for the ISO 8859-2 bytes 0x80 to 0xFF: entry n is the
+ * code point that of_unicode_to_iso_8859_2() encodes as byte 0x80 + n. The
+ * first 32 entries (bytes 0x80 to 0x9F) equal the byte value itself.
+ */
+const of_char16_t of_iso_8859_2[128] = {
+	0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
+	0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
+	0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
+	0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
+	0x00A0, 0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7,
+	0x00A8, 0x0160, 0x015E, 0x0164, 0x0179, 0x00AD, 0x017D, 0x017B,
+	0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7,
+	0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C,
+	0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,
+	0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,
+	0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,
+	0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,
+	0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,
+	0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,
+	0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,
+	0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9
+};
+
+/*
+ * Encodes length Unicode characters from input as ISO 8859-2, storing exactly
+ * one byte per character in output. No terminating NUL is written.
+ *
+ * Characters up to 0x9F are stored unchanged; any other character needs an
+ * entry in the switch below. A character without one is replaced by '?' if
+ * lossy is true. Otherwise false is returned immediately, leaving output
+ * filled only up to that character.
+ *
+ * Returns true if all length characters were written.
+ */
+bool
+of_unicode_to_iso_8859_2(const of_unichar_t *input, unsigned char *output,
+    size_t length, bool lossy)
+{
+	for (size_t i = 0; i < length; i++) {
+		of_unichar_t c = input[i];
+
+		if OF_UNLIKELY (c > 0x7F) {
+			/*
+			 * Nothing above 0xFFFF can be encoded. Handling it
+			 * here keeps the 16 bit cast below from truncating.
+			 */
+			if OF_UNLIKELY (c > 0xFFFF) {
+				if (lossy) {
+					output[i] = '?';
+					continue;
+				} else
+					return false;
+			}
+
+			/* 0x80 to 0x9F map to themselves. */
+			if OF_UNLIKELY (c >= 0x80 && c <= 0x9F)
+				output[i] = c;
+			else {
+				/* Inverse of of_iso_8859_2[0x20..0x7F]. */
+				switch ((of_char16_t)c) {
+				case 0xA0:
+					output[i] = 0xA0;
+					break;
+				case 0x104:
+					output[i] = 0xA1;
+					break;
+				case 0x2D8:
+					output[i] = 0xA2;
+					break;
+				case 0x141:
+					output[i] = 0xA3;
+					break;
+				case 0xA4:
+					output[i] = 0xA4;
+					break;
+				case 0x13D:
+					output[i] = 0xA5;
+					break;
+				case 0x15A:
+					output[i] = 0xA6;
+					break;
+				case 0xA7:
+					output[i] = 0xA7;
+					break;
+				case 0xA8:
+					output[i] = 0xA8;
+					break;
+				case 0x160:
+					output[i] = 0xA9;
+					break;
+				case 0x15E:
+					output[i] = 0xAA;
+					break;
+				case 0x164:
+					output[i] = 0xAB;
+					break;
+				case 0x179:
+					output[i] = 0xAC;
+					break;
+				case 0xAD:
+					output[i] = 0xAD;
+					break;
+				case 0x17D:
+					output[i] = 0xAE;
+					break;
+				case 0x17B:
+					output[i] = 0xAF;
+					break;
+				case 0xB0:
+					output[i] = 0xB0;
+					break;
+				case 0x105:
+					output[i] = 0xB1;
+					break;
+				case 0x2DB:
+					output[i] = 0xB2;
+					break;
+				case 0x142:
+					output[i] = 0xB3;
+					break;
+				case 0xB4:
+					output[i] = 0xB4;
+					break;
+				case 0x13E:
+					output[i] = 0xB5;
+					break;
+				case 0x15B:
+					output[i] = 0xB6;
+					break;
+				case 0x2C7:
+					output[i] = 0xB7;
+					break;
+				case 0xB8:
+					output[i] = 0xB8;
+					break;
+				case 0x161:
+					output[i] = 0xB9;
+					break;
+				case 0x15F:
+					output[i] = 0xBA;
+					break;
+				case 0x165:
+					output[i] = 0xBB;
+					break;
+				case 0x17A:
+					output[i] = 0xBC;
+					break;
+				case 0x2DD:
+					output[i] = 0xBD;
+					break;
+				case 0x17E:
+					output[i] = 0xBE;
+					break;
+				case 0x17C:
+					output[i] = 0xBF;
+					break;
+				case 0x154:
+					output[i] = 0xC0;
+					break;
+				case 0xC1:
+					output[i] = 0xC1;
+					break;
+				case 0xC2:
+					output[i] = 0xC2;
+					break;
+				case 0x102:
+					output[i] = 0xC3;
+					break;
+				case 0xC4:
+					output[i] = 0xC4;
+					break;
+				case 0x139:
+					output[i] = 0xC5;
+					break;
+				case 0x106:
+					output[i] = 0xC6;
+					break;
+				case 0xC7:
+					output[i] = 0xC7;
+					break;
+				case 0x10C:
+					output[i] = 0xC8;
+					break;
+				case 0xC9:
+					output[i] = 0xC9;
+					break;
+				case 0x118:
+					output[i] = 0xCA;
+					break;
+				case 0xCB:
+					output[i] = 0xCB;
+					break;
+				case 0x11A:
+					output[i] = 0xCC;
+					break;
+				case 0xCD:
+					output[i] = 0xCD;
+					break;
+				case 0xCE:
+					output[i] = 0xCE;
+					break;
+				case 0x10E:
+					output[i] = 0xCF;
+					break;
+				case 0x110:
+					output[i] = 0xD0;
+					break;
+				case 0x143:
+					output[i] = 0xD1;
+					break;
+				case 0x147:
+					output[i] = 0xD2;
+					break;
+				case 0xD3:
+					output[i] = 0xD3;
+					break;
+				case 0xD4:
+					output[i] = 0xD4;
+					break;
+				case 0x150:
+					output[i] = 0xD5;
+					break;
+				case 0xD6:
+					output[i] = 0xD6;
+					break;
+				case 0xD7:
+					output[i] = 0xD7;
+					break;
+				case 0x158:
+					output[i] = 0xD8;
+					break;
+				case 0x16E:
+					output[i] = 0xD9;
+					break;
+				case 0xDA:
+					output[i] = 0xDA;
+					break;
+				case 0x170:
+					output[i] = 0xDB;
+					break;
+				case 0xDC:
+					output[i] = 0xDC;
+					break;
+				case 0xDD:
+					output[i] = 0xDD;
+					break;
+				case 0x162:
+					output[i] = 0xDE;
+					break;
+				case 0xDF:
+					output[i] = 0xDF;
+					break;
+				case 0x155:
+					output[i] = 0xE0;
+					break;
+				case 0xE1:
+					output[i] = 0xE1;
+					break;
+				case 0xE2:
+					output[i] = 0xE2;
+					break;
+				case 0x103:
+					output[i] = 0xE3;
+					break;
+				case 0xE4:
+					output[i] = 0xE4;
+					break;
+				case 0x13A:
+					output[i] = 0xE5;
+					break;
+				case 0x107:
+					output[i] = 0xE6;
+					break;
+				case 0xE7:
+					output[i] = 0xE7;
+					break;
+				case 0x10D:
+					output[i] = 0xE8;
+					break;
+				case 0xE9:
+					output[i] = 0xE9;
+					break;
+				case 0x119:
+					output[i] = 0xEA;
+					break;
+				case 0xEB:
+					output[i] = 0xEB;
+					break;
+				case 0x11B:
+					output[i] = 0xEC;
+					break;
+				case 0xED:
+					output[i] = 0xED;
+					break;
+				case 0xEE:
+					output[i] = 0xEE;
+					break;
+				case 0x10F:
+					output[i] = 0xEF;
+					break;
+				case 0x111:
+					output[i] = 0xF0;
+					break;
+				case 0x144:
+					output[i] = 0xF1;
+					break;
+				case 0x148:
+					output[i] = 0xF2;
+					break;
+				case 0xF3:
+					output[i] = 0xF3;
+					break;
+				case 0xF4:
+					output[i] = 0xF4;
+					break;
+				case 0x151:
+					output[i] = 0xF5;
+					break;
+				case 0xF6:
+					output[i] = 0xF6;
+					break;
+				case 0xF7:
+					output[i] = 0xF7;
+					break;
+				case 0x159:
+					output[i] = 0xF8;
+					break;
+				case 0x16F:
+					output[i] = 0xF9;
+					break;
+				case 0xFA:
+					output[i] = 0xFA;
+					break;
+				case 0x171:
+					output[i] = 0xFB;
+					break;
+				case 0xFC:
+					output[i] = 0xFC;
+					break;
+				case 0xFD:
+					output[i] = 0xFD;
+					break;
+				case 0x163:
+					output[i] = 0xFE;
+					break;
+				case 0x2D9:
+					output[i] = 0xFF;
+					break;
+				default:
+					if (lossy)
+						output[i] = '?';
+					else
+						return false;
+
+					break;
+				}
+			}
+		} else
+			/* ASCII needs no conversion. */
+			output[i] = (unsigned char)c;
+	}
+
+	return true;
+}
Index: src/windows_1251.m ================================================================== --- src/windows_1251.m +++ src/windows_1251.m @@ -51,11 +51,11 @@ continue; } else return false; } - if (c >= 0x410 && c <= 0x44F) + if OF_LIKELY (c >= 0x410 && c <= 0x44F) output[i] = 0xC0 + (c - 0x410); else { switch ((of_char16_t)c) { case 0x402: output[i] = 0x80;