Index: ObjFW.xcodeproj/project.pbxproj ================================================================== --- ObjFW.xcodeproj/project.pbxproj +++ ObjFW.xcodeproj/project.pbxproj @@ -960,10 +960,12 @@ 4BF33B0E133807A20059CEF7 /* OFXMLElementBuilderTests.m in Sources */ = {isa = PBXBuildFile; fileRef = 4B6EF67C1235358D0076B512 /* OFXMLElementBuilderTests.m */; }; 4BF33B10133807A20059CEF7 /* OFXMLParserTests.m in Sources */ = {isa = PBXBuildFile; fileRef = 4B6EF67E1235358D0076B512 /* OFXMLParserTests.m */; }; 4BF33B12133807A20059CEF7 /* TestsAppDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = 4B6EF6811235358D0076B512 /* TestsAppDelegate.m */; }; 4BF33B4713380CE20059CEF7 /* testfile.txt in CopyFiles */ = {isa = PBXBuildFile; fileRef = 4BF33B4313380CD40059CEF7 /* testfile.txt */; }; 4BF33B4813380D2D0059CEF7 /* testfile.bin in CopyFiles */ = {isa = PBXBuildFile; fileRef = 4BF33B4213380CD40059CEF7 /* testfile.bin */; }; + 4BF3A2291E25EA48002EA46F /* windows_1251.m in Sources */ = {isa = PBXBuildFile; fileRef = 4BF3A2281E25EA48002EA46F /* windows_1251.m */; }; + 4BF3A22A1E25EA48002EA46F /* windows_1251.m in Sources */ = {isa = PBXBuildFile; fileRef = 4BF3A2281E25EA48002EA46F /* windows_1251.m */; }; 4BF48CE118A95F83000E8D04 /* OFBigDataArray.h in Headers */ = {isa = PBXBuildFile; fileRef = 4BF48CDF18A95F83000E8D04 /* OFBigDataArray.h */; settings = {ATTRIBUTES = (Public, ); }; }; 4BF48CE218A95F83000E8D04 /* OFBigDataArray.m in Sources */ = {isa = PBXBuildFile; fileRef = 4BF48CE018A95F83000E8D04 /* OFBigDataArray.m */; }; 4BF69CE61BD44F8B00DFFC1B /* platform.h in Headers */ = {isa = PBXBuildFile; fileRef = 4BF69CE51BD44F8B00DFFC1B /* platform.h */; settings = {ATTRIBUTES = (Public, ); }; }; 4BFF3714177E17C100192782 /* OFRemoveItemFailedException.h in Headers */ = {isa = PBXBuildFile; fileRef = 4BFF3710177E17C100192782 /* OFRemoveItemFailedException.h */; settings = {ATTRIBUTES = (Public, ); }; }; 4BFF3715177E17C100192782 /* OFRemoveItemFailedException.m in Sources 
*/ = {isa = PBXBuildFile; fileRef = 4BFF3711177E17C100192782 /* OFRemoveItemFailedException.m */; }; @@ -1583,10 +1585,11 @@ 4BF1BCCE11C9663F0025511F /* OFXMLAttribute.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = OFXMLAttribute.h; path = src/OFXMLAttribute.h; sourceTree = "<group>"; }; 4BF1BCCF11C9663F0025511F /* OFXMLAttribute.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = OFXMLAttribute.m; path = src/OFXMLAttribute.m; sourceTree = "<group>"; }; 4BF33AF0133807310059CEF7 /* Tests */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = Tests; sourceTree = BUILT_PRODUCTS_DIR; }; 4BF33B4213380CD40059CEF7 /* testfile.bin */ = {isa = PBXFileReference; lastKnownFileType = archive.macbinary; name = testfile.bin; path = tests/testfile.bin; sourceTree = "<group>"; }; 4BF33B4313380CD40059CEF7 /* testfile.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = testfile.txt; path = tests/testfile.txt; sourceTree = "<group>"; }; + 4BF3A2281E25EA48002EA46F /* windows_1251.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = windows_1251.m; path = src/windows_1251.m; sourceTree = "<group>"; }; 4BF48CDF18A95F83000E8D04 /* OFBigDataArray.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = OFBigDataArray.h; path = src/OFBigDataArray.h; sourceTree = "<group>"; }; 4BF48CE018A95F83000E8D04 /* OFBigDataArray.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = OFBigDataArray.m; path = src/OFBigDataArray.m; sourceTree = "<group>"; }; 4BF69CE51BD44F8B00DFFC1B /* platform.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = platform.h; path = src/platform.h; sourceTree = "<group>"; }; 4BFBDD1610A0724800051AFB /* unicode.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; 
name = unicode.m; path = src/unicode.m; sourceTree = "<group>"; }; 4BFF3710177E17C100192782 /* OFRemoveItemFailedException.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = OFRemoveItemFailedException.h; path = src/exceptions/OFRemoveItemFailedException.h; sourceTree = "<group>"; }; @@ -2177,10 +2180,11 @@ 4B67998B1099E7C50041064A /* threading.h */, 4B3379CE1979326A0088E97E /* threading.m */, 4BABC29A197A8212006A93BD /* threading_pthread.m */, 4B67998C1099E7C50041064A /* unicode.h */, 4BFBDD1610A0724800051AFB /* unicode.m */, + 4BF3A2281E25EA48002EA46F /* windows_1251.m */, 4B6AF97210A8D42E0003FB0A /* windows_1252.m */, ); name = ObjFW; sourceTree = "<group>"; }; @@ -3198,10 +3202,11 @@ 4B2C21911DA292BE00735907 /* resolver.m in Sources */, 4B2C21921DA292BE00735907 /* scrypt.m in Sources */, 4B2C21931DA292BE00735907 /* socket.m in Sources */, 4B2C21941DA292BE00735907 /* threading.m in Sources */, 4B2C21961DA292BE00735907 /* unicode.m in Sources */, + 4BF3A22A1E25EA48002EA46F /* windows_1251.m in Sources */, 4B2C21971DA292BE00735907 /* windows_1252.m in Sources */, 4B2C21981DA292BE00735907 /* OFAcceptFailedException.m in Sources */, 4B2C21991DA292BE00735907 /* OFObject+KeyValueCoding.m in Sources */, 4B2C219A1DA292BE00735907 /* OFAddressTranslationFailedException.m in Sources */, 4B2C219B1DA292BE00735907 /* OFAllocFailedException.m in Sources */, @@ -3389,10 +3394,11 @@ 4B7769EE1895C07D00D12284 /* resolver.m in Sources */, 4B6994481D47FB1A007F34DF /* scrypt.m in Sources */, 4B40EC1B189FE2650031E19E /* socket.m in Sources */, 4B3379CF1979326A0088E97E /* threading.m in Sources */, 4B3D23B91337FC0D00DD29B8 /* unicode.m in Sources */, + 4BF3A2291E25EA48002EA46F /* windows_1251.m in Sources */, 4B3D23BA1337FC0D00DD29B8 /* windows_1252.m in Sources */, 4B90B79F133AD87D00BD33CB /* OFAcceptFailedException.m in Sources */, 4BC176311D04963000C32718 /* OFObject+KeyValueCoding.m in Sources */, 4B90B7A1133AD87D00BD33CB /* 
OFAddressTranslationFailedException.m in Sources */, 4B17FF80133A2D17003E6DCD /* OFAllocFailedException.m in Sources */, Index: src/Makefile ================================================================== --- src/Makefile +++ src/Makefile @@ -158,10 +158,11 @@ ${FOUNDATION_COMPAT_M} \ ${INSTANCE_M} \ iso_8859_15.m \ mac_roman.m \ ${UNICODE_M} \ + windows_1251.m \ windows_1252.m SRCS_FILES += OFSettings_INIFile.m SRCS_SOCKETS += ${OFKERNELEVENTOBSERVER_EPOLL_M} \ ${OFKERNELEVENTOBSERVER_KQUEUE_M} \ ${OFKERNELEVENTOBSERVER_POLL_M} \ Index: src/OFHTTPResponse.m ================================================================== --- src/OFHTTPResponse.m +++ src/OFHTTPResponse.m @@ -111,17 +111,19 @@ (contentType = [_headers objectForKey: @"Content-Type"]) != nil) { contentType = [contentType lowercaseString]; if ([contentType hasSuffix: @"charset=utf-8"]) encoding = OF_STRING_ENCODING_UTF_8; - if ([contentType hasSuffix: @"charset=iso-8859-1"]) + else if ([contentType hasSuffix: @"charset=iso-8859-1"]) encoding = OF_STRING_ENCODING_ISO_8859_1; - if ([contentType hasSuffix: @"charset=iso-8859-15"]) + else if ([contentType hasSuffix: @"charset=iso-8859-15"]) encoding = OF_STRING_ENCODING_ISO_8859_15; - if ([contentType hasSuffix: @"charset=windows-1252"]) + else if ([contentType hasSuffix: @"charset=windows-1251"]) + encoding = OF_STRING_ENCODING_WINDOWS_1251; + else if ([contentType hasSuffix: @"charset=windows-1252"]) encoding = OF_STRING_ENCODING_WINDOWS_1252; - if ([contentType hasSuffix: @"charset=macintosh"]) + else if ([contentType hasSuffix: @"charset=macintosh"]) encoding = OF_STRING_ENCODING_MAC_ROMAN; } if (encoding == OF_STRING_ENCODING_AUTODETECT) encoding = OF_STRING_ENCODING_UTF_8; Index: src/OFLocalization.m ================================================================== --- src/OFLocalization.m +++ src/OFLocalization.m @@ -107,10 +107,12 @@ _encoding = OF_STRING_ENCODING_ISO_8859_1; else if (strcmp(tmp, "iso8859-15") == 0 || strcmp(tmp, 
"iso-8859-15") == 0) _encoding = OF_STRING_ENCODING_ISO_8859_15; /* Windows uses a codepage */ + else if (strcmp(tmp, "1251") == 0) + _encoding = OF_STRING_ENCODING_WINDOWS_1251; else if (strcmp(tmp, "1252") == 0) _encoding = OF_STRING_ENCODING_WINDOWS_1252; } /* Territory */ Index: src/OFString.h ================================================================== --- src/OFString.h +++ src/OFString.h @@ -58,10 +58,12 @@ OF_STRING_ENCODING_ASCII, /*! ISO 8859-1 */ OF_STRING_ENCODING_ISO_8859_1, /*! ISO 8859-15 */ OF_STRING_ENCODING_ISO_8859_15, + /*! Windows-1251 */ + OF_STRING_ENCODING_WINDOWS_1251, /*! Windows-1252 */ OF_STRING_ENCODING_WINDOWS_1252, /*! Codepage 437 */ OF_STRING_ENCODING_CODEPAGE_437, /*! Codepage 850 */ Index: src/OFString.m ================================================================== --- src/OFString.m +++ src/OFString.m @@ -89,10 +89,12 @@ depth: (size_t)depth; @end extern bool of_unicode_to_iso_8859_15(const of_unichar_t*, unsigned char*, size_t, bool); +extern bool of_unicode_to_windows_1251(const of_unichar_t*, unsigned char*, + size_t, bool); extern bool of_unicode_to_windows_1252(const of_unichar_t*, unsigned char*, size_t, bool); extern bool of_unicode_to_codepage_437(const of_unichar_t*, unsigned char*, size_t, bool); extern bool of_unicode_to_codepage_850(const of_unichar_t*, unsigned char*, @@ -951,17 +953,22 @@ objectForKey: @"Content-Type"]) != nil) { contentType = [contentType lowercaseString]; if ([contentType hasSuffix: @"charset=utf-8"]) encoding = OF_STRING_ENCODING_UTF_8; - if ([contentType hasSuffix: @"charset=iso-8859-1"]) + else if ([contentType hasSuffix: @"charset=iso-8859-1"]) encoding = OF_STRING_ENCODING_ISO_8859_1; - if ([contentType hasSuffix: @"charset=iso-8859-15"]) + else if ([contentType hasSuffix: + @"charset=iso-8859-15"]) encoding = OF_STRING_ENCODING_ISO_8859_15; - if ([contentType hasSuffix: @"charset=windows-1252"]) + else if ([contentType hasSuffix: + @"charset=windows-1251"]) + encoding = 
OF_STRING_ENCODING_WINDOWS_1251; + else if ([contentType hasSuffix: + @"charset=windows-1252"]) encoding = OF_STRING_ENCODING_WINDOWS_1252; - if ([contentType hasSuffix: @"charset=macintosh"]) + else if ([contentType hasSuffix: @"charset=macintosh"]) encoding = OF_STRING_ENCODING_MAC_ROMAN; } if (encoding == OF_STRING_ENCODING_AUTODETECT) encoding = OF_STRING_ENCODING_UTF_8; @@ -1104,10 +1111,21 @@ (unsigned char*)cString, length, lossy)) @throw [OFInvalidEncodingException exception]; cString[length] = '\0'; + return length; + case OF_STRING_ENCODING_WINDOWS_1251: + if (length + 1 > maxLength) + @throw [OFOutOfRangeException exception]; + + if (!of_unicode_to_windows_1251(characters, + (unsigned char*)cString, length, lossy)) + @throw [OFInvalidEncodingException exception]; + + cString[length] = '\0'; + return length; case OF_STRING_ENCODING_WINDOWS_1252: if (length + 1 > maxLength) @throw [OFOutOfRangeException exception]; @@ -1204,10 +1222,11 @@ break; case OF_STRING_ENCODING_ASCII: case OF_STRING_ENCODING_ISO_8859_1: case OF_STRING_ENCODING_ISO_8859_15: + case OF_STRING_ENCODING_WINDOWS_1251: case OF_STRING_ENCODING_WINDOWS_1252: case OF_STRING_ENCODING_CODEPAGE_437: case OF_STRING_ENCODING_CODEPAGE_850: case OF_STRING_ENCODING_MAC_ROMAN: cString = [object allocMemoryWithSize: length + 1]; @@ -1270,10 +1289,11 @@ return UTF8StringLength; case OF_STRING_ENCODING_ASCII: case OF_STRING_ENCODING_ISO_8859_1: case OF_STRING_ENCODING_ISO_8859_15: + case OF_STRING_ENCODING_WINDOWS_1251: case OF_STRING_ENCODING_WINDOWS_1252: case OF_STRING_ENCODING_CODEPAGE_437: case OF_STRING_ENCODING_CODEPAGE_850: case OF_STRING_ENCODING_MAC_ROMAN: return [self length]; Index: src/OFString_UTF8.m ================================================================== --- src/OFString_UTF8.m +++ src/OFString_UTF8.m @@ -37,10 +37,11 @@ #import "of_asprintf.h" #import "unicode.h" extern const of_char16_t of_iso_8859_15[128]; +extern const of_char16_t of_windows_1251[128]; extern const 
of_char16_t of_windows_1252[128]; extern const of_char16_t of_codepage_437[128]; extern const of_char16_t of_codepage_850[128]; extern const of_char16_t of_mac_roman[128]; @@ -288,10 +289,13 @@ } switch (encoding) { case OF_STRING_ENCODING_ISO_8859_15: table = of_iso_8859_15; + break; + case OF_STRING_ENCODING_WINDOWS_1251: + table = of_windows_1251; break; case OF_STRING_ENCODING_WINDOWS_1252: table = of_windows_1252; break; case OF_STRING_ENCODING_CODEPAGE_437: Index: src/OFXMLParser.m ================================================================== --- src/OFXMLParser.m +++ src/OFXMLParser.m @@ -470,10 +470,13 @@ _encoding = OF_STRING_ENCODING_ISO_8859_1; else if ([value isEqual: @"iso-8859-15"]) _encoding = OF_STRING_ENCODING_ISO_8859_15; + else if ([value isEqual: @"windows-1251"]) + _encoding = + OF_STRING_ENCODING_WINDOWS_1251; else if ([value isEqual: @"windows-1252"]) _encoding = OF_STRING_ENCODING_WINDOWS_1252; else if ([value isEqual: @"macintosh"]) _encoding = ADDED src/windows_1251.m Index: src/windows_1251.m ================================================================== --- src/windows_1251.m +++ src/windows_1251.m @@ -0,0 +1,263 @@ +/* + * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017 + * Jonathan Schleifer + * + * All rights reserved. + * + * This file is part of ObjFW. It may be distributed under the terms of the + * Q Public License 1.0, which can be found in the file LICENSE.QPL included in + * the packaging of this file. + * + * Alternatively, it may be distributed under the terms of the GNU General + * Public License, either version 2 or 3, which can be found in the file + * LICENSE.GPLv2 or LICENSE.GPLv3 respectively included in the packaging of this + * file. 
+ */ + +#include "config.h" + +#import "OFString.h" + +const of_char16_t of_windows_1251[128] = { /* Unicode code points for Windows-1251 bytes 0x80-0xFF: entry i is the code point that of_unicode_to_windows_1251() maps to byte 0x80 + i. The slot for byte 0x98 holds 0xFFFF; the encoder never emits 0x98 (presumably an "unassigned" marker for the decoder - confirm in the code that reads this table). */ + 0x0402, 0x0403, 0x201A, 0x0453, 0x201E, 0x2026, 0x2020, 0x2021, + 0x20AC, 0x2030, 0x0409, 0x2039, 0x040A, 0x040C, 0x040B, 0x040F, + 0x0452, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, + 0xFFFF, 0x2122, 0x0459, 0x203A, 0x045A, 0x045C, 0x045B, 0x045F, + 0x00A0, 0x040E, 0x045E, 0x0408, 0x00A4, 0x0490, 0x00A6, 0x00A7, + 0x0401, 0x00A9, 0x0404, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x0407, + 0x00B0, 0x00B1, 0x0406, 0x0456, 0x0491, 0x00B5, 0x00B6, 0x00B7, + 0x0451, 0x2116, 0x0454, 0x00BB, 0x0458, 0x0405, 0x0455, 0x0457, + 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, + 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F, + 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, + 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F, + 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, + 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F, + 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, + 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F +}; + +bool +of_unicode_to_windows_1251(const of_unichar_t *input, unsigned char *output, + size_t length, bool lossy) +{ /* Encodes `length` code points from `input` into `output` as Windows-1251, writing exactly one byte per code point. Code points up to 0x7F are copied unchanged. A code point with no Windows-1251 byte is written as '?' when `lossy` is true; otherwise the function returns false at that point, leaving the bytes written so far in `output`. Returns true when every code point was processed. */ + for (size_t i = 0; i < length; i++) { + of_unichar_t c = input[i]; + + if OF_UNLIKELY (c > 0x7F) { + if OF_UNLIKELY (c > 0xFFFF) { /* must be rejected before the (of_char16_t) cast in the switch below */ + if (lossy) { + output[i] = '?'; + continue; + } else + return false; + } + + if (c >= 0x410 && c <= 0x44F) /* contiguous run: 0x410-0x44F maps linearly onto bytes 0xC0-0xFF */ + output[i] = 0xC0 + (c - 0x410); + else { + switch ((of_char16_t)c) { + case 0x402: + output[i] = 0x80; + break; + case 0x403: + output[i] = 0x81; + break; + case 0x201A: + output[i] = 0x82; + break; + case 0x453: + output[i] = 0x83; + break; + case 0x201E: + output[i] = 0x84; + break; + case 0x2026: + output[i] = 0x85; + break; + case 0x2020: + output[i] = 0x86; + break; + case 0x2021: + output[i] = 0x87; + break; + case 0x20AC: + output[i] = 0x88; + break; + case 0x2030: + 
output[i] = 0x89; + break; + case 0x409: + output[i] = 0x8A; + break; + case 0x2039: + output[i] = 0x8B; + break; + case 0x40A: + output[i] = 0x8C; + break; + case 0x40C: + output[i] = 0x8D; + break; + case 0x40B: + output[i] = 0x8E; + break; + case 0x40F: + output[i] = 0x8F; + break; + case 0x452: + output[i] = 0x90; + break; + case 0x2018: + output[i] = 0x91; + break; + case 0x2019: + output[i] = 0x92; + break; + case 0x201C: + output[i] = 0x93; + break; + case 0x201D: + output[i] = 0x94; + break; + case 0x2022: + output[i] = 0x95; + break; + case 0x2013: + output[i] = 0x96; + break; + case 0x2014: + output[i] = 0x97; + break; + case 0x2122: + output[i] = 0x99; + break; + case 0x459: + output[i] = 0x9A; + break; + case 0x203A: + output[i] = 0x9B; + break; + case 0x45A: + output[i] = 0x9C; + break; + case 0x45C: + output[i] = 0x9D; + break; + case 0x45B: + output[i] = 0x9E; + break; + case 0x45F: + output[i] = 0x9F; + break; + case 0xA0: + output[i] = 0xA0; + break; + case 0x40E: + output[i] = 0xA1; + break; + case 0x45E: + output[i] = 0xA2; + break; + case 0x408: + output[i] = 0xA3; + break; + case 0xA4: + output[i] = 0xA4; + break; + case 0x490: + output[i] = 0xA5; + break; + case 0xA6: + output[i] = 0xA6; + break; + case 0xA7: + output[i] = 0xA7; + break; + case 0x401: + output[i] = 0xA8; + break; + case 0xA9: + output[i] = 0xA9; + break; + case 0x404: + output[i] = 0xAA; + break; + case 0xAB: + output[i] = 0xAB; + break; + case 0xAC: + output[i] = 0xAC; + break; + case 0xAD: + output[i] = 0xAD; + break; + case 0xAE: + output[i] = 0xAE; + break; + case 0x407: + output[i] = 0xAF; + break; + case 0xB0: + output[i] = 0xB0; + break; + case 0xB1: + output[i] = 0xB1; + break; + case 0x406: + output[i] = 0xB2; + break; + case 0x456: + output[i] = 0xB3; + break; + case 0x491: + output[i] = 0xB4; + break; + case 0xB5: + output[i] = 0xB5; + break; + case 0xB6: + output[i] = 0xB6; + break; + case 0xB7: + output[i] = 0xB7; + break; + case 0x451: + output[i] = 0xB8; + 
break; + case 0x2116: + output[i] = 0xB9; + break; + case 0x454: + output[i] = 0xBA; + break; + case 0xBB: + output[i] = 0xBB; + break; + case 0x458: + output[i] = 0xBC; + break; + case 0x405: + output[i] = 0xBD; + break; + case 0x455: + output[i] = 0xBE; + break; + case 0x457: + output[i] = 0xBF; + break; + default: /* no case above matched: substitute '?' if lossy, else fail */ + if (lossy) + output[i] = '?'; + else + return false; + + break; + } + } + } else + output[i] = (unsigned char)c; + } + + return true; +}