Index: configure.ac ================================================================== --- configure.ac +++ configure.ac @@ -935,10 +935,11 @@ ENCODING_FLAG(iso-8859-3, iso_8859_3, [ISO 8859-3], HAVE_ISO_8859_3) ENCODING_FLAG(iso-8859-15, iso_8859_15, [ISO 8859-15], HAVE_ISO_8859_15) ENCODING_FLAG(koi8-r, koi8_r, [KOI8-R], HAVE_KOI8_R) ENCODING_FLAG(koi8-u, koi8_u, [KOI8-U], HAVE_KOI8_U) ENCODING_FLAG(mac-roman, mac_roman, [Mac Roman encoding], HAVE_MAC_ROMAN) +ENCODING_FLAG(windows-1250, windows_1250, [Windows-1250], HAVE_WINDOWS_1250) ENCODING_FLAG(windows-1251, windows_1251, [Windows-1251], HAVE_WINDOWS_1251) ENCODING_FLAG(windows-1252, windows_1252, [Windows-1252], HAVE_WINDOWS_1252) AS_IF([test x"$ENCODINGS_SRCS" = x""], [ ENCODINGS_SRCS="dummy.m" Index: src/OFString.h ================================================================== --- src/OFString.h +++ src/OFString.h @@ -93,10 +93,12 @@ OFStringEncodingMacRoman, /** KOI8-R */ OFStringEncodingKOI8R, /** KOI8-U */ OFStringEncodingKOI8U, + /** Windows-1250 */ + OFStringEncodingWindows1250, /** Try to automatically detect the encoding */ OFStringEncodingAutodetect = -1 } OFStringEncoding; /** Index: src/OFString.m ================================================================== --- src/OFString.m +++ src/OFString.m @@ -108,10 +108,12 @@ size_t, bool); extern bool _OFUnicodeToISO8859_3(const OFUnichar *, unsigned char *, size_t, bool); extern bool _OFUnicodeToISO8859_15(const OFUnichar *, unsigned char *, size_t, bool); +extern bool _OFUnicodeToWindows1250(const OFUnichar *, unsigned char *, + size_t, bool); extern bool _OFUnicodeToWindows1251(const OFUnichar *, unsigned char *, size_t, bool); extern bool _OFUnicodeToWindows1252(const OFUnichar *, unsigned char *, size_t, bool); extern bool _OFUnicodeToCodepage437(const OFUnichar *, unsigned char *, @@ -170,10 +172,14 @@ [string isEqual: @"iso_8859-3"]) encoding = OFStringEncodingISO8859_3; else if ([string isEqual: @"iso-8859-15"] || [string isEqual: 
@"iso_8859-15"]) encoding = OFStringEncodingISO8859_15; + else if ([string isEqual: @"windows-1250"] || + [string isEqual: @"cp1250"] || [string isEqual: @"cp-1250"] || + [string isEqual: @"1250"]) + encoding = OFStringEncodingWindows1250; else if ([string isEqual: @"windows-1251"] || [string isEqual: @"cp1251"] || [string isEqual: @"cp-1251"] || [string isEqual: @"1251"]) encoding = OFStringEncodingWindows1251; else if ([string isEqual: @"windows-1252"] || @@ -217,10 +223,12 @@ return @"ISO 8859-2"; case OFStringEncodingISO8859_3: return @"ISO 8859-3"; case OFStringEncodingISO8859_15: return @"ISO 8859-15"; + case OFStringEncodingWindows1250: + return @"Windows-1250"; case OFStringEncodingWindows1251: return @"Windows-1251"; case OFStringEncodingWindows1252: return @"Windows-1252"; case OFStringEncodingCodepage437: @@ -1164,10 +1172,23 @@ (unsigned char *)cString, length, lossy)) @throw [OFInvalidEncodingException exception]; cString[length] = '\0'; + return length; +#endif +#ifdef HAVE_WINDOWS_1250 + case OFStringEncodingWindows1250: + if (length + 1 > maxLength) + @throw [OFOutOfRangeException exception]; + + if (!_OFUnicodeToWindows1250(characters, + (unsigned char *)cString, length, lossy)) + @throw [OFInvalidEncodingException exception]; + + cString[length] = '\0'; + return length; #endif #ifdef HAVE_WINDOWS_1251 case OFStringEncodingWindows1251: if (length + 1 > maxLength) @@ -1330,10 +1351,11 @@ case OFStringEncodingASCII: case OFStringEncodingISO8859_1: case OFStringEncodingISO8859_2: case OFStringEncodingISO8859_3: case OFStringEncodingISO8859_15: + case OFStringEncodingWindows1250: case OFStringEncodingWindows1251: case OFStringEncodingWindows1252: case OFStringEncodingCodepage437: case OFStringEncodingCodepage850: case OFStringEncodingCodepage858: @@ -1413,10 +1435,11 @@ case OFStringEncodingASCII: case OFStringEncodingISO8859_1: case OFStringEncodingISO8859_2: case OFStringEncodingISO8859_3: case OFStringEncodingISO8859_15: + case 
OFStringEncodingWindows1250: case OFStringEncodingWindows1251: case OFStringEncodingWindows1252: case OFStringEncodingCodepage437: case OFStringEncodingCodepage850: case OFStringEncodingCodepage858: Index: src/OFUTF8String.m ================================================================== --- src/OFUTF8String.m +++ src/OFUTF8String.m @@ -47,10 +47,12 @@ extern const size_t _OFISO8859_2TableOffset; extern const OFChar16 _OFISO8859_3Table[]; extern const size_t _OFISO8859_3TableOffset; extern const OFChar16 _OFISO8859_15Table[]; extern const size_t _OFISO8859_15TableOffset; +extern const OFChar16 _OFWindows1250Table[]; +extern const size_t _OFWindows1250TableOffset; extern const OFChar16 _OFWindows1251Table[]; extern const size_t _OFWindows1251TableOffset; extern const OFChar16 _OFWindows1252Table[]; extern const size_t _OFWindows1252TableOffset; extern const OFChar16 _OFCodepage437Table[]; @@ -322,10 +324,13 @@ CASE(OFStringEncodingISO8859_3, _OFISO8859_3Table) #endif #ifdef HAVE_ISO_8859_15 CASE(OFStringEncodingISO8859_15, _OFISO8859_15Table) #endif +#ifdef HAVE_WINDOWS_1250 + CASE(OFStringEncodingWindows1250, _OFWindows1250Table) +#endif #ifdef HAVE_WINDOWS_1251 CASE(OFStringEncodingWindows1251, _OFWindows1251Table) #endif #ifdef HAVE_WINDOWS_1252 CASE(OFStringEncodingWindows1252, _OFWindows1252Table) Index: src/encodings/common.h ================================================================== --- src/encodings/common.h +++ src/encodings/common.h @@ -16,11 +16,11 @@ * version 3.0 along with this program. If not, see * <https://www.gnu.org/licenses/>.
*/ #define CASE_MISSING_IS_KEEP(nr) \ - case nr: \ + case 0x##nr: \ if OF_UNLIKELY ((c & 0xFF) < page##nr##Start) { \ output[i] = (unsigned char)c; \ continue; \ } \ \ @@ -29,11 +29,11 @@ if (idx >= sizeof(page##nr)) { \ output[i] = (unsigned char)c; \ continue; \ } \ \ - if (page##nr[idx] == 0x00) { \ + if (page##nr[idx] == 0) { \ if (lossy) { \ output[i] = '?'; \ continue; \ } else \ return false; \ @@ -51,15 +51,15 @@ return false; \ } \ \ idx = (c & 0xFF) - page##nr##Start; \ \ - if (idx >= sizeof(page##nr) || page##nr[idx] == 0) { \ + if (idx >= sizeof(page##nr) || page##nr[idx] == 0) { \ if (lossy) { \ output[i] = '?'; \ continue; \ } else \ return false; \ } \ \ output[i] = page##nr[idx]; \ break; ADDED src/encodings/windows-1250.m Index: src/encodings/windows-1250.m ================================================================== --- /dev/null +++ src/encodings/windows-1250.m @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2008-2024 Jonathan Schleifer + * + * All rights reserved. + * + * This program is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3.0 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * version 3.0 for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * version 3.0 along with this program. If not, see + * <https://www.gnu.org/licenses/>.
+ */ + +#include "config.h" + +#import "OFString.h" + +#import "common.h" + +const OFChar16 _OFWindows1250Table[] OF_VISIBILITY_HIDDEN = { + 0x20AC, 0xFFFF, 0x201A, 0xFFFF, 0x201E, 0x2026, 0x2020, 0x2021, + 0xFFFF, 0x2030, 0x0160, 0x2039, 0x015A, 0x0164, 0x017D, 0x0179, + 0xFFFF, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, + 0xFFFF, 0x2122, 0x0161, 0x203A, 0x015B, 0x0165, 0x017E, 0x017A, + 0x00A0, 0x02C7, 0x02D8, 0x0141, 0x00A4, 0x0104, 0x00A6, 0x00A7, + 0x00A8, 0x00A9, 0x015E, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x017B, + 0x00B0, 0x00B1, 0x02DB, 0x0142, 0x00B4, 0x00B5, 0x00B6, 0x00B7, + 0x00B8, 0x0105, 0x015F, 0x00BB, 0x013D, 0x02DD, 0x013E, 0x017C, + 0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7, + 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E, + 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7, + 0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF, + 0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7, + 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F, + 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7, + 0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9 +}; +const size_t _OFWindows1250TableOffset OF_VISIBILITY_HIDDEN = + 256 - (sizeof(_OFWindows1250Table) / sizeof(*_OFWindows1250Table)); + +static const unsigned char page0[] = { + 0xA0, 0x00, 0x00, 0x00, 0xA4, 0x00, 0xA6, 0xA7, + 0xA8, 0xA9, 0x00, 0xAB, 0xAC, 0xAD, 0xAE, 0x00, + 0xB0, 0xB1, 0x00, 0x00, 0xB4, 0xB5, 0xB6, 0xB7, + 0xB8, 0x00, 0x00, 0xBB, 0x00, 0x00, 0x00, 0x00, + 0x00, 0xC1, 0xC2, 0x00, 0xC4, 0x00, 0x00, 0xC7, + 0x00, 0xC9, 0x00, 0xCB, 0x00, 0xCD, 0xCE, 0x00, + 0x00, 0x00, 0x00, 0xD3, 0xD4, 0x00, 0xD6, 0xD7, + 0x00, 0x00, 0xDA, 0x00, 0xDC, 0xDD, 0x00, 0xDF, + 0x00, 0xE1, 0xE2, 0x00, 0xE4, 0x00, 0x00, 0xE7, + 0x00, 0xE9, 0x00, 0xEB, 0x00, 0xED, 0xEE, 0x00, + 0x00, 0x00, 0x00, 0xF3, 0xF4, 0x00, 0xF6, 0xF7, + 0x00, 0x00, 0xFA, 0x00, 0xFC, 0xFD +}; +static const uint8_t page0Start = 
0xA0; + +static const unsigned char page1[] = { + 0xC3, 0xE3, 0xA5, 0xB9, 0xC6, 0xE6, 0x00, 0x00, + 0x00, 0x00, 0xC8, 0xE8, 0xCF, 0xEF, 0xD0, 0xF0, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xCA, 0xEA, + 0xCC, 0xEC, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC5, + 0xE5, 0x00, 0x00, 0xBC, 0xBE, 0x00, 0x00, 0xA3, + 0xB3, 0xD1, 0xF1, 0x00, 0x00, 0xD2, 0xF2, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xD5, 0xF5, + 0x00, 0x00, 0xC0, 0xE0, 0x00, 0x00, 0xD8, 0xF8, + 0x8C, 0x9C, 0x00, 0x00, 0xAA, 0xBA, 0x8A, 0x9A, + 0xDE, 0xFE, 0x8D, 0x9D, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xD9, 0xF9, 0xDB, 0xFB, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8F, + 0x9F, 0xAF, 0xBF, 0x8E, 0x9E +}; +static const uint8_t page1Start = 0x02; + +static const unsigned char page2[] = { + 0xA1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0xA2, 0xFF, 0x00, 0xB2, 0x00, 0xBD +}; +static const uint8_t page2Start = 0xC7; + +static const unsigned char page20[] = { + 0x96, 0x97, 0x00, 0x00, 0x00, 0x91, 0x92, 0x82, + 0x00, 0x93, 0x94, 0x84, 0x00, 0x86, 0x87, 0x95, + 0x00, 0x00, 0x00, 0x85, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x89, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8B, 0x9B, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x80 +}; +static const uint8_t page20Start = 0x13; + +static const unsigned char page21[] = { + 0x99 +}; +static const uint8_t page21Start = 0x22; + +bool OF_VISIBILITY_HIDDEN +_OFUnicodeToWindows1250(const OFUnichar *input, unsigned char *output, + size_t length, bool lossy) +{ + for (size_t i = 0; i < length; i++) { + OFUnichar c = input[i]; + + if OF_UNLIKELY (c > 0x7F) { + uint8_t idx; + + if OF_UNLIKELY (c > 0xFFFF) { + if (lossy) { + output[i] = '?'; + continue; + } else + return false; + } + + switch (c >> 8) { + CASE_MISSING_IS_ERROR(0) + CASE_MISSING_IS_ERROR(1) + CASE_MISSING_IS_ERROR(2) + CASE_MISSING_IS_ERROR(20) + CASE_MISSING_IS_ERROR(21) + default: + if (lossy) { + output[i] = '?'; + continue; + } else + return false; + } + } else + output[i] = (unsigned char)c; + } + + return true; +} Index: src/platform/Windows/OFWin32ConsoleStdIOStream.m ================================================================== --- src/platform/Windows/OFWin32ConsoleStdIOStream.m +++ src/platform/Windows/OFWin32ConsoleStdIOStream.m @@ -69,10 +69,12 @@ return OFStringEncodingCodepage437; case 850: return OFStringEncodingCodepage850; case 858: return OFStringEncodingCodepage858; + case 1250: + return OFStringEncodingWindows1250; case 1251: return OFStringEncodingWindows1251; case 1252: return OFStringEncodingWindows1252; default: Index: tests/OFStringTests.m ================================================================== --- tests/OFStringTests.m +++ tests/OFStringTests.m @@ -258,10 +258,36 @@ OTAssertEqualObjects([self.stringClass stringWithCString: "\xA4\xA6\xA8\xB4\xB8\xBC\xBD\xBE" encoding: OFStringEncodingISO8859_15], @"€ŠšŽžŒœŸ"); } #endif + +#ifdef HAVE_WINDOWS_1250 +- (void)testStringWithCStringEncodingWindows1250 +{ + OTAssertEqualObjects([self.stringClass + stringWithCString: "\x80\x82\x84\x85\x86\x87\x89\x8A" + "\x8B\x8C\x8D\x8E\x8F\x91\x92\x93" + 
"\x94\x95\x96\x97\x99\x9A\x9B\x9C" + "\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4" + "\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC" + "\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4" + "\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC" + "\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4" + "\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC" + "\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4" + "\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC" + "\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4" + "\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC" + "\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4" + "\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC" + "\xFD\xFE\xFF" + encoding: OFStringEncodingWindows1250], + @"€‚„…†‡‰Š‹ŚŤŽŹ‘’“”•–—™š›śťžź ˇ˘Ł¤Ą¦§¨©Ş«¬­®Ż°±˛ł´µ¶·¸ąş»Ľ˝ľżŔÁÂĂÄ" + @"ĹĆÇČÉĘËĚÍÎĎĐŃŇÓÔŐÖ×ŘŮÚŰÜÝŢßŕáâăäĺćçčéęëěíîďđńňóôőö÷řůúűüýţ˙"); +} +#endif #ifdef HAVE_WINDOWS_1252 - (void)testStringWithCStringEncodingWindows1252 { OTAssertEqualObjects([self.stringClass @@ -331,10 +357,34 @@ [[self.stringClass stringWithString: @"This is ä t€st…"] cStringWithEncoding: OFStringEncodingISO8859_15], OFInvalidEncodingException); } #endif + +#ifdef HAVE_WINDOWS_1250 +- (void)testCStringWithEncodingWindows1250 +{ + OTAssertEqual( + strcmp([[self.stringClass stringWithString: + @"€‚„…†‡‰Š‹ŚŤŽŹ‘’“”•–—™š›śťžź ˇ˘Ł¤Ą¦§¨©Ş«¬­®Ż°±˛ł´µ¶·¸ąş»Ľ˝ľżŔÁÂĂÄ" + @"ĹĆÇČÉĘËĚÍÎĎĐŃŇÓÔŐÖ×ŘŮÚŰÜÝŢßŕáâăäĺćçčéęëěíîďđńňóôőö÷řůúűüýţ˙"] + cStringWithEncoding: OFStringEncodingWindows1250], + "\x80\x82\x84\x85\x86\x87\x89\x8A\x8B\x8C\x8D\x8E\x8F\x91\x92\x93" + "\x94\x95\x96\x97\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4" + "\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4" + "\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4" + "\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4" + "\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4" + "\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4" + "\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF"), 0); + + OTAssertThrowsSpecific( + [[self.stringClass stringWithString: @"This is ä t€st…‼"] + cStringWithEncoding: OFStringEncodingWindows1250], + 
OFInvalidEncodingException); +} +#endif #ifdef HAVE_WINDOWS_1252 - (void)testCStringWithEncodingWindows1252 { OTAssertEqual( @@ -386,10 +436,20 @@ strcmp([[self.stringClass stringWithString: @"This is ä t€st…"] lossyCStringWithEncoding: OFStringEncodingISO8859_15], "This is \xE4 t\xA4st?"), 0); } #endif + +#ifdef HAVE_WINDOWS_1250 +- (void)testLossyCStringWithEncodingWindows1250 +{ + OTAssertEqual( + strcmp([[self.stringClass stringWithString: @"This is ä t€st…‼"] + lossyCStringWithEncoding: OFStringEncodingWindows1250], + "This is \xE4 t\x80st\x85?"), 0); +} +#endif #ifdef HAVE_WINDOWS_1252 - (void)testLossyCStringWithEncodingWindows1252 { OTAssertEqual(