Index: src/encodings/windows_1252.m ================================================================== --- src/encodings/windows_1252.m +++ src/encodings/windows_1252.m @@ -15,10 +15,12 @@ */ #include "config.h" #import "OFString.h" + +#import "common.h" const of_char16_t of_windows_1252_table[] = { 0x20AC, 0xFFFF, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFF, 0x017D, 0xFFFF, 0xFFFF, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, @@ -37,124 +39,99 @@ 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF }; const size_t of_windows_1252_table_offset = 256 - (sizeof(of_windows_1252_table) / sizeof(*of_windows_1252_table)); +static const unsigned char page0[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; +static const uint8_t page0Start = 0x80; + +static const unsigned char page1[] = { + 0x8C, 0x9C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8A, 0x9A, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9F, 0x00, + 0x00, 0x00, 0x00, 0x8E, 0x9E, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x83 +}; +static const uint8_t page1Start = 0x52; + +static const unsigned char page2[] = { + 0x88, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x98 +}; +static const uint8_t page2Start = 0xC6; + +static const unsigned char page20[] = { + 0x96, 0x97, 0x00, 0x00, 0x00, 0x91, 0x92, 0x82, + 0x00, 0x93, 0x94, 0x84, 0x00, 0x86, 0x87, 0x95, + 0x00, 0x00, 0x00, 0x85, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x89, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8B, 0x9B, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x80 +}; +static const uint8_t page20Start = 0x13; + +static const unsigned char page21[] = { + 0x99 +}; +static const uint8_t page21Start = 0x22; + bool of_unicode_to_windows_1252(const of_unichar_t *input, unsigned char *output, size_t length, bool lossy) { for (size_t i = 0; i < length; i++) { of_unichar_t c = input[i]; - if OF_UNLIKELY (c > 0xFF) { + if OF_UNLIKELY (c > 0x7F) { + uint8_t index; + if OF_UNLIKELY (c > 0xFFFF) { if (lossy) { output[i] = '?'; continue; } else return false; } - switch ((of_char16_t)c) { - case 0x20AC: - output[i] = 0x80; - break; - case 0x201A: - output[i] = 0x82; - break; - case 0x192: - output[i] = 0x83; - break; - case 0x201E: - output[i] = 0x84; - break; - case 0x2026: - output[i] = 0x85; - break; - case 0x2020: - output[i] = 0x86; - break; - case 0x2021: - output[i] = 0x87; - break; - case 0x2C6: - output[i] = 0x88; - break; - case 0x2030: - output[i] = 0x89; - break; - case 0x160: - output[i] = 0x8A; - break; - case 0x2039: - output[i] = 0x8B; - break; - case 0x152: - output[i] = 0x8C; - break; - case 0x17D: - output[i] = 0x8E; - break; - case 0x2018: - output[i] = 0x91; - break; - case 0x2019: - output[i] = 0x92; - break; - case 0x201C: - output[i] = 0x93; - break; - case 0x201D: - output[i] = 0x94; - break; - case 0x2022: - output[i] = 0x95; - break; - case 0x2013: - output[i] = 0x96; - break; - case 0x2014: - output[i] = 0x97; - break; - case 0x2DC: - output[i] = 0x98; - break; - case 0x2122: - output[i] = 0x99; - break; - case 0x161: - output[i] = 0x9A; - break; - case 0x203A: - output[i] = 0x9B; - break; - case 0x153: - output[i] = 0x9C; - break; - case 0x17E: - output[i] = 0x9E; - break; - case 0x178: - output[i] = 0x9F; - break; + switch (c >> 8) { + CASE_MISSING_IS_KEEP(0) + CASE_MISSING_IS_ERROR(1) + CASE_MISSING_IS_ERROR(2) + CASE_MISSING_IS_ERROR(20) + CASE_MISSING_IS_ERROR(21) default: - if (lossy) - output[i] = '?'; - else - return false; - - break; - } - } else { - if OF_UNLIKELY (c >= 0x80 && c <= 0x9F) { - if (lossy) - output[i] = '?'; - else - return false; - } else - output[i] = (unsigned char)c; - } + if (lossy) { + output[i] = '?'; + continue; + } else + return false; + } + } else + output[i] = (unsigned char)c; } return true; }