ObjFW  Check-in [6bf2b02dc4]

Overview
Comment: More efficient UTF-8 -> ISO-8859-15 conversion
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: 6bf2b02dc4977b71512ff2c0f4a016dd86ebc38a5489c8eacece18b666b659b6
User & Date: js on 2017-02-12 20:05:03
Other Links: manifest | tags
Context
2017-02-12
22:04
More efficient UTF-8 -> Codepage 437 conversion check-in: 84b20a9ce0 user: js tags: trunk
20:05
More efficient UTF-8 -> ISO-8859-15 conversion check-in: 6bf2b02dc4 user: js tags: trunk
18:33
encodings: Code deduplication check-in: 9fdfdec6f7 user: js tags: trunk
Changes

Modified src/encodings/common.h from [98a0dea301] to [40bc42d9ab].

35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
			} else					\
				return false;			\
		}						\
								\
		output[i] = page##nr[index];			\
		break;
#define CASE_MISSING_IS_ERROR(nr)					\
	case nr:							\
		if OF_UNLIKELY ((c & 0xFF) < page##nr##Start) {		\
			if (lossy) {					\
				output[i] = '?';			\
				continue;				\
			} else						\
				return false;				\
		}							\







|







35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
			} else					\
				return false;			\
		}						\
								\
		output[i] = page##nr[index];			\
		break;
#define CASE_MISSING_IS_ERROR(nr)					\
	case 0x##nr:							\
		if OF_UNLIKELY ((c & 0xFF) < page##nr##Start) {		\
			if (lossy) {					\
				output[i] = '?';			\
				continue;				\
			} else						\
				return false;				\
		}							\

Modified src/encodings/iso_8859_15.m from [e7f05e3dae] to [951edda6ef].

13
14
15
16
17
18
19


20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37


























38
39
40
41
42
43
44
45


46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78

79
80
81

82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
 * LICENSE.GPLv2 or LICENSE.GPLv3 respectively included in the packaging of this
 * file.
 */

#include "config.h"

#import "OFString.h"



/*
 * Unicode code points for the upper part of ISO-8859-15.
 *
 * The table holds 96 16-bit entries and starts at U+00A0. Most entries equal
 * their own position (0x00A0, 0x00A1, ...); the exceptions (0x20AC, 0x0160,
 * 0x0161, 0x017D, 0x017E, 0x0152, 0x0153, 0x0178) are the slots in which the
 * stored code point lies outside the 0xA0-0xFF range.
 *
 * NOTE(review): presumably entry i is the code point for the byte
 * of_iso_8859_15_table_offset + i; the code consuming this table is not
 * visible here -- confirm against the decoder.
 */
const of_char16_t of_iso_8859_15_table[] = {
	0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AC, 0x00A5, 0x0160, 0x00A7,
	0x0161, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
	0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x017D, 0x00B5, 0x00B6, 0x00B7,
	0x017E, 0x00B9, 0x00BA, 0x00BB, 0x0152, 0x0153, 0x0178, 0x00BF,
	0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
	0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
	0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
	0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
	0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
	0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
	0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
	0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF
};
/*
 * 256 minus the number of table entries; with the 96 entries above this
 * evaluates to 160 (0xA0), which matches the first entry of the table.
 */
const size_t of_iso_8859_15_table_offset =
    256 - (sizeof(of_iso_8859_15_table) / sizeof(*of_iso_8859_15_table));



























/*
 * Encodes `length` Unicode code points read from `input` into `output`,
 * writing exactly one byte per code point.
 *
 * Code points up to 0xFF are copied through unchanged, except for the eight
 * values 0xA4, 0xA6, 0xA8, 0xB4, 0xB8, 0xBC, 0xBD and 0xBE, which are
 * rejected. Eight code points above 0xFF (0x20AC, 0x160, 0x161, 0x17D,
 * 0x17E, 0x152, 0x153, 0x178) are mapped onto exactly those byte values.
 * Everything else, including anything above 0xFFFF, is rejected.
 *
 * A rejected code point is written as '?' when `lossy` is true; otherwise
 * the function stops immediately and returns false, leaving the current and
 * all following output bytes untouched.
 *
 * Returns true once every code point has been written.
 */
bool
of_unicode_to_iso_8859_15(const of_unichar_t *input, unsigned char *output,
    size_t length, bool lossy)
{
	size_t pos = 0;

	while (pos < length) {
		const of_unichar_t codepoint = input[pos];
		unsigned char encoded = 0;
		bool mappable = true;

		if OF_UNLIKELY (codepoint > 0xFF) {
			if OF_UNLIKELY (codepoint > 0xFFFF) {
				/* Does not even fit into 16 bits. */
				mappable = false;
			} else {
				switch ((of_char16_t)codepoint) {
				case 0x20AC:
					encoded = 0xA4;
					break;
				case 0x160:
					encoded = 0xA6;
					break;
				case 0x161:
					encoded = 0xA8;
					break;
				case 0x17D:
					encoded = 0xB4;
					break;
				case 0x17E:
					encoded = 0xB8;
					break;
				case 0x152:
					encoded = 0xBC;
					break;
				case 0x153:
					encoded = 0xBD;
					break;
				case 0x178:
					encoded = 0xBE;
					break;
				default:
					mappable = false;
					break;
				}
			}
		} else {
			switch (codepoint) {
			/*
			 * These byte values are taken by the code points
			 * handled in the branch above.
			 */
			case 0xA4:
			case 0xA6:
			case 0xA8:
			case 0xB4:
			case 0xB8:
			case 0xBC:
			case 0xBD:
			case 0xBE:
				mappable = false;
				break;
			default:
				encoded = (unsigned char)codepoint;
				break;
			}
		}

		if (!mappable) {
			if (!lossy)
				return false;

			encoded = '?';
		}

		output[pos] = encoded;
		pos++;
	}

	return true;
}







>
>


















>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







|
>
>








|
|
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
>

|

>
|

|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
|
<
|
|
<
<


13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86






















87
88
89
90
91
92
93
94














95




96

97
98


99
100
 * LICENSE.GPLv2 or LICENSE.GPLv3 respectively included in the packaging of this
 * file.
 */

#include "config.h"

#import "OFString.h"

#import "common.h"

/*
 * Unicode code points for the upper part of ISO-8859-15.
 *
 * The table holds 96 16-bit entries and starts at U+00A0. Most entries equal
 * their own position (0x00A0, 0x00A1, ...); the exceptions (0x20AC, 0x0160,
 * 0x0161, 0x017D, 0x017E, 0x0152, 0x0153, 0x0178) are the slots in which the
 * stored code point lies outside the 0xA0-0xFF range.
 *
 * NOTE(review): presumably entry i is the code point for the byte
 * of_iso_8859_15_table_offset + i; the code consuming this table is not
 * visible here -- confirm against the decoder.
 */
const of_char16_t of_iso_8859_15_table[] = {
	0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AC, 0x00A5, 0x0160, 0x00A7,
	0x0161, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
	0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x017D, 0x00B5, 0x00B6, 0x00B7,
	0x017E, 0x00B9, 0x00BA, 0x00BB, 0x0152, 0x0153, 0x0178, 0x00BF,
	0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
	0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
	0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
	0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
	0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
	0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
	0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
	0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF
};
/*
 * 256 minus the number of table entries; with the 96 entries above this
 * evaluates to 160 (0xA0), which matches the first entry of the table.
 */
const size_t of_iso_8859_15_table_offset =
    256 - (sizeof(of_iso_8859_15_table) / sizeof(*of_iso_8859_15_table));

/*
 * Output bytes for the code points 0x00A4 through 0x00BE (low byte
 * page0Start + index). A 0x00 entry sits at exactly the positions 0xA4,
 * 0xA6, 0xA8, 0xB4, 0xB8, 0xBC, 0xBD and 0xBE, i.e. the code points that
 * have no byte of their own in this encoding.
 *
 * The element type is unsigned char rather than plain char: most entries
 * are above 0x7F, which a signed char cannot hold, so with plain char the
 * stored values would depend on an implementation-defined conversion and
 * read back as negative numbers.
 *
 * NOTE(review): how a 0x00 entry is treated is decided by the CASE_* macros
 * in common.h, which are only partly visible here -- presumably "missing".
 */
static const unsigned char page0[] = {
	0x00, 0xA5, 0x00, 0xA7, 0x00, 0xA9, 0xAA, 0xAB,
	0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0xB3,
	0x00, 0xB5, 0xB6, 0xB7, 0x00, 0xB9, 0xBA, 0xBB,
	0x00, 0x00, 0x00
};
/* Low byte of the first code point covered by page0. */
static const uint8_t page0Start = 0xA4;
/* Number of entries in page0. */
static const uint16_t page0Size = sizeof(page0) / sizeof(*page0);

/*
 * Output bytes for the code points 0x0152 through 0x017E (low byte
 * page1Start + index). The non-zero entries are 0x152 -> 0xBC,
 * 0x153 -> 0xBD, 0x160 -> 0xA6, 0x161 -> 0xA8, 0x178 -> 0xBE,
 * 0x17D -> 0xB4 and 0x17E -> 0xB8; every other slot is 0x00.
 *
 * The element type is unsigned char rather than plain char: every non-zero
 * entry is above 0x7F, which a signed char cannot hold, so with plain char
 * the stored values would depend on an implementation-defined conversion
 * and read back as negative numbers.
 *
 * NOTE(review): how a 0x00 entry is treated is decided by the CASE_* macros
 * in common.h, which are only partly visible here -- presumably "missing".
 */
static const unsigned char page1[] = {
	0xBC, 0xBD, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA6, 0xA8,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xBE, 0x00,
	0x00, 0x00, 0x00, 0xB4, 0xB8
};
/* Low byte of the first code point covered by page1. */
static const uint8_t page1Start = 0x52;
/* Number of entries in page1. */
static const uint16_t page1Size = sizeof(page1) / sizeof(*page1);

/*
 * Output byte for the single code point 0x20AC (low byte page20Start),
 * which is stored as 0xA4.
 *
 * The element type is unsigned char rather than plain char: 0xA4 does not
 * fit into a signed char, so with plain char the stored value would depend
 * on an implementation-defined conversion and read back as a negative
 * number.
 */
static const unsigned char page20[] = {
	0xA4
};
/* Low byte of the first (and only) code point covered by page20. */
static const uint8_t page20Start = 0xAC;
/* Number of entries in page20. */
static const uint16_t page20Size = sizeof(page20) / sizeof(*page20);

/*
 * Encodes `length` Unicode code points read from `input` into `output`,
 * writing one byte per code point.
 *
 * Code points up to 0x7F are copied through unchanged. Code points above
 * 0xFFFF are never representable. Everything in between is dispatched on
 * its high byte (c >> 8) to the CASE_* macros from common.h, which consult
 * the page0 / page1 / page20 lookup tables of this file.
 *
 * When a code point cannot be represented, '?' is written if `lossy` is
 * true; otherwise the function returns false immediately.
 *
 * Returns true once all `length` code points have been processed.
 *
 * NOTE(review): the CASE_* macros are only partly visible from here, so the
 * exact per-page behaviour (in particular what "missing is keep" does) has
 * to be confirmed against common.h.
 */
bool
of_unicode_to_iso_8859_15(const of_unichar_t *input, unsigned char *output,
    size_t length, bool lossy)
{
	for (size_t i = 0; i < length; i++) {
		of_unichar_t c = input[i];

		if OF_UNLIKELY (c > 0x7F) {
			/*
			 * Not referenced directly in this function; presumably
			 * assigned and read by the CASE_* macros below (their
			 * visible tail reads page##nr[index]).
			 */
			uint8_t index;

			/* Anything beyond 16 bits is rejected up front. */
			if OF_UNLIKELY (c > 0xFFFF) {
				if (lossy) {
					output[i] = '?';
					continue;
				} else
					return false;
			}

			/* Select the lookup table by the high byte. */
			switch (c >> 8) {
			CASE_MISSING_IS_KEEP(0)
			CASE_MISSING_IS_ERROR(1)






















			/*
			 * Assuming the `case 0x##nr:` form of the macro in
			 * common.h, this handles high byte 0x20 with page20.
			 */
			CASE_MISSING_IS_ERROR(20)
			/* No table exists for any other high byte. */
			default:
				if (lossy) {
					output[i] = '?';
					continue;
				} else
					return false;
			}














		} else




			/* 0x00-0x7F: stored as-is. */
			output[i] = (unsigned char)c;

	}



	return true;
}