ObjFW  Check-in [4a01b5b9f7]

Overview
Comment: More efficient UTF-8 -> ISO-8859-3 conversion
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: 4a01b5b9f770257c8ed4f756249498ae97b65a5f23ccf67e7b0ef4d0e6e6e34b
User & Date: js on 2017-02-12 16:55:13
Other Links: manifest | tags
Context
2017-02-12
18:33
encodings: Code deduplication check-in: 9fdfdec6f7 user: js tags: trunk
16:55
More efficient UTF-8 -> ISO-8859-3 conversion check-in: 4a01b5b9f7 user: js tags: trunk
16:26
More efficient UTF-8 -> ISO-8859-2 conversion check-in: 66c759859a user: js tags: trunk
Changes

Modified src/encodings/iso_8859_2.m from [b0786ce1a1] to [e1a92dbc8a].

31
32
33
34
35
36
37
38

39
40
41
42
43
44
45
46
47
48
49
50

51
52
53


54
55

56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74


75
76

77
78
79
80
81
82


83
84
85
86
87
88
89
31
32
33
34
35
36
37

38
39
40
41
42
43
44
45
46
47
48
49

50
51


52
53
54

55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72


73
74
75

76
77
78
79
80


81
82
83
84
85
86
87
88
89







-
+











-
+

-
-
+
+

-
+

















-
-
+
+

-
+




-
-
+
+







	0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,
	0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,
	0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9
};
const size_t of_iso_8859_2_table_offset =
    256 - (sizeof(of_iso_8859_2_table) / sizeof(*of_iso_8859_2_table));

static char page0[] = {
static const char page0[] = {
	0xA0, 0x00, 0x00, 0x00, 0xA4, 0x00, 0x00, 0xA7,
	0xA8, 0x00, 0x00, 0x00, 0x00, 0xAD, 0x00, 0x00,
	0xB0, 0x00, 0x00, 0x00, 0xB4, 0x00, 0x00, 0x00,
	0xB8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0xC1, 0xC2, 0x00, 0xC4, 0x00, 0x00, 0xC7,
	0x00, 0xC9, 0x00, 0xCB, 0x00, 0xCD, 0xCE, 0x00,
	0x00, 0x00, 0x00, 0xD3, 0xD4, 0x00, 0xD6, 0xD7,
	0x00, 0x00, 0xDA, 0x00, 0xDC, 0xDD, 0x00, 0xDF,
	0x00, 0xE1, 0xE2, 0x00, 0xE4, 0x00, 0x00, 0xE7,
	0x00, 0xE9, 0x00, 0xEB, 0x00, 0xED, 0xEE, 0x00,
	0x00, 0x00, 0x00, 0xF3, 0xF4, 0x00, 0xF6, 0xF7,
	0x00, 0x00, 0xFA, 0x00, 0xFC, 0xFD, 0x00, 0x00
	0x00, 0x00, 0xFA, 0x00, 0xFC, 0xFD
};
static uint8_t page0Start = 160;
static uint16_t page0Size = sizeof(page0) / sizeof(*page0);
static const uint8_t page0Start = 0xA0;
static const uint16_t page0Size = sizeof(page0) / sizeof(*page0);

static char page1[] = {
static const char page1[] = {
	0xC3, 0xE3, 0xA1, 0xB1, 0xC6, 0xE6, 0x00, 0x00,
	0x00, 0x00, 0xC8, 0xE8, 0xCF, 0xEF, 0xD0, 0xF0,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xCA, 0xEA,
	0xCC, 0xEC, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC5,
	0xE5, 0x00, 0x00, 0xA5, 0xB5, 0x00, 0x00, 0xA3,
	0xB3, 0xD1, 0xF1, 0x00, 0x00, 0xD2, 0xF2, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xD5, 0xF5,
	0x00, 0x00, 0xC0, 0xE0, 0x00, 0x00, 0xD8, 0xF8,
	0xA6, 0xB6, 0x00, 0x00, 0xAA, 0xBA, 0xA9, 0xB9,
	0xDE, 0xFE, 0xAB, 0xBB, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0xD9, 0xF9, 0xDB, 0xFB,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xAC,
	0xBC, 0xAF, 0xBF, 0xAE, 0xBE
};
static uint8_t page1Start = 2;
static uint16_t page1Size = sizeof(page1) / sizeof(*page1);
static const uint8_t page1Start = 0x02;
static const uint16_t page1Size = sizeof(page1) / sizeof(*page1);

static char page2[] = {
static const char page2[] = {
	0xB7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0xA2, 0xFF, 0x00, 0xB2, 0x00, 0xBD
};
static uint8_t page2Start = 0xC7;
static uint16_t page2Size = sizeof(page2) / sizeof(*page2);
static const uint8_t page2Start = 0xC7;
static const uint16_t page2Size = sizeof(page2) / sizeof(*page2);

bool
of_unicode_to_iso_8859_2(const of_unichar_t *input, unsigned char *output,
    size_t length, bool lossy)
{
	for (size_t i = 0; i < length; i++) {
		of_unichar_t c = input[i];

Modified src/encodings/iso_8859_3.m from [c69421d1b9] to [787209b806].

31
32
33
34
35
36
37











































38
39
40
41
42
43
44
45


46
47
48
49
50
51
52
53
54
55
56
57

58
59
60
61

62
63
64
65
66
67





68
69
70
71


72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124




125
126

127
128
129



130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196







197
198

199
200


201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259


260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281


282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326

327

328

329
330
331
332









333
334
335
336
337
338
339
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98




99




100






101
102
103
104
105




106
107





















































108
109
110
111


112



113
114
115



































































116
117
118
119
120
121
122


123


124
125



























































126
127






















128
129













































130
131
132

133
134



135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+








+
+








-
-
-
-
+
-
-
-
-
+
-
-
-
-
-
-
+
+
+
+
+
-
-
-
-
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
-
-
+
-
-
-
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
-
-
+
-
-
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+

+
-
+

-
-
-
+
+
+
+
+
+
+
+
+







	0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
	0xFFFF, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x0121, 0x00F6, 0x00F7,
	0x011D, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x016D, 0x015D, 0x02D9
};
const size_t of_iso_8859_3_table_offset =
    256 - (sizeof(of_iso_8859_3_table) / sizeof(*of_iso_8859_3_table));

static const char page0[] = {
	0xA0, 0x00, 0x00, 0xA3, 0xA4, 0x00, 0x00, 0xA7,
	0xA8, 0x00, 0x00, 0x00, 0x00, 0xAD, 0x00, 0x00,
	0xB0, 0x00, 0xB2, 0xB3, 0xB4, 0xB5, 0x00, 0xB7,
	0xB8, 0x00, 0x00, 0x00, 0x00, 0xBD, 0x00, 0x00,
	0xC0, 0xC1, 0xC2, 0x00, 0xC4, 0x00, 0x00, 0xC7,
	0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
	0x00, 0xD1, 0xD2, 0xD3, 0xD4, 0x00, 0xD6, 0xD7,
	0x00, 0xD9, 0xDA, 0xDB, 0xDC, 0x00, 0x00, 0xDF,
	0xE0, 0xE1, 0xE2, 0x00, 0xE4, 0x00, 0x00, 0xE7,
	0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
	0x00, 0xF1, 0xF2, 0xF3, 0xF4, 0x00, 0xF6, 0xF7,
	0x00, 0xF9, 0xFA, 0xFB, 0xFC, 0x00, 0x00, 0x00
};
static const uint8_t page0Start = 0xA0;
static const uint16_t page0Size = sizeof(page0) / sizeof(*page0);

static const uint8_t page1Start = 0x08;
static const char page1[] = {
	0xC6, 0xE6, 0xC5, 0xE5, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0xD8, 0xF8, 0xAB, 0xBB,
	0xD5, 0xF5, 0x00, 0x00, 0xA6, 0xB6, 0xA1, 0xB1,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0xA9, 0xB9, 0x00, 0x00, 0xAC, 0xBC, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0xDE, 0xFE, 0xAA, 0xBA,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0xDD, 0xFD, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0xAF, 0xBF
};
static const uint16_t page1Size = sizeof(page1) / sizeof(*page1);

static const char page2[] = {
	0xA2, 0xFF
};
static const uint8_t page2Start = 0xD8;
static const uint16_t page2Size = sizeof(page2) / sizeof(*page2);

bool
of_unicode_to_iso_8859_3(const of_unichar_t *input, unsigned char *output,
    size_t length, bool lossy)
{
	for (size_t i = 0; i < length; i++) {
		of_unichar_t c = input[i];

		if OF_UNLIKELY (c > 0x7F) {
			uint8_t index;

			if OF_UNLIKELY (c > 0xFFFF) {
				if (lossy) {
					output[i] = '?';
					continue;
				} else
					return false;
			}

			if OF_UNLIKELY (c >= 0x80 && c <= 0x9F)
				output[i] = c;
			else {
				switch ((of_char16_t)c) {
			switch (c >> 8) {
				case 0xA0:
					output[i] = 0xA0;
					break;
				case 0x126:
			case 0:
					output[i] = 0xA1;
					break;
				case 0x2D8:
					output[i] = 0xA2;
					break;
				case 0xA3:
				if OF_UNLIKELY ((c & 0xFF) < page0Start) {
					output[i] = (unsigned char)c;
					continue;
				}

					output[i] = 0xA3;
					break;
				case 0xA4:
					output[i] = 0xA4;
				index = (c & 0xFF) - page0Start;
				if (index >= page0Size || page0[index] == 0) {
					break;
				case 0x124:
					output[i] = 0xA6;
					break;
				case 0xA7:
					output[i] = 0xA7;
					break;
				case 0xA8:
					output[i] = 0xA8;
					break;
				case 0x130:
					output[i] = 0xA9;
					break;
				case 0x15E:
					output[i] = 0xAA;
					break;
				case 0x11E:
					output[i] = 0xAB;
					break;
				case 0x134:
					output[i] = 0xAC;
					break;
				case 0xAD:
					output[i] = 0xAD;
					break;
				case 0x17B:
					output[i] = 0xAF;
					break;
				case 0xB0:
					output[i] = 0xB0;
					break;
				case 0x127:
					output[i] = 0xB1;
					break;
				case 0xB2:
					output[i] = 0xB2;
					break;
				case 0xB3:
					output[i] = 0xB3;
					break;
				case 0xB4:
					output[i] = 0xB4;
					break;
				case 0xB5:
					output[i] = 0xB5;
					break;
				case 0x125:
					output[i] = 0xB6;
					break;
				case 0xB7:
					output[i] = 0xB7;
					break;
				case 0xB8:
					if (lossy) {
						output[i] = '?';
						continue;
					} else
					output[i] = 0xB8;
					break;
						return false;
				case 0x131:
					output[i] = 0xB9;
					break;
				}
				output[i] = page0[index];
				break;
				case 0x15F:
					output[i] = 0xBA;
					break;
				case 0x11F:
					output[i] = 0xBB;
					break;
				case 0x135:
					output[i] = 0xBC;
					break;
				case 0xBD:
					output[i] = 0xBD;
					break;
				case 0x17C:
					output[i] = 0xBF;
					break;
				case 0xC0:
					output[i] = 0xC0;
					break;
				case 0xC1:
					output[i] = 0xC1;
					break;
				case 0xC2:
					output[i] = 0xC2;
					break;
				case 0xC4:
					output[i] = 0xC4;
					break;
				case 0x10A:
					output[i] = 0xC5;
					break;
				case 0x108:
					output[i] = 0xC6;
					break;
				case 0xC7:
					output[i] = 0xC7;
					break;
				case 0xC8:
					output[i] = 0xC8;
					break;
				case 0xC9:
					output[i] = 0xC9;
					break;
				case 0xCA:
					output[i] = 0xCA;
					break;
				case 0xCB:
					output[i] = 0xCB;
					break;
				case 0xCC:
					output[i] = 0xCC;
					break;
				case 0xCD:
					output[i] = 0xCD;
					break;
				case 0xCE:
					output[i] = 0xCE;
					break;
				case 0xCF:
					output[i] = 0xCF;
					break;
				case 0xD1:
					output[i] = 0xD1;
					break;
				case 0xD2:
					output[i] = 0xD2;
					break;
				case 0xD3:
			case 1:
				index = (c & 0xFF) - page1Start;
				if (index >= page1Size || page1[index] == 0) {
					if (lossy) {
						output[i] = '?';
						continue;
					} else
					output[i] = 0xD3;
					break;
						return false;
				case 0xD4:
					output[i] = 0xD4;
				}
				output[i] = page1[index];
					break;
				case 0x120:
					output[i] = 0xD5;
					break;
				case 0xD6:
					output[i] = 0xD6;
					break;
				case 0xD7:
					output[i] = 0xD7;
					break;
				case 0x11C:
					output[i] = 0xD8;
					break;
				case 0xD9:
					output[i] = 0xD9;
					break;
				case 0xDA:
					output[i] = 0xDA;
					break;
				case 0xDB:
					output[i] = 0xDB;
					break;
				case 0xDC:
					output[i] = 0xDC;
					break;
				case 0x16C:
					output[i] = 0xDD;
					break;
				case 0x15C:
					output[i] = 0xDE;
					break;
				case 0xDF:
					output[i] = 0xDF;
					break;
				case 0xE0:
					output[i] = 0xE0;
					break;
				case 0xE1:
					output[i] = 0xE1;
					break;
				case 0xE2:
					output[i] = 0xE2;
					break;
				case 0xE4:
					output[i] = 0xE4;
					break;
				case 0x10B:
					output[i] = 0xE5;
					break;
				case 0x109:
					output[i] = 0xE6;
					break;
				case 0xE7:
					output[i] = 0xE7;
					break;
				case 0xE8:
					output[i] = 0xE8;
					break;
				case 0xE9:
				break;
			case 2:
					output[i] = 0xE9;
					break;
				case 0xEA:
					output[i] = 0xEA;
					break;
				case 0xEB:
					output[i] = 0xEB;
					break;
				case 0xEC:
					output[i] = 0xEC;
					break;
				case 0xED:
					output[i] = 0xED;
					break;
				case 0xEE:
					output[i] = 0xEE;
					break;
				case 0xEF:
					output[i] = 0xEF;
					break;
				case 0xF1:
					output[i] = 0xF1;
				index = (c & 0xFF) - page2Start;
				if (index >= page2Size || page2[index] == 0) {
					break;
				case 0xF2:
					output[i] = 0xF2;
					break;
				case 0xF3:
					output[i] = 0xF3;
					break;
				case 0xF4:
					output[i] = 0xF4;
					break;
				case 0x121:
					output[i] = 0xF5;
					break;
				case 0xF6:
					output[i] = 0xF6;
					break;
				case 0xF7:
					output[i] = 0xF7;
					break;
				case 0x11D:
					output[i] = 0xF8;
					break;
				case 0xF9:
					output[i] = 0xF9;
					break;
				case 0xFA:
					output[i] = 0xFA;
					break;
				case 0xFB:
					output[i] = 0xFB;
					break;
				case 0xFC:
					output[i] = 0xFC;
					break;
				case 0x16D:
					output[i] = 0xFD;
					break;
				case 0x15D:
					output[i] = 0xFE;
					break;
				case 0x2D9:
					output[i] = 0xFF;
					break;
				default:
					if (lossy)
					if (lossy) {
						output[i] = '?';
						continue;
					else
					} else
						return false;

					break;
				}
				}
				output[i] = page2[index];
				break;
			default:
				if (lossy) {
					output[i] = '?';
					continue;
				} else
					return false;
			}
		} else
			output[i] = (unsigned char)c;
	}

	return true;
}