ObjFW  Check-in [7c26551b67]

Overview
Comment: Clean up Unicode -> * conversions.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: 7c26551b673324c22797a275f3aec6c8e36d7f8d6e311ca5283418b5abae4d6c
User & Date: js on 2014-01-19 14:10:27
Other Links: manifest | tags
Context
2014-01-19
14:17
Add Unicode -> Codepage 437 conversion. check-in: e66defc073 user: js tags: trunk
14:10
Clean up Unicode -> * conversions. check-in: 7c26551b67 user: js tags: trunk
12:00
Add lookup-asm-ppc-macho.S. check-in: d80d091b0e user: js tags: trunk
Changes

Modified src/OFString.m from [17a63459d4] to [04e1844f94].

1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017

1018
1019
1020
1021
1022
1023
1024
1025
1007
1008
1009
1010
1011
1012
1013




1014

1015
1016
1017
1018
1019
1020
1021







-
-
-
-
+
-







			case 3:
			case 4:
				memcpy(cString + j, buffer, len);
				j += len;

				break;
			default:
				if (lossy)
					cString[j++] = '?';
				else
					@throw [OFInvalidEncodingException
				@throw [OFInvalidEncodingException exception];
					    exception];

				break;
			}
		}

		cString[j] = '\0';

Modified src/iso_8859_15.m from [25eed555d5] to [efb74e1c28].

42
43
44
45
46
47
48
49

50
51
52
53
54
55
56







57
58
59
60
61
62
63
64
65
42
43
44
45
46
47
48

49







50
51
52
53
54
55
56
57

58
59
60
61
62
63
64







-
+
-
-
-
-
-
-
-
+
+
+
+
+
+
+

-







    size_t length, bool lossy)
{
	size_t i;

	for (i = 0; i < length; i++) {
		of_unichar_t c = input[i];

		if OF_UNLIKELY (c == 0xA4 || c == 0xA6 || c == 0xA8 ||
		if OF_UNLIKELY (c > 0xFF) {
		    c == 0xB4 || c == 0xB8 || c == 0xBC || c == 0xBD ||
		    c == 0xBE || c > 0xFFFF) {
			if (lossy)
				output[i] = '?';
			else
				return false;
		}
			if OF_UNLIKELY (c > 0xFFFF) {
				if (lossy) {
					output[i] = '?';
					continue;
				} else
					return false;
			}

		if OF_UNLIKELY (c > 0xFF) {
			switch ((of_char16_t)c) {
			case 0x20AC:
				output[i] = 0xA4;
				break;
			case 0x160:
				output[i] = 0xA6;
				break;
85
86
87
88
89
90
91
92
93





















94
95
96
97
84
85
86
87
88
89
90


91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115







-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+




				if (lossy)
					output[i] = '?';
				else
					return false;

				break;
			}
		} else
			output[i] = (uint8_t)c;
		} else {
			switch (c) {
			case 0xA4:
			case 0xA6:
			case 0xA8:
			case 0xB4:
			case 0xB8:
			case 0xBC:
			case 0xBD:
			case 0xBE:
				if (lossy)
					output[i] = '?';
				else
					return false;

				break;
			default:
				output[i] = (uint8_t)c;
				break;
			}
		}
	}

	return true;
}

Modified src/windows_1252.m from [efb7bb469a] to [6e0427f4c2].

42
43
44
45
46
47
48
49
50
51
52
53
54








55
56
57
58
59
60
61
62
63
42
43
44
45
46
47
48






49
50
51
52
53
54
55
56
57

58
59
60
61
62
63
64







-
-
-
-
-
-
+
+
+
+
+
+
+
+

-







    size_t length, bool lossy)
{
	size_t i;

	for (i = 0; i < length; i++) {
		of_unichar_t c = input[i];

		if OF_UNLIKELY ((c >= 0x80 && c <= 0x9F) || c > 0xFFFF) {
			if (lossy)
				output[i] = '?';
			else
				return false;
		}
		if OF_UNLIKELY (c > 0xFF) {
			if OF_UNLIKELY (c > 0xFFFF) {
				if (lossy) {
					output[i] = '?';
					continue;
				} else
					return false;
			}

		if OF_UNLIKELY (c > 0xFF) {
			switch ((of_char16_t)c) {
			case 0x20AC:
				output[i] = 0x80;
				break;
			case 0x201A:
				output[i] = 0x82;
				break;
140
141
142
143
144
145
146






147
148



149
150
151
152
141
142
143
144
145
146
147
148
149
150
151
152
153


154
155
156
157
158
159
160







+
+
+
+
+
+
-
-
+
+
+




				if (lossy)
					output[i] = '?';
				else
					return false;

				break;
			}
		} else {
			if OF_UNLIKELY (c >= 0x80 && c <= 0x9F) {
				if (lossy)
					output[i] = '?';
				else
					return false;
		} else
			output[i] = (uint8_t)c;
			} else
				output[i] = (uint8_t)c;
		}
	}

	return true;
}

Modified tests/OFStringTests.m from [b2e299ece7] to [da479fb931].

17
18
19
20
21
22
23

24
25
26
27
28
29
30
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31







+







#include "config.h"

#include <stdlib.h>
#include <string.h>
#include <math.h>

#import "OFString.h"
#import "OFMutableString_UTF8.h"
#import "OFArray.h"
#import "OFURL.h"
#import "OFAutoreleasePool.h"

#import "OFInvalidArgumentException.h"
#import "OFInvalidEncodingException.h"
#import "OFInvalidFormatException.h"
194
195
196
197
198
199
200
201

202
203
204

205
206
207
208
209

210
211
212
213
214

215
216
217
218
219
220
221
222

223
224
225
226
227

228
229
230
231

232
233
234
235

236
237
238
239

240
241
242
243

244
245
246
247

248
249
250
251

252
253
254
255

256
257
258
259

260
261
262
263

264
265
266
267

268
269
270
271

272
273
274
275
276
277
278
195
196
197
198
199
200
201

202
203
204

205
206
207
208
209

210
211
212
213
214

215
216
217
218
219
220
221
222

223
224
225
226
227

228
229
230
231

232
233
234
235

236
237
238
239

240
241
242
243

244
245
246
247

248
249
250
251

252
253
254
255

256
257
258
259

260
261
262
263

264
265
266
267

268
269
270
271

272
273
274
275
276
277
278
279







-
+


-
+




-
+




-
+







-
+




-
+



-
+



-
+



-
+



-
+



-
+



-
+



-
+



-
+



-
+



-
+



-
+







	    OFInvalidEncodingException,
	    [OFString stringWithUTF8String: "\xE0\x80"])
	EXPECT_EXCEPTION(@"Detection of invalid UTF-8 encoding #2",
	    OFInvalidEncodingException,
	    [OFString stringWithUTF8String: "\xF0\x80\x80\xC0"])

	TEST(@"-[reverse] on UTF-8 strings",
	    (s[0] = [OFMutableString stringWithUTF8String: "äöü€𝄞"]) &&
	    (s[0] = [OFMutableString_UTF8 stringWithUTF8String: "äöü€𝄞"]) &&
	    R([s[0] reverse]) && [s[0] isEqual: @"𝄞€üöä"])

	TEST(@"Conversion of ISO 8859-1 to UTF-8",
	TEST(@"Conversion of ISO 8859-1 to Unicode",
	    [[OFString stringWithCString: "\xE4\xF6\xFC"
				encoding: OF_STRING_ENCODING_ISO_8859_1]
	    isEqual: @"äöü"])

	TEST(@"Conversion of ISO 8859-15 to UTF-8",
	TEST(@"Conversion of ISO 8859-15 to Unicode",
	    [[OFString stringWithCString: "\xA4\xA6\xA8\xB4\xB8\xBC\xBD\xBE"
				encoding: OF_STRING_ENCODING_ISO_8859_15]
	    isEqual: @"€ŠšŽžŒœŸ"])

	TEST(@"Conversion of Windows 1252 to UTF-8",
	TEST(@"Conversion of Windows 1252 to Unicode",
	    [[OFString stringWithCString: "\x80\x82\x83\x84\x85\x86\x87\x88"
					  "\x89\x8A\x8B\x8C\x8E\x91\x92\x93"
					  "\x94\x95\x96\x97\x98\x99\x9A\x9B"
					  "\x9C\x9E\x9F"
				encoding: OF_STRING_ENCODING_WINDOWS_1252]
	    isEqual: @"€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ"])

	TEST(@"Conversion of Codepage 437 to UTF-8",
	TEST(@"Conversion of Codepage 437 to Unicode",
	    [[OFString stringWithCString: "\xB0\xB1\xB2\xDB"
				encoding: OF_STRING_ENCODING_CODEPAGE_437]
	    isEqual: @"░▒▓█"])

	TEST(@"Conversion of UTF-8 to ASCII #1",
	TEST(@"Conversion of Unicode to ASCII #1",
	    !strcmp([@"This is a test" cStringWithEncoding:
	    OF_STRING_ENCODING_ASCII], "This is a test"))

	EXPECT_EXCEPTION(@"Conversion of UTF-8 to ASCII #2",
	EXPECT_EXCEPTION(@"Conversion of Unicode to ASCII #2",
	    OFInvalidEncodingException,
	    [@"This is a tést" cStringWithEncoding: OF_STRING_ENCODING_ASCII])

	TEST(@"Conversion of UTF-8 to ISO-8859-1 #1",
	TEST(@"Conversion of Unicode to ISO-8859-1 #1",
	    !strcmp([@"This is ä test" cStringWithEncoding:
	    OF_STRING_ENCODING_ISO_8859_1], "This is \xE4 test"))

	EXPECT_EXCEPTION(@"Conversion of UTF-8 to ISO-8859-1 #2",
	EXPECT_EXCEPTION(@"Conversion of Unicode to ISO-8859-1 #2",
	    OFInvalidEncodingException, [@"This is ä t€st" cStringWithEncoding:
	    OF_STRING_ENCODING_ISO_8859_1])

	TEST(@"Conversion of UTF-8 to ISO-8859-15 #1",
	TEST(@"Conversion of Unicode to ISO-8859-15 #1",
	    !strcmp([@"This is ä t€st" cStringWithEncoding:
	    OF_STRING_ENCODING_ISO_8859_15], "This is \xE4 t\xA4st"))

	EXPECT_EXCEPTION(@"Conversion of UTF-8 to ISO-8859-15 #2",
	EXPECT_EXCEPTION(@"Conversion of Unicode to ISO-8859-15 #2",
	    OFInvalidEncodingException, [@"This is ä t€st…" cStringWithEncoding:
	    OF_STRING_ENCODING_ISO_8859_15])

	TEST(@"Conversion of UTF-8 to Windows-1252 #1",
	TEST(@"Conversion of Unicode to Windows-1252 #1",
	    !strcmp([@"This is ä t€st…" cStringWithEncoding:
	    OF_STRING_ENCODING_WINDOWS_1252], "This is \xE4 t\x80st\x85"))

	EXPECT_EXCEPTION(@"Conversion of UTF-8 to Windows-1252 #2",
	EXPECT_EXCEPTION(@"Conversion of Unicode to Windows-1252 #2",
	    OFInvalidEncodingException, [@"This is ä t€st…‼"
	    cStringWithEncoding: OF_STRING_ENCODING_WINDOWS_1252])

	TEST(@"Lossy conversion of UTF-8 to ASCII",
	TEST(@"Lossy conversion of Unicode to ASCII",
	    !strcmp([@"This is a tést" lossyCStringWithEncoding:
	    OF_STRING_ENCODING_ASCII], "This is a t?st"))

	TEST(@"Lossy conversion of UTF-8 to ISO-8859-1",
	TEST(@"Lossy conversion of Unicode to ISO-8859-1",
	    !strcmp([@"This is ä t€st" lossyCStringWithEncoding:
	    OF_STRING_ENCODING_ISO_8859_1], "This is \xE4 t?st"))

	TEST(@"Lossy conversion of UTF-8 to ISO-8859-15",
	TEST(@"Lossy conversion of Unicode to ISO-8859-15",
	    !strcmp([@"This is ä t€st…" lossyCStringWithEncoding:
	    OF_STRING_ENCODING_ISO_8859_15], "This is \xE4 t\xA4st?"))

	TEST(@"Lossy conversion of UTF-8 to Windows-1252",
	TEST(@"Lossy conversion of Unicode to Windows-1252",
	    !strcmp([@"This is ä t€st…‼" lossyCStringWithEncoding:
	    OF_STRING_ENCODING_WINDOWS_1252], "This is \xE4 t\x80st\x85?"))

	TEST(@"+[stringWithFormat:]",
	    [(s[0] = [OFMutableString stringWithFormat: @"%@:%d", @"test", 123])
	    isEqual: @"test:123"])