ObjFW  Check-in [7c26551b67]

Overview
Comment: Clean up Unicode -> * conversions.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: 7c26551b673324c22797a275f3aec6c8e36d7f8d6e311ca5283418b5abae4d6c
User & Date: js on 2014-01-19 14:10:27
Other Links: manifest | tags
Context
2014-01-19
14:17
Add Unicode -> Codepage 437 conversion. check-in: e66defc073 user: js tags: trunk
14:10
Clean up Unicode -> * conversions. check-in: 7c26551b67 user: js tags: trunk
12:00
Add lookup-asm-ppc-macho.S. check-in: d80d091b0e user: js tags: trunk
Changes

Modified src/OFString.m from [17a63459d4] to [04e1844f94].

1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
			case 3:
			case 4:
				memcpy(cString + j, buffer, len);
				j += len;

				break;
			default:
				if (lossy)
					cString[j++] = '?';
				else
					@throw [OFInvalidEncodingException
					    exception];

				break;
			}
		}

		cString[j] = '\0';








<
<
<
|
<







1007
1008
1009
1010
1011
1012
1013



1014

1015
1016
1017
1018
1019
1020
1021
			case 3:
			case 4:
				memcpy(cString + j, buffer, len);
				j += len;

				break;
			default:



				@throw [OFInvalidEncodingException exception];


				break;
			}
		}

		cString[j] = '\0';

Modified src/iso_8859_15.m from [25eed555d5] to [efb74e1c28].

42
43
44
45
46
47
48
49
50
51
52
53

54
55
56
57
58
59
60
61
62
63
64
65
    size_t length, bool lossy)
{
	size_t i;

	for (i = 0; i < length; i++) {
		of_unichar_t c = input[i];

		if OF_UNLIKELY (c == 0xA4 || c == 0xA6 || c == 0xA8 ||
		    c == 0xB4 || c == 0xB8 || c == 0xBC || c == 0xBD ||
		    c == 0xBE || c > 0xFFFF) {
			if (lossy)
				output[i] = '?';

			else
				return false;
		}

		if OF_UNLIKELY (c > 0xFF) {
			switch ((of_char16_t)c) {
			case 0x20AC:
				output[i] = 0xA4;
				break;
			case 0x160:
				output[i] = 0xA6;
				break;







|
<
|
|
|
>
|
|
|

<







42
43
44
45
46
47
48
49

50
51
52
53
54
55
56
57

58
59
60
61
62
63
64
    size_t length, bool lossy)
{
	size_t i;

	for (i = 0; i < length; i++) {
		of_unichar_t c = input[i];

		if OF_UNLIKELY (c > 0xFF) {

			if OF_UNLIKELY (c > 0xFFFF) {
				if (lossy) {
					output[i] = '?';
					continue;
				} else
					return false;
			}


			switch ((of_char16_t)c) {
			case 0x20AC:
				output[i] = 0xA4;
				break;
			case 0x160:
				output[i] = 0xA6;
				break;
85
86
87
88
89
90
91
92
















93



94
95
96
97
				if (lossy)
					output[i] = '?';
				else
					return false;

				break;
			}
		} else
















			output[i] = (uint8_t)c;



	}

	return true;
}







|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
>
>




84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
				if (lossy)
					output[i] = '?';
				else
					return false;

				break;
			}
		} else {
			switch (c) {
			case 0xA4:
			case 0xA6:
			case 0xA8:
			case 0xB4:
			case 0xB8:
			case 0xBC:
			case 0xBD:
			case 0xBE:
				if (lossy)
					output[i] = '?';
				else
					return false;

				break;
			default:
				output[i] = (uint8_t)c;
				break;
			}
		}
	}

	return true;
}

Modified src/windows_1252.m from [efb7bb469a] to [6e0427f4c2].

42
43
44
45
46
47
48
49

50
51

52
53
54
55
56
57
58
59
60
61
62
63
    size_t length, bool lossy)
{
	size_t i;

	for (i = 0; i < length; i++) {
		of_unichar_t c = input[i];

		if OF_UNLIKELY ((c >= 0x80 && c <= 0x9F) || c > 0xFFFF) {

			if (lossy)
				output[i] = '?';

			else
				return false;
		}

		if OF_UNLIKELY (c > 0xFF) {
			switch ((of_char16_t)c) {
			case 0x20AC:
				output[i] = 0x80;
				break;
			case 0x201A:
				output[i] = 0x82;
				break;







|
>
|
|
>
|
|
|

<







42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57

58
59
60
61
62
63
64
    size_t length, bool lossy)
{
	size_t i;

	for (i = 0; i < length; i++) {
		of_unichar_t c = input[i];

		if OF_UNLIKELY (c > 0xFF) {
			if OF_UNLIKELY (c > 0xFFFF) {
				if (lossy) {
					output[i] = '?';
					continue;
				} else
					return false;
			}


			switch ((of_char16_t)c) {
			case 0x20AC:
				output[i] = 0x80;
				break;
			case 0x201A:
				output[i] = 0x82;
				break;
140
141
142
143
144
145
146






147
148

149
150
151
152
				if (lossy)
					output[i] = '?';
				else
					return false;

				break;
			}






		} else
			output[i] = (uint8_t)c;

	}

	return true;
}







>
>
>
>
>
>
|
|
>




141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
				if (lossy)
					output[i] = '?';
				else
					return false;

				break;
			}
		} else {
			if OF_UNLIKELY (c >= 0x80 && c <= 0x9F) {
				if (lossy)
					output[i] = '?';
				else
					return false;
			} else
				output[i] = (uint8_t)c;
		}
	}

	return true;
}

Modified tests/OFStringTests.m from [b2e299ece7] to [da479fb931].

17
18
19
20
21
22
23

24
25
26
27
28
29
30
#include "config.h"

#include <stdlib.h>
#include <string.h>
#include <math.h>

#import "OFString.h"

#import "OFArray.h"
#import "OFURL.h"
#import "OFAutoreleasePool.h"

#import "OFInvalidArgumentException.h"
#import "OFInvalidEncodingException.h"
#import "OFInvalidFormatException.h"







>







17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
#include "config.h"

#include <stdlib.h>
#include <string.h>
#include <math.h>

#import "OFString.h"
#import "OFMutableString_UTF8.h"
#import "OFArray.h"
#import "OFURL.h"
#import "OFAutoreleasePool.h"

#import "OFInvalidArgumentException.h"
#import "OFInvalidEncodingException.h"
#import "OFInvalidFormatException.h"
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
	    OFInvalidEncodingException,
	    [OFString stringWithUTF8String: "\xE0\x80"])
	EXPECT_EXCEPTION(@"Detection of invalid UTF-8 encoding #2",
	    OFInvalidEncodingException,
	    [OFString stringWithUTF8String: "\xF0\x80\x80\xC0"])

	TEST(@"-[reverse] on UTF-8 strings",
	    (s[0] = [OFMutableString stringWithUTF8String: "äöü€𝄞"]) &&
	    R([s[0] reverse]) && [s[0] isEqual: @"𝄞€üöä"])

	TEST(@"Conversion of ISO 8859-1 to UTF-8",
	    [[OFString stringWithCString: "\xE4\xF6\xFC"
				encoding: OF_STRING_ENCODING_ISO_8859_1]
	    isEqual: @"äöü"])

	TEST(@"Conversion of ISO 8859-15 to UTF-8",
	    [[OFString stringWithCString: "\xA4\xA6\xA8\xB4\xB8\xBC\xBD\xBE"
				encoding: OF_STRING_ENCODING_ISO_8859_15]
	    isEqual: @"€ŠšŽžŒœŸ"])

	TEST(@"Conversion of Windows 1252 to UTF-8",
	    [[OFString stringWithCString: "\x80\x82\x83\x84\x85\x86\x87\x88"
					  "\x89\x8A\x8B\x8C\x8E\x91\x92\x93"
					  "\x94\x95\x96\x97\x98\x99\x9A\x9B"
					  "\x9C\x9E\x9F"
				encoding: OF_STRING_ENCODING_WINDOWS_1252]
	    isEqual: @"€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ"])

	TEST(@"Conversion of Codepage 437 to UTF-8",
	    [[OFString stringWithCString: "\xB0\xB1\xB2\xDB"
				encoding: OF_STRING_ENCODING_CODEPAGE_437]
	    isEqual: @"░▒▓█"])

	TEST(@"Conversion of UTF-8 to ASCII #1",
	    !strcmp([@"This is a test" cStringWithEncoding:
	    OF_STRING_ENCODING_ASCII], "This is a test"))

	EXPECT_EXCEPTION(@"Conversion of UTF-8 to ASCII #2",
	    OFInvalidEncodingException,
	    [@"This is a tést" cStringWithEncoding: OF_STRING_ENCODING_ASCII])

	TEST(@"Conversion of UTF-8 to ISO-8859-1 #1",
	    !strcmp([@"This is ä test" cStringWithEncoding:
	    OF_STRING_ENCODING_ISO_8859_1], "This is \xE4 test"))

	EXPECT_EXCEPTION(@"Conversion of UTF-8 to ISO-8859-1 #2",
	    OFInvalidEncodingException, [@"This is ä t€st" cStringWithEncoding:
	    OF_STRING_ENCODING_ISO_8859_1])

	TEST(@"Conversion of UTF-8 to ISO-8859-15 #1",
	    !strcmp([@"This is ä t€st" cStringWithEncoding:
	    OF_STRING_ENCODING_ISO_8859_15], "This is \xE4 t\xA4st"))

	EXPECT_EXCEPTION(@"Conversion of UTF-8 to ISO-8859-15 #2",
	    OFInvalidEncodingException, [@"This is ä t€st…" cStringWithEncoding:
	    OF_STRING_ENCODING_ISO_8859_15])

	TEST(@"Conversion of UTF-8 to Windows-1252 #1",
	    !strcmp([@"This is ä t€st…" cStringWithEncoding:
	    OF_STRING_ENCODING_WINDOWS_1252], "This is \xE4 t\x80st\x85"))

	EXPECT_EXCEPTION(@"Conversion of UTF-8 to Windows-1252 #2",
	    OFInvalidEncodingException, [@"This is ä t€st…‼"
	    cStringWithEncoding: OF_STRING_ENCODING_WINDOWS_1252])

	TEST(@"Lossy conversion of UTF-8 to ASCII",
	    !strcmp([@"This is a tést" lossyCStringWithEncoding:
	    OF_STRING_ENCODING_ASCII], "This is a t?st"))

	TEST(@"Lossy conversion of UTF-8 to ISO-8859-1",
	    !strcmp([@"This is ä t€st" lossyCStringWithEncoding:
	    OF_STRING_ENCODING_ISO_8859_1], "This is \xE4 t?st"))

	TEST(@"Lossy conversion of UTF-8 to ISO-8859-15",
	    !strcmp([@"This is ä t€st…" lossyCStringWithEncoding:
	    OF_STRING_ENCODING_ISO_8859_15], "This is \xE4 t\xA4st?"))

	TEST(@"Lossy conversion of UTF-8 to Windows-1252",
	    !strcmp([@"This is ä t€st…‼" lossyCStringWithEncoding:
	    OF_STRING_ENCODING_WINDOWS_1252], "This is \xE4 t\x80st\x85?"))

	TEST(@"+[stringWithFormat:]",
	    [(s[0] = [OFMutableString stringWithFormat: @"%@:%d", @"test", 123])
	    isEqual: @"test:123"])








|


|




|




|







|




|



|



|



|



|



|



|



|



|



|



|



|







195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
	    OFInvalidEncodingException,
	    [OFString stringWithUTF8String: "\xE0\x80"])
	EXPECT_EXCEPTION(@"Detection of invalid UTF-8 encoding #2",
	    OFInvalidEncodingException,
	    [OFString stringWithUTF8String: "\xF0\x80\x80\xC0"])

	TEST(@"-[reverse] on UTF-8 strings",
	    (s[0] = [OFMutableString_UTF8 stringWithUTF8String: "äöü€𝄞"]) &&
	    R([s[0] reverse]) && [s[0] isEqual: @"𝄞€üöä"])

	TEST(@"Conversion of ISO 8859-1 to Unicode",
	    [[OFString stringWithCString: "\xE4\xF6\xFC"
				encoding: OF_STRING_ENCODING_ISO_8859_1]
	    isEqual: @"äöü"])

	TEST(@"Conversion of ISO 8859-15 to Unicode",
	    [[OFString stringWithCString: "\xA4\xA6\xA8\xB4\xB8\xBC\xBD\xBE"
				encoding: OF_STRING_ENCODING_ISO_8859_15]
	    isEqual: @"€ŠšŽžŒœŸ"])

	TEST(@"Conversion of Windows 1252 to Unicode",
	    [[OFString stringWithCString: "\x80\x82\x83\x84\x85\x86\x87\x88"
					  "\x89\x8A\x8B\x8C\x8E\x91\x92\x93"
					  "\x94\x95\x96\x97\x98\x99\x9A\x9B"
					  "\x9C\x9E\x9F"
				encoding: OF_STRING_ENCODING_WINDOWS_1252]
	    isEqual: @"€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ"])

	TEST(@"Conversion of Codepage 437 to Unicode",
	    [[OFString stringWithCString: "\xB0\xB1\xB2\xDB"
				encoding: OF_STRING_ENCODING_CODEPAGE_437]
	    isEqual: @"░▒▓█"])

	TEST(@"Conversion of Unicode to ASCII #1",
	    !strcmp([@"This is a test" cStringWithEncoding:
	    OF_STRING_ENCODING_ASCII], "This is a test"))

	EXPECT_EXCEPTION(@"Conversion of Unicode to ASCII #2",
	    OFInvalidEncodingException,
	    [@"This is a tést" cStringWithEncoding: OF_STRING_ENCODING_ASCII])

	TEST(@"Conversion of Unicode to ISO-8859-1 #1",
	    !strcmp([@"This is ä test" cStringWithEncoding:
	    OF_STRING_ENCODING_ISO_8859_1], "This is \xE4 test"))

	EXPECT_EXCEPTION(@"Conversion of Unicode to ISO-8859-1 #2",
	    OFInvalidEncodingException, [@"This is ä t€st" cStringWithEncoding:
	    OF_STRING_ENCODING_ISO_8859_1])

	TEST(@"Conversion of Unicode to ISO-8859-15 #1",
	    !strcmp([@"This is ä t€st" cStringWithEncoding:
	    OF_STRING_ENCODING_ISO_8859_15], "This is \xE4 t\xA4st"))

	EXPECT_EXCEPTION(@"Conversion of Unicode to ISO-8859-15 #2",
	    OFInvalidEncodingException, [@"This is ä t€st…" cStringWithEncoding:
	    OF_STRING_ENCODING_ISO_8859_15])

	TEST(@"Conversion of Unicode to Windows-1252 #1",
	    !strcmp([@"This is ä t€st…" cStringWithEncoding:
	    OF_STRING_ENCODING_WINDOWS_1252], "This is \xE4 t\x80st\x85"))

	EXPECT_EXCEPTION(@"Conversion of Unicode to Windows-1252 #2",
	    OFInvalidEncodingException, [@"This is ä t€st…‼"
	    cStringWithEncoding: OF_STRING_ENCODING_WINDOWS_1252])

	TEST(@"Lossy conversion of Unicode to ASCII",
	    !strcmp([@"This is a tést" lossyCStringWithEncoding:
	    OF_STRING_ENCODING_ASCII], "This is a t?st"))

	TEST(@"Lossy conversion of Unicode to ISO-8859-1",
	    !strcmp([@"This is ä t€st" lossyCStringWithEncoding:
	    OF_STRING_ENCODING_ISO_8859_1], "This is \xE4 t?st"))

	TEST(@"Lossy conversion of Unicode to ISO-8859-15",
	    !strcmp([@"This is ä t€st…" lossyCStringWithEncoding:
	    OF_STRING_ENCODING_ISO_8859_15], "This is \xE4 t\xA4st?"))

	TEST(@"Lossy conversion of Unicode to Windows-1252",
	    !strcmp([@"This is ä t€st…‼" lossyCStringWithEncoding:
	    OF_STRING_ENCODING_WINDOWS_1252], "This is \xE4 t\x80st\x85?"))

	TEST(@"+[stringWithFormat:]",
	    [(s[0] = [OFMutableString stringWithFormat: @"%@:%d", @"test", 123])
	    isEqual: @"test:123"])