ObjFW  Check-in [0ea758cd9c]

Overview
Comment:Add support for C strings encoded in ISO 8859-15.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: 0ea758cd9c3b327b79990d6503b2bc3fb2d66c7566295c98988fb800720cbb4e
User & Date: js on 2009-07-20 18:39:51
Other Links: manifest | tags
Context
2009-07-20
23:20
Add support for C strings encoded in Windows-1252. check-in: e04c359780 user: js tags: trunk
18:39
Add support for C strings encoded in ISO 8859-15. check-in: 0ea758cd9c user: js tags: trunk
2009-07-19
20:39
Add -[xmlParser:foundUnknownEntityNamed:] to OFXMLParserDelegate. check-in: 83c351c9fd user: js tags: trunk
Changes

Modified src/OFString.h from [3f6a6e3d31] to [d669b9c0c5].

13
14
15
16
17
18
19
20


21
22
23
24
25
26
27
13
14
15
16
17
18
19

20
21
22
23
24
25
26
27
28







-
+
+







#include <stdarg.h>

#import "OFObject.h"
#import "OFArray.h"

/**
 * Encodings a C string may be in when it is handed to OFString.
 *
 * The values are implicitly numbered from 0 in declaration order, so new
 * encodings must only be appended at the end.
 */
enum of_string_encoding {
	/* Input is checked with of_string_check_utf8() and copied as is. */
	OF_STRING_ENCODING_UTF_8,
	/* Bytes >= 0x80 are taken as the Unicode code point of the same value. */
	OF_STRING_ENCODING_ISO_8859_1,
	/* Bytes >= 0x80 are mapped through the iso_8859_15_to_unicode table. */
	OF_STRING_ENCODING_ISO_8859_15,
};

/*
 * Checks a buffer of the given length (not necessarily NUL-terminated, as the
 * length is passed explicitly).
 *
 * The string initializer switches on the result: 1 makes it flag the string
 * as UTF-8, -1 makes it throw an OFInvalidEncodingException. Any other result
 * is accepted without setting the flag.
 * NOTE(review): presumably 0 means "valid, ASCII only" - confirm against the
 * implementation.
 */
extern int of_string_check_utf8(const char*, size_t);
/*
 * Encodes one Unicode code point as UTF-8 into the supplied buffer and
 * returns the number of bytes stored there. Callers treat a return value of
 * 0 as "cannot be encoded" and raise an OFInvalidEncodingException.
 *
 * Callers in this check-in pass a 4 byte buffer.
 * NOTE(review): the output is apparently not NUL-terminated (callers copy
 * exactly the returned number of bytes) - confirm.
 */
extern size_t of_string_unicode_to_utf8(uint32_t, char*);

/**
 * A class for managing strings.

Modified src/OFString.m from [2a8be01528] to [472df8f045].

24
25
26
27
28
29
30


31
32
33
34
35
36
37
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39







+
+








#import "OFString.h"
#import "OFAutoreleasePool.h"
#import "OFExceptions.h"
#import "OFMacros.h"

#import "asprintf.h"

#import "encodings/iso_8859_15.h"

/* References for static linking */
void _references_to_categories_of_OFString()
{
	_OFHashing_reference = 1;
	_OFURLEncoding_reference = 1;
	_OFXMLElement_reference = 1;
245
246
247
248
249
250
251





252
253

254
255
256
257
258
259
260
261

262

263
264
265













266

267
268
269
270
271
272











273

274
275
276







277
278

279
280
281
282
283

284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300


301
302
303
304
305
306
307





308
309

310
311
312
313
314
315
316
247
248
249
250
251
252
253
254
255
256
257
258
259

260
261
262
263
264
265
266
267
268
269

270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286

287






288
289
290
291
292
293
294
295
296
297
298
299
300
301


302
303
304
305
306
307
308
309

310
311
312
313
314

315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330


331
332
333
334
335
336
337
338
339
340
341
342
343
344
345

346
347
348
349
350
351
352
353







+
+
+
+
+

-
+








+
-
+



+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+

+

-
-
+
+
+
+
+
+
+

-
+




-
+















-
-
+
+







+
+
+
+
+

-
+







	switch (encoding) {
	case OF_STRING_ENCODING_UTF_8:
		switch (of_string_check_utf8(str, length)) {
		case 1:
			is_utf8 = YES;
			break;
		case -1:
			/*
			 * We can't use [super dealloc] on OS X here.
			 * Compiler bug? Anyway, [self dealloc] will do here as
			 * we don't reimplement dealloc.
			 */
			c = isa;
			[super dealloc];
			[self dealloc];
			@throw [OFInvalidEncodingException newWithClass: c];
		}

		memcpy(string, str, length);
		string[length] = 0;

		break;
	case OF_STRING_ENCODING_ISO_8859_1:
	case OF_STRING_ENCODING_ISO_8859_15:
		for (i = j = 0; i < length; i++) {
		for (i = j = 0; i < len; i++) {
			if ((uint8_t)str[i] < 0x80)
				string[j++] = str[i];
			else {
				char buf[4];
				uint32_t chr;
				size_t chr_bytes;

				switch (encoding) {
				case OF_STRING_ENCODING_ISO_8859_1:
					chr = (uint8_t)str[i];
					break;
				case OF_STRING_ENCODING_ISO_8859_15:
					chr = iso_8859_15_to_unicode[
					    (uint8_t)str[i]];
					break;
				default:
				/*
					/*
				 * ISO 8859-1 can only have 2 bytes when encoded
				 * as UTF-8, nevertheless, let's be on the safe
				 * side.
				 */
				char buf[4];

					 * We can't use [super dealloc] on OS X
					 * here. Compiler bug? Anyway,
					 * [self dealloc] will do here as we
					 * don't reimplement dealloc.
					 */
					c = isa;
					[self dealloc];
					@throw [OFInvalidEncodingException
					    newWithClass: c];
				}

				is_utf8 = YES;
				chr_bytes = of_string_unicode_to_utf8(chr, buf);

				if (of_string_unicode_to_utf8(
				    (uint8_t)str[i], buf) == 0) {
				if (chr_bytes == 0) {
					/*
					 * We can't use [super dealloc] on OS X
					 * here. Compiler bug? Anyway,
					 * [self dealloc] will do here as we
					 * don't reimplement dealloc.
					 */
					c = isa;
					[super dealloc];
					[self dealloc];
					@throw [OFInvalidEncodingException
					    newWithClass: c];
				}

				length++;
				length += chr_bytes - 1;
				@try {
					string = [self resizeMemory: string
							     toSize: length +
								     1];
				} @catch (OFException *e) {
					/*
					 * We can't use [super dealloc] on OS X
					 * here. Compiler bug? Anyway,
					 * [self dealloc] will do here as we
					 * don't reimplement dealloc.
					 */
					[self dealloc];
					@throw e;
				}

				string[j++] = buf[0];
				string[j++] = buf[1];
				memcpy(string + j, buf, chr_bytes);
				j += chr_bytes;
			}
		}

		string[length] = 0;

		break;
	default:
		/*
		 * We can't use [super dealloc] on OS X here.
		 * Compiler bug? Anyway, [self dealloc] will do here as we
		 * don't reimplement dealloc.
		 */
		c = isa;
		[super dealloc];
		[self dealloc];
		@throw [OFInvalidEncodingException newWithClass: c];
	}

	return self;
}

- initWithCString: (const char*)str

Added src/encodings/iso_8859_15.h version [2ef850307a].



































1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
/*
 * Maps every ISO 8859-15 byte value (used as the index) to its Unicode code
 * point.
 *
 * The table is identical to ISO 8859-1 except for eight positions:
 * 0xA4 (U+20AC), 0xA6 (U+0160), 0xA8 (U+0161), 0xB4 (U+017D),
 * 0xB8 (U+017E), 0xBC (U+0152), 0xBD (U+0153) and 0xBE (U+0178).
 *
 * It is a pure lookup table, hence const: every file including this header
 * gets its own read-only copy that cannot be modified by accident.
 */
static const uint32_t iso_8859_15_to_unicode[256] = {
	0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
	0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
	0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
	0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F,
	0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
	0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
	0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
	0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
	0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
	0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
	0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
	0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
	0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
	0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
	0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
	0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F,
	0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
	0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
	0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
	0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
	0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AC, 0x00A5, 0x0160, 0x00A7,
	0x0161, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
	0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x017D, 0x00B5, 0x00B6, 0x00B7,
	0x017E, 0x00B9, 0x00BA, 0x00BB, 0x0152, 0x0153, 0x0178, 0x00BF,
	0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
	0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
	0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
	0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
	0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
	0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
	0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
	0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF
};

Modified tests/OFString/OFString.m from [3c317544a3] to [74ea65afd6].

20
21
22
23
24
25
26
27

28
29
30
31
32
33
34
20
21
22
23
24
25
26

27
28
29
30
31
32
33
34







-
+








#ifndef _WIN32
#define ZD "%zd"
#else
#define ZD "%u"
#endif

/*
 * Number of tests the progress output counts up to: SUCCESS and FAIL print it
 * as the denominator, and SUCCESS switches colour once the last test passed.
 * Has to be raised whenever a test is added.
 */
#define NUM_TESTS 70
#define SUCCESS								\
	printf("\r\033[1;%dmTests successful: " ZD "/%d\033[0m",	\
	    (i == NUM_TESTS - 1 ? 32 : 33), i + 1, NUM_TESTS);		\
	fflush(stdout);
#define FAIL								\
	printf("\r\033[K\033[1;31mTest " ZD "/%d failed!\033[m\n",	\
	    i + 1, NUM_TESTS);						\
115
116
117
118
119
120
121




122
123
124
125
126
127
128
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132







+
+
+
+







	CHECK([[s1 reverse] isEqual: @"𝄞€üöä"])
	[s1 dealloc];

	/* ISO-8859-1 tests */
	CHECK([[OFString stringWithCString: "\xE4\xF6\xFC"
				 encoding: OF_STRING_ENCODING_ISO_8859_1]
	    isEqual: @"äöü"])

	CHECK([[OFString stringWithCString: "\xA4\xA6\xA8\xB4\xB8\xBC\xBD\xBE"
				  encoding: OF_STRING_ENCODING_ISO_8859_15]
	    isEqual: @"€ŠšŽžŒœŸ"])

	/* Format tests */
	s1 = [OFMutableString stringWithFormat: @"%s: %d", "test", 123];
	CHECK([s1 isEqual: @"test: 123"])

	[s1 appendWithFormat: @"%02X", 15];
	CHECK([s1 isEqual: @"test: 1230F"])