ObjFW  Check-in [e04c359780]

Overview
Comment:Add support for C strings encoded in Windows-1252.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: e04c35978033770ec23ce55fb4bb31d2c8e5438e7a3f71e1a813a5e9bf293498
User & Date: js on 2009-07-20 23:20:26
Other Links: manifest | tags
Context
2009-07-21
16:08
Fix wrong order of retain / release. check-in: a7b6d69e14 user: js tags: trunk
2009-07-20
23:20
Add support for C strings encoded in Windows-1252. check-in: e04c359780 user: js tags: trunk
18:39
Add support for C strings encoded in ISO 8859-15. check-in: 0ea758cd9c user: js tags: trunk
Changes

Modified src/OFString.h from [d669b9c0c5] to [070cca9ea4].

15
16
17
18
19
20
21

22
23
24
25
26
27
28
#import "OFObject.h"
#import "OFArray.h"

enum of_string_encoding {
	OF_STRING_ENCODING_UTF_8,
	OF_STRING_ENCODING_ISO_8859_1,
	OF_STRING_ENCODING_ISO_8859_15,

};

extern int of_string_check_utf8(const char*, size_t);
extern size_t of_string_unicode_to_utf8(uint32_t, char*);

/**
 * A class for managing strings.







>







15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
#import "OFObject.h"
#import "OFArray.h"

enum of_string_encoding {
	OF_STRING_ENCODING_UTF_8,
	OF_STRING_ENCODING_ISO_8859_1,
	OF_STRING_ENCODING_ISO_8859_15,
	OF_STRING_ENCODING_WINDOWS_1252
};

extern int of_string_check_utf8(const char*, size_t);
extern size_t of_string_unicode_to_utf8(uint32_t, char*);

/**
 * A class for managing strings.

Modified src/OFString.m from [472df8f045] to [7a4ce00abf].

26
27
28
29
30
31
32

33
34
35
36
37
38
39
#import "OFAutoreleasePool.h"
#import "OFExceptions.h"
#import "OFMacros.h"

#import "asprintf.h"

#import "encodings/iso_8859_15.h"


/* References for static linking */
void _references_to_categories_of_OFString()
{
	_OFHashing_reference = 1;
	_OFURLEncoding_reference = 1;
	_OFXMLElement_reference = 1;







>







26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#import "OFAutoreleasePool.h"
#import "OFExceptions.h"
#import "OFMacros.h"

#import "asprintf.h"

#import "encodings/iso_8859_15.h"
#import "encodings/windows_1252.h"

/* References for static linking */
void _references_to_categories_of_OFString()
{
	_OFHashing_reference = 1;
	_OFURLEncoding_reference = 1;
	_OFXMLElement_reference = 1;
263
264
265
266
267
268
269

270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285




286













287
288
289
290
291
292
293

		memcpy(string, str, length);
		string[length] = 0;

		break;
	case OF_STRING_ENCODING_ISO_8859_1:
	case OF_STRING_ENCODING_ISO_8859_15:

		for (i = j = 0; i < len; i++) {
			if ((uint8_t)str[i] < 0x80)
				string[j++] = str[i];
			else {
				char buf[4];
				uint32_t chr;
				size_t chr_bytes;

				switch (encoding) {
				case OF_STRING_ENCODING_ISO_8859_1:
					chr = (uint8_t)str[i];
					break;
				case OF_STRING_ENCODING_ISO_8859_15:
					chr = iso_8859_15_to_unicode[
					    (uint8_t)str[i]];
					break;




				default:













					/*
					 * We can't use [super dealloc] on OS X
					 * here. Compiler bug? Anyway,
					 * [self dealloc] will do here as we
					 * don't reimplement dealloc.
					 */
					c = isa;







>
















>
>
>
>

>
>
>
>
>
>
>
>
>
>
>
>
>







264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312

		memcpy(string, str, length);
		string[length] = 0;

		break;
	case OF_STRING_ENCODING_ISO_8859_1:
	case OF_STRING_ENCODING_ISO_8859_15:
	case OF_STRING_ENCODING_WINDOWS_1252:
		for (i = j = 0; i < len; i++) {
			if ((uint8_t)str[i] < 0x80)
				string[j++] = str[i];
			else {
				char buf[4];
				uint32_t chr;
				size_t chr_bytes;

				switch (encoding) {
				case OF_STRING_ENCODING_ISO_8859_1:
					chr = (uint8_t)str[i];
					break;
				case OF_STRING_ENCODING_ISO_8859_15:
					chr = iso_8859_15_to_unicode[
					    (uint8_t)str[i]];
					break;
				case OF_STRING_ENCODING_WINDOWS_1252:
					chr = windows_1252_to_unicode[
					    (uint8_t)str[i]];
					break;
				default:
					/*
					 * We can't use [super dealloc] on OS X
					 * here. Compiler bug? Anyway,
					 * [self dealloc] will do here as we
					 * don't reimplement dealloc.
					 */
					c = isa;
					[self dealloc];
					@throw [OFInvalidEncodingException
					    newWithClass: c];
				}

				if (chr == 0xFFFD) {
					/*
					 * We can't use [super dealloc] on OS X
					 * here. Compiler bug? Anyway,
					 * [self dealloc] will do here as we
					 * don't reimplement dealloc.
					 */
					c = isa;

Modified src/encodings/iso_8859_15.h from [2ef850307a] to [fe0487da8d].












1
2
3
4
5
6
7
8











static uint32_t iso_8859_15_to_unicode[256] = {
	0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
	0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
	0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
	0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F,
	0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
	0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
	0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
>
>
>
>
>
>
>
>
>
>
>
|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/*
 * Copyright (c) 2008 - 2009
 *   Jonathan Schleifer <js@webkeks.org>
 *
 * All rights reserved.
 *
 * This file is part of libobjfw. It may be distributed under the terms of the
 * Q Public License 1.0, which can be found in the file LICENSE included in
 * the packaging of this file.
 */

static uint16_t iso_8859_15_to_unicode[256] = {
	0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
	0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
	0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
	0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F,
	0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
	0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
	0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,

Added src/encodings/windows_1252.h version [b20dd0f9cd].



























































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
/*
 * Copyright (c) 2008 - 2009
 *   Jonathan Schleifer <js@webkeks.org>
 *
 * All rights reserved.
 *
 * This file is part of libobjfw. It may be distributed under the terms of the
 * Q Public License 1.0, which can be found in the file LICENSE included in
 * the packaging of this file.
 */

static uint16_t windows_1252_to_unicode[256] = {
	0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
	0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
	0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
	0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F,
	0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
	0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
	0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
	0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
	0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
	0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
	0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
	0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
	0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
	0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
	0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
	0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F,
	0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
	0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD,
	0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
	0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178,
	0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
	0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
	0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
	0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
	0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
	0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
	0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
	0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
	0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
	0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
	0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
	0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF
};

Modified tests/OFString/OFString.m from [74ea65afd6] to [2fbf2def3c].

20
21
22
23
24
25
26
27
28
29
30
31
32
33
34

#ifndef _WIN32
#define ZD "%zd"
#else
#define ZD "%u"
#endif

#define NUM_TESTS 70
#define SUCCESS								\
	printf("\r\033[1;%dmTests successful: " ZD "/%d\033[0m",	\
	    (i == NUM_TESTS - 1 ? 32 : 33), i + 1, NUM_TESTS);		\
	fflush(stdout);
#define FAIL								\
	printf("\r\033[K\033[1;31mTest " ZD "/%d failed!\033[m\n",	\
	    i + 1, NUM_TESTS);						\







|







20
21
22
23
24
25
26
27
28
29
30
31
32
33
34

#ifndef _WIN32
#define ZD "%zd"
#else
#define ZD "%u"
#endif

#define NUM_TESTS 71
#define SUCCESS								\
	printf("\r\033[1;%dmTests successful: " ZD "/%d\033[0m",	\
	    (i == NUM_TESTS - 1 ? 32 : 33), i + 1, NUM_TESTS);		\
	fflush(stdout);
#define FAIL								\
	printf("\r\033[K\033[1;31mTest " ZD "/%d failed!\033[m\n",	\
	    i + 1, NUM_TESTS);						\
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125






126
127
128
129
130
131
132
	CHECK_EXCEPT(s1 = [OFString stringWithCString: "\xF0\x80\x80\xC0"],
	    OFInvalidEncodingException)

	s1 = [OFMutableString stringWithCString: "äöü€𝄞"];
	CHECK([[s1 reverse] isEqual: @"𝄞€üöä"])
	[s1 dealloc];

	/* ISO-8859-1 tests */
	CHECK([[OFString stringWithCString: "\xE4\xF6\xFC"
				 encoding: OF_STRING_ENCODING_ISO_8859_1]
	    isEqual: @"äöü"])

	CHECK([[OFString stringWithCString: "\xA4\xA6\xA8\xB4\xB8\xBC\xBD\xBE"
				  encoding: OF_STRING_ENCODING_ISO_8859_15]
	    isEqual: @"€ŠšŽžŒœŸ"])







	/* Format tests */
	s1 = [OFMutableString stringWithFormat: @"%s: %d", "test", 123];
	CHECK([s1 isEqual: @"test: 123"])

	[s1 appendWithFormat: @"%02X", 15];
	CHECK([s1 isEqual: @"test: 1230F"])







|







>
>
>
>
>
>







111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
	CHECK_EXCEPT(s1 = [OFString stringWithCString: "\xF0\x80\x80\xC0"],
	    OFInvalidEncodingException)

	s1 = [OFMutableString stringWithCString: "äöü€𝄞"];
	CHECK([[s1 reverse] isEqual: @"𝄞€üöä"])
	[s1 dealloc];

	/* Encoding tests */
	CHECK([[OFString stringWithCString: "\xE4\xF6\xFC"
				 encoding: OF_STRING_ENCODING_ISO_8859_1]
	    isEqual: @"äöü"])

	CHECK([[OFString stringWithCString: "\xA4\xA6\xA8\xB4\xB8\xBC\xBD\xBE"
				  encoding: OF_STRING_ENCODING_ISO_8859_15]
	    isEqual: @"€ŠšŽžŒœŸ"])
	CHECK([[OFString stringWithCString: "\x80\x82\x83\x84\x85\x86\x87\x88"
					    "\x89\x8A\x8B\x8C\x8E\x91\x92\x93"
					    "\x94\x95\x96\x97\x98\x99\x9A\x9B"
					    "\x9C\x9E\x9F"
				  encoding: OF_STRING_ENCODING_WINDOWS_1252]
	    isEqual: @"€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ"])

	/* Format tests */
	s1 = [OFMutableString stringWithFormat: @"%s: %d", "test", 123];
	CHECK([s1 isEqual: @"test: 123"])

	[s1 appendWithFormat: @"%02X", 15];
	CHECK([s1 isEqual: @"test: 1230F"])