ObjFW  Check-in [8216fb9343]

Overview
Comment:Add support for C strings encoded in ISO 8859-1.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: 8216fb93437a87c76dd63e2363a95f294666df34265d86d9e85eb212e7a98ff5
User & Date: js on 2009-07-19 18:14:48
Other Links: manifest | tags
Context
2009-07-19
20:39
Add -[xmlParser:foundUnknownEntityNamed:] to OFXMLParserDelegate. check-in: 83c351c9fd user: js tags: trunk
18:14
Add support for C strings encoded in ISO 8859-1. check-in: 8216fb9343 user: js tags: trunk
17:02
Prevent one more wrong call to an init method. check-in: 95aa3a147e user: js tags: trunk
Changes

Modified src/OFConstString.m from [8dadde45bc] to [635f4f5681].

36
37
38
39
40
41
42















43
44
45
46
47
48
49
- init
{
	@throw [OFNotImplementedException newWithClass: isa
					      selector: _cmd];
}

- initWithCString: (const char*)str















{
	@throw [OFNotImplementedException newWithClass: isa
					      selector: _cmd];
}

- initWithCString: (const char*)str
           length: (size_t)len







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
/*
 * Plain -init is not supported by this class: calling it always raises an
 * OFNotImplementedException naming the receiver's class and this selector.
 */
- init
{
	id unsupported;

	unsupported = [OFNotImplementedException newWithClass: isa
						     selector: _cmd];
	@throw unsupported;
}

/*
 * Initializing from a C string is not supported by this class: the argument
 * is ignored and an OFNotImplementedException is always raised.
 */
- initWithCString: (const char*)str
{
	id unsupported;

	unsupported = [OFNotImplementedException newWithClass: isa
						     selector: _cmd];
	@throw unsupported;
}

/*
 * Initializing from a C string with an explicit encoding is not supported by
 * this class: both arguments are ignored and an OFNotImplementedException
 * naming the receiver's class and this selector is always raised.
 */
- initWithCString: (const char*)str
	 encoding: (enum of_string_encoding)encoding
{
	@throw [OFNotImplementedException newWithClass: isa
					      selector: _cmd];
}

/*
 * Initializing from a C string with an explicit encoding and length is not
 * supported by this class: all arguments are ignored and an
 * OFNotImplementedException is always raised.
 */
- initWithCString: (const char*)str
	 encoding: (enum of_string_encoding)encoding
	   length: (size_t)len
{
	id unsupported;

	unsupported = [OFNotImplementedException newWithClass: isa
						     selector: _cmd];
	@throw unsupported;
}

- initWithCString: (const char*)str
           length: (size_t)len

Modified src/OFString.h from [bf71f83fcd] to [3f6a6e3d31].

10
11
12
13
14
15
16





17
18
19
20
21
22
23
 */

#include <stdio.h>
#include <stdarg.h>

#import "OFObject.h"
#import "OFArray.h"






extern int of_string_check_utf8(const char*, size_t);
extern size_t of_string_unicode_to_utf8(uint32_t, char*);

/**
 * A class for managing strings.
 */







>
>
>
>
>







10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
 */

#include <stdio.h>
#include <stdarg.h>

#import "OFObject.h"
#import "OFArray.h"

/*
 * Encodings a C string may have when it is handed to the OFString C string
 * initializers and factory methods that take an encoding argument.
 */
enum of_string_encoding {
	/* The C string is UTF-8 encoded. */
	OF_STRING_ENCODING_UTF_8,
	/* The C string is ISO 8859-1 encoded. */
	OF_STRING_ENCODING_ISO_8859_1
};

extern int of_string_check_utf8(const char*, size_t);
extern size_t of_string_unicode_to_utf8(uint32_t, char*);

/**
 * A class for managing strings.
 */
44
45
46
47
48
49
50























51
52
53
54
55
56
57
 * Creates a new OFString from a UTF-8 encoded C string.
 *
 * \param str A UTF-8 encoded C string to initialize the OFString with
 * \return A new autoreleased OFString
 */
+ stringWithCString: (const char*)str;
























/**
 * Creates a new OFString from a UTF-8 encoded C string with the specified
 * length.
 *
 * \param str A UTF-8 encoded C string to initialize the OFString with
 * \param len The length of the string
 * \return A new autoreleased OFString







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
 * Creates a new OFString from a UTF-8 encoded C string.
 *
 * \param str A UTF-8 encoded C string to initialize the OFString with
 * \return A new autoreleased OFString
 */
+ stringWithCString: (const char*)str;

/**
 * Creates a new OFString from a C string with the specified encoding.
 * The whole string up to its terminating NUL byte is used.
 *
 * Raises an OFInvalidEncodingException if the C string is not valid in the
 * given encoding or the encoding is not one of the of_string_encoding values.
 *
 * \param str A NUL-terminated C string to initialize the OFString with
 * \param encoding The encoding of the C string
 * \return A new autoreleased OFString
 */
+ stringWithCString: (const char*)str
	   encoding: (enum of_string_encoding)encoding;

/**
 * Creates a new OFString from a C string with the specified encoding and
 * length.
 *
 * Raises an OFOutOfRangeException if len is larger than strlen(str), and an
 * OFInvalidEncodingException if the C string is not valid in the given
 * encoding or the encoding is not one of the of_string_encoding values.
 *
 * \param str A NUL-terminated C string to initialize the OFString with
 * \param encoding The encoding of the C string
 * \param len The number of bytes of str to use; must not exceed strlen(str)
 * \return A new autoreleased OFString
 */
+ stringWithCString: (const char*)str
	   encoding: (enum of_string_encoding)encoding
	     length: (size_t)len;

/**
 * Creates a new OFString from a UTF-8 encoded C string with the specified
 * length.
 *
 * \param str A UTF-8 encoded C string to initialize the OFString with
 * \param len The length of the string
 * \return A new autoreleased OFString
87
88
89
90
91
92
93
























94
95
96
97
98
99
100
 * Initializes an already allocated OFString from a UTF-8 encoded C string.
 *
 * \param str A UTF-8 encoded C string to initialize the OFString with
 * \return An initialized OFString
 */
- initWithCString: (const char*)str;

























/**
 * Initializes an already allocated OFString from a UTF-8 encoded C string with
 * the specified length.
 *
 * \param str A UTF-8 encoded C string to initialize the OFString with
 * \param len The length of the string
 * \return An initialized OFString







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
 * Initializes an already allocated OFString from a UTF-8 encoded C string.
 *
 * \param str A UTF-8 encoded C string to initialize the OFString with
 * \return An initialized OFString
 */
- initWithCString: (const char*)str;

/**
 * Initializes an already allocated OFString from a C string with the specified
 * encoding. The whole string up to its terminating NUL byte is used.
 *
 * Raises an OFInvalidEncodingException if the C string is not valid in the
 * given encoding or the encoding is not one of the of_string_encoding values.
 *
 * \param str A NUL-terminated C string to initialize the OFString with
 * \param encoding The encoding of the C string
 * \return An initialized OFString
 */
- initWithCString: (const char*)str
	 encoding: (enum of_string_encoding)encoding;

/**
 * Initializes an already allocated OFString from a C string with the specified
 * encoding and length.
 *
 * Raises an OFOutOfRangeException if len is larger than strlen(str), and an
 * OFInvalidEncodingException if the C string is not valid in the given
 * encoding or the encoding is not one of the of_string_encoding values.
 *
 * \param str A NUL-terminated C string to initialize the OFString with
 * \param encoding The encoding of the C string
 * \param len The number of bytes of str to use; must not exceed strlen(str)
 * \return An initialized OFString
 */
- initWithCString: (const char*)str
	 encoding: (enum of_string_encoding)encoding
	   length: (size_t)len;

/**
 * Initializes an already allocated OFString from a UTF-8 encoded C string with
 * the specified length.
 *
 * \param str A UTF-8 encoded C string to initialize the OFString with
 * \param len The length of the string
 * \return An initialized OFString

Modified src/OFString.m from [1a350e2ba4] to [2a8be01528].

143
144
145
146
147
148
149
















150
151
152
153
154
155
156
	return [[[self alloc] init] autorelease];
}

+ stringWithCString: (const char*)str
{
	return [[[self alloc] initWithCString: str] autorelease];
}

















+ stringWithCString: (const char*)str
	     length: (size_t)len
{
	return [[[self alloc] initWithCString: str
				       length: len] autorelease];
}







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
	return [[[self alloc] init] autorelease];
}

/*
 * Convenience constructor: allocates an instance of the receiving class,
 * initializes it with -initWithCString: and hands it to the autorelease pool.
 */
+ stringWithCString: (const char*)str
{
	id instance = [[self alloc] initWithCString: str];

	return [instance autorelease];
}

/*
 * Convenience constructor: allocates an instance of the receiving class,
 * initializes it with -initWithCString:encoding: and hands it to the
 * autorelease pool.
 */
+ stringWithCString: (const char*)str
	   encoding: (enum of_string_encoding)encoding
{
	id instance = [[self alloc] initWithCString: str
					   encoding: encoding];

	return [instance autorelease];
}

/*
 * Convenience constructor: allocates an instance of the receiving class,
 * initializes it with -initWithCString:encoding:length: and hands it to the
 * autorelease pool.
 */
+ stringWithCString: (const char*)str
	   encoding: (enum of_string_encoding)encoding
	     length: (size_t)len
{
	id instance = [[self alloc] initWithCString: str
					   encoding: encoding
					     length: len];

	return [instance autorelease];
}

/*
 * Convenience constructor: allocates an instance of the receiving class,
 * initializes it with -initWithCString:length: and hands it to the
 * autorelease pool.
 */
+ stringWithCString: (const char*)str
	     length: (size_t)len
{
	id instance = [[self alloc] initWithCString: str
					     length: len];

	return [instance autorelease];
}
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218


219
220
221

222
223
224

225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256













257
258
259





















































260
261








262
263
264
265
266
267
268
	string = NULL;

	return self;
}

- initWithCString: (const char*)str
{
	Class c;

	self = [super init];

	if (str != NULL) {
		length = strlen(str);

		switch (of_string_check_utf8(str, length)) {
		case 1:
			is_utf8 = YES;
			break;
		case -1:
			c = isa;
			[super dealloc];
			@throw [OFInvalidEncodingException newWithClass: c];
		}

		@try {
			string = [self allocMemoryWithSize: length + 1];
		} @catch (OFException *e) {
			/*
			 * We can't use [super dealloc] on OS X here.
			 * Compiler bug? Anyway, [self dealloc] will do here as
			 * we don't reimplement dealloc.
			 */
			[self dealloc];
			@throw e;
		}
		memcpy(string, str, length + 1);
	}

	return self;


}

- initWithCString: (const char*)str

	   length: (size_t)len
{
	Class c;


	self = [super init];

	if (len > strlen(str)) {
		c = isa;
		[super dealloc];
		@throw [OFOutOfRangeException newWithClass: c];
	}

	length = len;

	switch (of_string_check_utf8(str, length)) {
	case 1:
		is_utf8 = YES;
		break;
	case -1:
		c = isa;
		[super dealloc];
		@throw [OFInvalidEncodingException newWithClass: c];
	}

	@try {
		string = [self allocMemoryWithSize: length + 1];
	} @catch (OFException *e) {
		/*
		 * We can't use [super dealloc] on OS X here.
		 * Compiler bug? Anyway, [self dealloc] will do here as
		 * we don't reimplement dealloc.
		 */
		[self dealloc];
		@throw e;
	}













	memcpy(string, str, length);
	string[length] = 0;






















































	return self;
}









- initWithFormat: (OFString*)fmt, ...
{
	id ret;
	va_list args;

	va_start(args, fmt);







<
|
<
|
<
|
|
<
<
<
<
<
<
<
<
|
|
<
<
<
<
<
<
<
<
<
<
<
|
<
|
|
>
>



>



>











<
<
<
<
<
<
<
<
<
<





|
|




>
>
>
>
>
>
>
>
>
>
>
>
>
|
|

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>


>
>
>
>
>
>
>
>







196
197
198
199
200
201
202

203

204

205
206








207
208











209

210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232










233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
	string = NULL;

	return self;
}

/*
 * Initializes the receiver from a NUL-terminated, UTF-8 encoded C string by
 * forwarding to -initWithCString:encoding:length: with the string's full
 * length.
 *
 * \param str A UTF-8 encoded C string, or NULL
 * \return An initialized OFString
 */
- initWithCString: (const char*)str
{
	/*
	 * strlen(NULL) is undefined behaviour. A NULL str only gets the
	 * superclass initialization, so no C string is copied.
	 */
	if (str == NULL)
		return [super init];

	return [self initWithCString: str
			    encoding: OF_STRING_ENCODING_UTF_8
			      length: strlen(str)];
}









/*
 * Initializes the receiver from a NUL-terminated C string in the given
 * encoding by forwarding to -initWithCString:encoding:length: with the
 * string's full length.
 *
 * \param str A C string in the given encoding, or NULL
 * \param encoding The encoding of the C string
 * \return An initialized OFString
 */
- initWithCString: (const char*)str
	 encoding: (enum of_string_encoding)encoding
{
	/*
	 * strlen(NULL) is undefined behaviour. A NULL str only gets the
	 * superclass initialization, so no C string is copied.
	 */
	if (str == NULL)
		return [super init];

	return [self initWithCString: str
			    encoding: encoding
			      length: strlen(str)];
}

/*
 * Initializes the receiver from the first len bytes of the C string str,
 * interpreted in the given encoding, and stores them NUL-terminated.
 *
 * UTF-8 input is checked with of_string_check_utf8() and copied verbatim.
 * ISO 8859-1 input is converted: bytes below 0x80 are copied as they are,
 * every other byte is re-encoded with of_string_unicode_to_utf8() and the
 * buffer is grown by one byte for it.
 *
 * Throws OFOutOfRangeException if len is larger than strlen(str) and
 * OFInvalidEncodingException if the input is not valid in the given encoding
 * or the encoding is unknown. Exceptions raised while allocating or resizing
 * the buffer are rethrown after the receiver has been deallocated.
 *
 * \param str A NUL-terminated C string
 * \param encoding The encoding of the C string
 * \param len The number of bytes of str to use; must not exceed strlen(str)
 * \return An initialized OFString
 */
- initWithCString: (const char*)str
	 encoding: (enum of_string_encoding)encoding
	   length: (size_t)len
{
	Class c;
	size_t i, j;

	self = [super init];

	if (len > strlen(str)) {
		c = isa;
		[super dealloc];
		@throw [OFOutOfRangeException newWithClass: c];
	}

	length = len;

	@try {
		string = [self allocMemoryWithSize: length + 1];
	} @catch (OFException *e) {
		/*
		 * We can't use [super dealloc] on OS X here.
		 * Compiler bug? Anyway, [self dealloc] will do here as we
		 * don't reimplement dealloc.
		 */
		[self dealloc];
		@throw e;
	}

	switch (encoding) {
	case OF_STRING_ENCODING_UTF_8:
		switch (of_string_check_utf8(str, length)) {
		case 1:
			is_utf8 = YES;
			break;
		case -1:
			c = isa;
			[super dealloc];
			@throw [OFInvalidEncodingException newWithClass: c];
		}

		memcpy(string, str, length);
		string[length] = 0;

		break;
	case OF_STRING_ENCODING_ISO_8859_1:
		/*
		 * The loop must be bounded by the input length len and not by
		 * length: length is the size of the converted output and
		 * grows inside the loop. Bounding the loop by it would read
		 * past the requested input bytes and write past the end of
		 * the buffer.
		 */
		for (i = j = 0; i < len; i++) {
			if ((uint8_t)str[i] < 0x80)
				string[j++] = str[i];
			else {
				/*
				 * ISO 8859-1 can only have 2 bytes when encoded
				 * as UTF-8, nevertheless, let's be on the safe
				 * side.
				 */
				char buf[4];

				is_utf8 = YES;

				if (of_string_unicode_to_utf8(
				    (uint8_t)str[i], buf) == 0) {
					c = isa;
					[super dealloc];
					@throw [OFInvalidEncodingException
					    newWithClass: c];
				}

				/* One input byte becomes two output bytes. */
				length++;
				@try {
					string = [self resizeMemory: string
							     toSize: length +
								     1];
				} @catch (OFException *e) {
					/*
					 * We can't use [super dealloc] on OS X
					 * here. Compiler bug? Anyway,
					 * [self dealloc] will do here as we
					 * don't reimplement dealloc.
					 */
					[self dealloc];
					@throw e;
				}

				string[j++] = buf[0];
				string[j++] = buf[1];
			}
		}

		/* Here j == length, so this terminates the converted data. */
		string[length] = 0;

		break;
	default:
		c = isa;
		[super dealloc];
		@throw [OFInvalidEncodingException newWithClass: c];
	}

	return self;
}

/*
 * Initializes the receiver from the first len bytes of a UTF-8 encoded C
 * string by forwarding to -initWithCString:encoding:length:.
 */
- initWithCString: (const char*)str
	   length: (size_t)len
{
	const enum of_string_encoding utf8 = OF_STRING_ENCODING_UTF_8;

	return [self initWithCString: str
			    encoding: utf8
			      length: len];
}

- initWithFormat: (OFString*)fmt, ...
{
	id ret;
	va_list args;

	va_start(args, fmt);

Modified tests/OFString/OFString.m from [ba435901f8] to [3c317544a3].

20
21
22
23
24
25
26
27
28
29
30
31
32
33
34

#ifndef _WIN32
#define ZD "%zd"
#else
#define ZD "%u"
#endif

#define NUM_TESTS 68
#define SUCCESS								\
	printf("\r\033[1;%dmTests successful: " ZD "/%d\033[0m",	\
	    (i == NUM_TESTS - 1 ? 32 : 33), i + 1, NUM_TESTS);		\
	fflush(stdout);
#define FAIL								\
	printf("\r\033[K\033[1;31mTest " ZD "/%d failed!\033[m\n",	\
	    i + 1, NUM_TESTS);						\







|







20
21
22
23
24
25
26
27
28
29
30
31
32
33
34

/*
 * printf conversion used by SUCCESS and FAIL for the test counter i:
 * "%zd" everywhere except on Windows, where "%u" is used instead.
 * NOTE(review): i is declared outside this excerpt - confirm that its type
 * matches these conversions.
 */
#ifndef _WIN32
#define ZD "%zd"
#else
#define ZD "%u"
#endif

/* Denominator printed in the progress output of SUCCESS and FAIL. */
#define NUM_TESTS 69
/*
 * Rewrites the current terminal line with "Tests successful: <i + 1>/<total>"
 * and flushes stdout. The ANSI colour code is 32 once i reaches
 * NUM_TESTS - 1 and 33 before that.
 */
#define SUCCESS								\
	printf("\r\033[1;%dmTests successful: " ZD "/%d\033[0m",	\
	    (i == NUM_TESTS - 1 ? 32 : 33), i + 1, NUM_TESTS);		\
	fflush(stdout);
#define FAIL								\
	printf("\r\033[K\033[1;31mTest " ZD "/%d failed!\033[m\n",	\
	    i + 1, NUM_TESTS);						\
111
112
113
114
115
116
117





118
119
120
121
122
123
124
	CHECK_EXCEPT(s1 = [OFString stringWithCString: "\xF0\x80\x80\xC0"],
	    OFInvalidEncodingException)

	s1 = [OFMutableString stringWithCString: "äöü€𝄞"];
	CHECK([[s1 reverse] isEqual: @"𝄞€üöä"])
	[s1 dealloc];






	/* Format tests */
	s1 = [OFMutableString stringWithFormat: @"%s: %d", "test", 123];
	CHECK([s1 isEqual: @"test: 123"])

	[s1 appendWithFormat: @"%02X", 15];
	CHECK([s1 isEqual: @"test: 1230F"])








>
>
>
>
>







111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
	CHECK_EXCEPT(s1 = [OFString stringWithCString: "\xF0\x80\x80\xC0"],
	    OFInvalidEncodingException)

	s1 = [OFMutableString stringWithCString: "äöü€𝄞"];
	CHECK([[s1 reverse] isEqual: @"𝄞€üöä"])
	[s1 dealloc];

	/* ISO-8859-1 tests */
	CHECK([[OFString stringWithCString: "\xE4\xF6\xFC"
				 encoding: OF_STRING_ENCODING_ISO_8859_1]
	    isEqual: @"äöü"])

	/* Format tests */
	s1 = [OFMutableString stringWithFormat: @"%s: %d", "test", 123];
	CHECK([s1 isEqual: @"test: 123"])

	[s1 appendWithFormat: @"%02X", 15];
	CHECK([s1 isEqual: @"test: 1230F"])