ObjFW  Check-in [e2bbfb45d0]

Overview
Comment: Add +[OFString stringWithUnicodeString:].
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: e2bbfb45d037ce9089717a98f96c65c1f3947031bf828de5e3bac6a1f0353607
User & Date: js on 2011-05-01 00:35:20
Other Links: manifest | tags
Context
2011-05-01
12:43
Correctly set isUTF8 in -[OFString initWithUnicodeString:]. check-in: 8e8bb78eb5 user: js tags: trunk
00:35
Add +[OFString stringWithUnicodeString:]. check-in: e2bbfb45d0 user: js tags: trunk
2011-04-30
23:33
Style improvements. check-in: d968d32614 user: js tags: trunk
Changes

Modified src/OFString.h from [7edfd43e32] to [65a04abda1].

37
38
39
40
41
42
43

44
45
46
47
48
49
50
extern "C" {
#endif
extern int of_string_check_utf8(const char*, size_t);
extern size_t of_string_unicode_to_utf8(of_unichar_t, char*);
extern size_t of_string_utf8_to_unicode(const char*, size_t, of_unichar_t*);
extern size_t of_string_position_to_index(const char*, size_t);
extern size_t of_string_index_to_position(const char*, size_t, size_t);

#ifdef __cplusplus
}
#endif

@class OFArray;
@class OFURL;








>







37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
extern "C" {
#endif
extern int of_string_check_utf8(const char*, size_t);
extern size_t of_string_unicode_to_utf8(of_unichar_t, char*);
extern size_t of_string_utf8_to_unicode(const char*, size_t, of_unichar_t*);
extern size_t of_string_position_to_index(const char*, size_t);
extern size_t of_string_index_to_position(const char*, size_t, size_t);
extern size_t of_unicode_string_length(const of_unichar_t*);
#ifdef __cplusplus
}
#endif

@class OFArray;
@class OFURL;

109
110
111
112
113
114
115








116
117
118
119
120
121
122
 * Creates a new OFString from another string.
 *
 * \param string A string to initialize the OFString with
 * \return A new autoreleased OFString
 */
+ stringWithString: (OFString*)string;









/**
 * Creates a new OFString from a format string.
 * See printf for the format syntax.
 *
 * \param format A string used as format to initialize the OFString
 * \return A new autoreleased OFString
 */







>
>
>
>
>
>
>
>







110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
 * Creates a new OFString from another string.
 *
 * \param string A string to initialize the OFString with
 * \return A new autoreleased OFString
 */
+ stringWithString: (OFString*)string;

/**
 * Creates a new OFString from a zero-terminated string of of_unichar_t
 * characters, which is converted to UTF-8.
 *
 * A leading byte order mark (0xFEFF) is skipped. If the string then starts
 * with 0xFFFE0000, that character is skipped as well and every following
 * character is byte-swapped before it is converted.
 *
 * \param string The zero-terminated unicode string
 * \return A new autoreleased OFString
 */
+ stringWithUnicodeString: (of_unichar_t*)string;

/**
 * Creates a new OFString from a format string.
 * See printf for the format syntax.
 *
 * \param format A string used as format to initialize the OFString
 * \return A new autoreleased OFString
 */
221
222
223
224
225
226
227








228
229
230
231
232
233
234
 * Initializes an already allocated OFString with another string.
 *
 * \param string A string to initialize the OFString with
 * \return An initialized OFString
 */
- initWithString: (OFString*)string;









/**
 * Initializes an already allocated OFString with a format string.
 * See printf for the format syntax.
 *
 * \param format A string used as format to initialize the OFString
 * \return An initialized OFString
 */







>
>
>
>
>
>
>
>







230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
 * Initializes an already allocated OFString with another string.
 *
 * \param string A string to initialize the OFString with
 * \return An initialized OFString
 */
- initWithString: (OFString*)string;

/**
 * Initializes an already allocated OFString with a zero-terminated string of
 * of_unichar_t characters, which is converted to UTF-8.
 *
 * A leading byte order mark (0xFEFF) is skipped. If the string then starts
 * with 0xFFFE0000, that character is skipped as well and every following
 * character is byte-swapped before it is converted.
 *
 * An OFInvalidEncodingException is thrown if a character cannot be converted
 * into 1 to 4 bytes of UTF-8.
 *
 * \param string The zero-terminated unicode string
 * \return An initialized OFString
 */
- initWithUnicodeString: (of_unichar_t*)string;

/**
 * Initializes an already allocated OFString with a format string.
 * See printf for the format syntax.
 *
 * \param format A string used as format to initialize the OFString
 * \return An initialized OFString
 */

Modified src/OFString.m from [e280f8208a] to [b307aaa0bd].

239
240
241
242
243
244
245











246
247
248
249
250
251
252
	for (i = 0; i <= index; i++)
		if (OF_UNLIKELY((string[i] & 0xC0) == 0x80))
			if (++index > length)
				return OF_INVALID_INDEX;

	return index;
}












@implementation OFString
+ string
{
	return [[[self alloc] init] autorelease];
}








>
>
>
>
>
>
>
>
>
>
>







239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
	for (i = 0; i <= index; i++)
		if (OF_UNLIKELY((string[i] & 0xC0) == 0x80))
			if (++index > length)
				return OF_INVALID_INDEX;

	return index;
}

/*
 * Returns the number of of_unichar_t characters in a zero-terminated unicode
 * string. The terminating zero is not counted.
 */
size_t
of_unicode_string_length(const of_unichar_t *string)
{
	const of_unichar_t *string_ = string;

	while (*string_ != 0)
		string_++;

	/*
	 * Subtract the typed pointers so that the result is a number of
	 * characters. Subtracting the addresses as integers yields the number
	 * of bytes instead, which is sizeof(of_unichar_t) times too large.
	 */
	return (size_t)(string_ - string);
}

@implementation OFString
/*
 * Convenience constructor: allocates an instance of the receiving class,
 * initializes it with -init and hands it back autoreleased.
 */
+ string
{
	id instance = [[self alloc] init];

	return [instance autorelease];
}

278
279
280
281
282
283
284





285
286
287
288
289
290
291
				       length: length] autorelease];
}

+ stringWithString: (OFString*)string
{
	return [[[self alloc] initWithString: string] autorelease];
}






+ stringWithFormat: (OFString*)format, ...
{
	id ret;
	va_list arguments;

	va_start(arguments, format);







>
>
>
>
>







289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
				       length: length] autorelease];
}

/*
 * Convenience constructor: allocates an instance of the receiving class,
 * initializes it with -initWithString: and hands it back autoreleased.
 */
+ stringWithString: (OFString*)string
{
	id instance = [[self alloc] initWithString: string];

	return [instance autorelease];
}

/*
 * Convenience constructor: allocates an instance of the receiving class,
 * initializes it with -initWithUnicodeString: and hands it back autoreleased.
 */
+ stringWithUnicodeString: (of_unichar_t*)string
{
	id instance = [[self alloc] initWithUnicodeString: string];

	return [instance autorelease];
}

+ stringWithFormat: (OFString*)format, ...
{
	id ret;
	va_list arguments;

	va_start(arguments, format);
517
518
519
520
521
522
523














































































524
525
526
527
528
529
530
			free(string);
			@throw e;
		}
	} @catch (id e) {
		[self release];
		@throw e;
	}















































































	return self;
}

- initWithFormat: (OFString*)format, ...
{
	id ret;







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
			free(string);
			@throw e;
		}
	} @catch (id e) {
		[self release];
		@throw e;
	}

	return self;
}

/*
 * Initializes the receiver with the UTF-8 encoding of a zero-terminated
 * of_unichar_t string.
 *
 * A leading 0xFEFF is skipped. If 0xFFFE0000 is found at the start after
 * that, it is skipped as well and every character is passed through
 * of_bswap32() before it is encoded.
 *
 * Throws OFInvalidEncodingException if of_string_unicode_to_utf8() reports
 * anything other than 1 to 4 bytes for a character. On any exception, the
 * receiver is released before the exception is rethrown.
 */
- initWithUnicodeString: (of_unichar_t*)string_
{
	self = [super init];

	@try {
		const of_unichar_t *cursor = string_;
		BOOL byteSwapped = NO;
		size_t offset = 0;
		char utf8[4];

		if (*cursor == 0xFEFF)
			cursor++;

		if (*cursor == 0xFFFE0000) {
			byteSwapped = YES;
			cursor++;
		}

		/*
		 * Start from the value of of_unicode_string_length() (plus
		 * one byte for the terminating zero). The buffer is grown
		 * below by one byte for every additional UTF-8 byte that a
		 * character needs.
		 */
		length = of_unicode_string_length(cursor);
		string = [self allocMemoryWithSize: length + 1];

		for (; *cursor != '\0'; cursor++) {
			of_unichar_t character = *cursor;
			size_t utf8Len;

			if (byteSwapped)
				character = of_bswap32(character);

			utf8Len = of_string_unicode_to_utf8(character, utf8);

			/* Only 1 to 4 bytes are a valid UTF-8 sequence. */
			if (utf8Len < 1 || utf8Len > 4)
				@throw [OFInvalidEncodingException
				    newWithClass: isa];

			/* Multi-byte sequence: make room for the extra bytes. */
			if (utf8Len > 1) {
				length += utf8Len - 1;
				string = [self resizeMemory: string
						     toSize: length + 1];
			}

			memcpy(string + offset, utf8, utf8Len);
			offset += utf8Len;
		}

		string[offset] = '\0';
	} @catch (id e) {
		[self release];
		@throw e;
	}

	return self;
}

- initWithFormat: (OFString*)format, ...
{
	id ret;

Modified tests/OFStringTests.m from [4e36bc7a86] to [ae603ec4d9].

33
34
35
36
37
38
39
40




41
42
43
44
45
46
47
#import "TestsAppDelegate.h"

static OFString *module = @"OFString";
static OFString* whitespace[] = {
	@" \r \t\n\t \tasd  \t \t\t\r\n",
	@" \t\t  \t\t  \t \t"
};
static of_unichar_t ucstr[] = { 'f', 0xF6, 0xF6, 'b', 0xE4, 'r', 0 };





@interface EntityHandler: OFObject <OFStringXMLUnescapingDelegate>
@end

@implementation EntityHandler
-	   (OFString*)string: (OFString*)string
  containsUnknownEntityNamed: (OFString*)entity







|
>
>
>
>







33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#import "TestsAppDelegate.h"

static OFString *module = @"OFString";
static OFString* whitespace[] = {
	@" \r \t\n\t \tasd  \t \t\t\r\n",
	@" \t\t  \t\t  \t \t"
};
/*
 * "fööbär🀺" as a zero-terminated array of of_unichar_t code points. The last
 * character (0x1F03A) lies outside the Basic Multilingual Plane.
 */
static of_unichar_t ucstr[] = { 'f', 0xF6, 0xF6, 'b', 0xE4, 'r', 0x1F03A, 0 };
/*
 * The same string with the bytes of every character swapped, preceded by a
 * byte-swapped byte order mark (0xFFFE0000), e.g. 0x3AF00100 is 0x0001F03A
 * with its bytes reversed.
 */
static of_unichar_t sucstr[] = {
	0xFFFE0000, 0x66000000, 0xF6000000, 0xF6000000, 0x62000000, 0xE4000000,
	0x72000000, 0x3AF00100, 0
};

@interface EntityHandler: OFObject <OFStringXMLUnescapingDelegate>
@end

@implementation EntityHandler
-	   (OFString*)string: (OFString*)string
  containsUnknownEntityNamed: (OFString*)entity
124
125
126
127
128
129
130
131
132
133






134
135
136
137
138
139
140
	    [[s[0] uppercaseString] isEqual: @"3𝄞1€SÄT"])

	TEST(@"-[lowercaseString]", R([s[0] upper]) &&
	    [[s[0] lowercaseString] isEqual: @"3𝄞1€sät"])

	TEST(@"+[stringWithCString:length:]",
	    (s[0] = [OFMutableString stringWithCString: "\xEF\xBB\xBF" "foobar"
					      length: 6]) &&
	    [s[0] isEqual: @"foo"])







	TEST(@"+[stringWithContentsOfFile:encoding]", (s[1] = [OFString
	    stringWithContentsOfFile: @"testfile.txt"
			    encoding: OF_STRING_ENCODING_ISO_8859_1]) &&
	    [s[1] isEqual: @"testäöü"])

	TEST(@"+[stringWithContentsOfURL:encoding]", (s[1] = [OFString
	    stringWithContentsOfURL: [OFURL URLWithString:







|


>
>
>
>
>
>







128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
	    [[s[0] uppercaseString] isEqual: @"3𝄞1€SÄT"])

	TEST(@"-[lowercaseString]", R([s[0] upper]) &&
	    [[s[0] lowercaseString] isEqual: @"3𝄞1€sät"])

	TEST(@"+[stringWithCString:length:]",
	    (s[0] = [OFMutableString stringWithCString: "\xEF\xBB\xBF" "foobar"
						length: 6]) &&
	    [s[0] isEqual: @"foo"])

	TEST(@"+[stringWithUnicodeString:]",
	    (s[1] = [OFString stringWithUnicodeString: ucstr]) &&
	    [s[1] isEqual: @"fööbär🀺"] &&
	    (s[1] = [OFString stringWithUnicodeString: sucstr]) &&
	    [s[1] isEqual: @"fööbär🀺"])

	TEST(@"+[stringWithContentsOfFile:encoding]", (s[1] = [OFString
	    stringWithContentsOfFile: @"testfile.txt"
			    encoding: OF_STRING_ENCODING_ISO_8859_1]) &&
	    [s[1] isEqual: @"testäöü"])

	TEST(@"+[stringWithContentsOfURL:encoding]", (s[1] = [OFString
	    stringWithContentsOfURL: [OFURL URLWithString:
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370

	EXPECT_EXCEPTION(@"Detect out of range in -[hexadecimalValue]",
	    OFOutOfRangeException,
	    [@"0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF"
	     @"0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF"
	    hexadecimalValue])

	TEST(@"-[unicodeString]", (ua = [@"fööbär" unicodeString]) &&
	    !memcmp(ua, ucstr, 7 * sizeof(of_unichar_t)) && R(free(ua)))

	TEST(@"-[MD5Hash]", [[@"asdfoobar" MD5Hash]
	    isEqual: @"184dce2ec49b5422c7cfd8728864db4c"])

	TEST(@"-[SHA1Hash]", [[@"asdfoobar" SHA1Hash]
	    isEqual: @"f5f81ac0a8b5cbfdc4585ec1ad32e7b3a12b9b49"])








|
|







365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380

	EXPECT_EXCEPTION(@"Detect out of range in -[hexadecimalValue]",
	    OFOutOfRangeException,
	    [@"0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF"
	     @"0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF"
	    hexadecimalValue])

	TEST(@"-[unicodeString]", (ua = [@"fööbär🀺" unicodeString]) &&
	    !memcmp(ua, ucstr, 8 * sizeof(of_unichar_t)) && R(free(ua)))

	TEST(@"-[MD5Hash]", [[@"asdfoobar" MD5Hash]
	    isEqual: @"184dce2ec49b5422c7cfd8728864db4c"])

	TEST(@"-[SHA1Hash]", [[@"asdfoobar" SHA1Hash]
	    isEqual: @"f5f81ac0a8b5cbfdc4585ec1ad32e7b3a12b9b49"])