ObjFW  Check-in [a80e9b948d]

Overview
Comment: Also parse &#NNNN; and &#xHHHH; in -[stringByXMLUnescaping].
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: a80e9b948d0a26416f5fd37987241f4ca163be6cf936b5cd3de8d151ada0a6ce
User & Date: js on 2009-07-14 21:58:22
Other Links: manifest | tags
Context
2009-07-16
23:02
Also handle '+' in -[stringByURLDecoding]. check-in: 92d8754e02 user: js tags: trunk
2009-07-14
21:58
Also parse &#NNNN; and &#xHHHH; in -[stringByXMLUnescaping]. check-in: a80e9b948d user: js tags: trunk
21:22
Add of_string_unicode_to_utf8 which converts unicode to UTF-8. check-in: de937a62e4 user: js tags: trunk
Changes

Modified src/OFString.h from [42972211ab] to [292d36265a].

12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
#include <stdio.h>
#include <stdarg.h>

#import "OFObject.h"
#import "OFArray.h"

extern int of_string_check_utf8(const char*, size_t);
extern size_t of_string_unicode_to_utf8(uint32_t, uint8_t*);

/**
 * A class for managing strings.
 */
@interface OFString: OFObject <OFCopying, OFMutableCopying>
{
	char	     *string;







|







12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
#include <stdio.h>
#include <stdarg.h>

#import "OFObject.h"
#import "OFArray.h"

extern int of_string_check_utf8(const char*, size_t);
extern size_t of_string_unicode_to_utf8(uint32_t, char*);

/**
 * A class for managing strings.
 */
@interface OFString: OFObject <OFCopying, OFMutableCopying>
{
	char	     *string;

Modified src/OFString.m from [2bac621bba] to [c4e41efb24].

102
103
104
105
106
107
108
109
110
111
112
113
114
115
116

	madvise((void*)str, len, MADV_NORMAL);

	return utf8;
}

size_t
of_string_unicode_to_utf8(uint32_t c, uint8_t *buf)
{
	if (c < 0x80) {
		buf[0] = c;
		return 1;
	}
	if (c < 0x800) {
		buf[0] = 0xC0 | (c >> 6);







|







102
103
104
105
106
107
108
109
110
111
112
113
114
115
116

	madvise((void*)str, len, MADV_NORMAL);

	return utf8;
}

size_t
of_string_unicode_to_utf8(uint32_t c, char *buf)
{
	if (c < 0x80) {
		buf[0] = c;
		return 1;
	}
	if (c < 0x800) {
		buf[0] = 0xC0 | (c >> 6);

Modified src/OFXMLParser.m from [b77468664e] to [8929707b71].

12
13
14
15
16
17
18

19
20















































21
22
23
24
25
26
27
#include "config.h"

#include <string.h>

#import "OFXMLParser.h"
#import "OFAutoreleasePool.h"
#import "OFExceptions.h"


int _OFXMLParser_reference;
















































@implementation OFXMLParser
/* Convenience constructor: alloc + init, then hand back via autorelease. */
+ xmlParser
{
	id parser = [[self alloc] init];

	return [parser autorelease];
}








>


>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#include "config.h"

#include <string.h>

#import "OFXMLParser.h"
#import "OFAutoreleasePool.h"
#import "OFExceptions.h"
#import "OFMacros.h"

int _OFXMLParser_reference;

/*
 * Decodes the body of a numeric character reference into a string.
 *
 * entity points at the text between '&' and ';' (for example "#8364" or
 * "#x20AC"); length is the number of chars in that text. The text is not
 * required to be NUL-terminated, only length chars are inspected.
 *
 * Returns an OFString built from the UTF-8 bytes produced by
 * of_string_unicode_to_utf8 for the referenced code point, or nil if the
 * reference is malformed: missing '#', no digits, a non-digit character, a
 * value above U+10FFFF, or a value of_string_unicode_to_utf8 rejects by
 * returning 0.
 */
static OF_INLINE OFString*
parse_numeric_entity(char *entity, size_t length)
{
	uint32_t c = 0;
	size_t i;
	char buf[4];

	/*
	 * Need '#' plus at least one more character. Checking length first
	 * also keeps a zero length from underflowing the decrement below.
	 */
	if (length < 2 || entity[0] != '#')
		return nil;

	/* Skip the '#'. */
	entity++;
	length--;

	if (entity[0] == 'x') {
		/* "#x" with nothing after it carries no digits. */
		if (length == 1)
			return nil;

		/* Skip the 'x'. */
		entity++;
		length--;

		for (i = 0; i < length; i++) {
			uint32_t digit;

			if (entity[i] >= '0' && entity[i] <= '9')
				digit = entity[i] - '0';
			else if (entity[i] >= 'A' && entity[i] <= 'F')
				digit = entity[i] - 'A' + 10;
			else if (entity[i] >= 'a' && entity[i] <= 'f')
				digit = entity[i] - 'a' + 10;
			else
				return nil;

			c = (c * 0x10) + digit;

			/*
			 * Bail out as soon as the value leaves the Unicode
			 * range. As c was <= 0x10FFFF before this step, the
			 * multiplication above cannot have wrapped.
			 */
			if (c > 0x10FFFF)
				return nil;
		}
	} else {
		for (i = 0; i < length; i++) {
			if (entity[i] < '0' || entity[i] > '9')
				return nil;

			c = (c * 10) + (entity[i] - '0');

			/* Same range / wrap-around guard as for hex. */
			if (c > 0x10FFFF)
				return nil;
		}
	}

	/* A return value of 0 means the code point could not be encoded. */
	if ((i = of_string_unicode_to_utf8(c, buf)) == 0)
		return nil;

	return [OFString stringWithCString: buf
				 andLength: i];
}

@implementation OFXMLParser
/* Convenience constructor: alloc + init, then hand back via autorelease. */
+ xmlParser
{
	id parser = [[self alloc] init];

	return [parser autorelease];
}

59
60
61
62
63
64
65

66
67
68
69
70
71
72
73
74
75
76
77













78
79
80
81
82
83
84
85
86
87
88
89
90
91
		if (!in_entity && string[i] == '&') {
			[ret appendCStringWithoutUTF8Checking: string + last
						    andLength: i - last];

			last = i + 1;
			in_entity = YES;
		} else if (in_entity && string[i] == ';') {

			size_t len = i - last;

			if (len == 2 && !memcmp(string + last, "lt", 2))
				[ret appendString: @"<"];
			else if (len == 2 && !memcmp(string + last, "gt", 2))
				[ret appendString: @">"];
			else if (len == 4 && !memcmp(string + last, "quot", 4))
				[ret appendString: @"\""];
			else if (len == 4 && !memcmp(string + last, "apos", 4))
				[ret appendString: @"'"];
			else if (len == 3 && !memcmp(string + last, "amp", 3))
				[ret appendString: @"&"];













			else if (h != nil) {
				OFAutoreleasePool *pool;
				OFString *n, *tmp;

				pool = [[OFAutoreleasePool alloc] init];

				n = [OFString stringWithCString: string + last
						      andLength: len];
				tmp = [h foundUnknownEntityNamed: n];

				if (tmp == nil)
					@throw [OFInvalidEncodingException
					    newWithClass: isa];








>


|

|

|

|

|

>
>
>
>
>
>
>
>
>
>
>
>
>
|





|







107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
		if (!in_entity && string[i] == '&') {
			[ret appendCStringWithoutUTF8Checking: string + last
						    andLength: i - last];

			last = i + 1;
			in_entity = YES;
		} else if (in_entity && string[i] == ';') {
			char *entity = string + last;
			size_t len = i - last;

			if (len == 2 && !memcmp(entity, "lt", 2))
				[ret appendString: @"<"];
			else if (len == 2 && !memcmp(entity, "gt", 2))
				[ret appendString: @">"];
			else if (len == 4 && !memcmp(entity, "quot", 4))
				[ret appendString: @"\""];
			else if (len == 4 && !memcmp(entity, "apos", 4))
				[ret appendString: @"'"];
			else if (len == 3 && !memcmp(entity, "amp", 3))
				[ret appendString: @"&"];
			else if (entity[0] == '#') {
				OFAutoreleasePool *pool;
				OFString *tmp;

				pool = [[OFAutoreleasePool alloc] init];
				tmp = parse_numeric_entity(entity, len);

				if (tmp == nil)
					@throw [OFInvalidEncodingException
					    newWithClass: isa];

				[ret appendString: tmp];
				[pool release];
			} else if (h != nil) {
				OFAutoreleasePool *pool;
				OFString *n, *tmp;

				pool = [[OFAutoreleasePool alloc] init];

				n = [OFString stringWithCString: entity
						      andLength: len];
				tmp = [h foundUnknownEntityNamed: n];

				if (tmp == nil)
					@throw [OFInvalidEncodingException
					    newWithClass: isa];

Modified tests/OFString/OFString.m from [b49062f26b] to [2dbe4533ed].

20
21
22
23
24
25
26
27
28
29
30
31
32
33
34

#ifndef _WIN32
#define ZD "%zd"
#else
#define ZD "%u"
#endif

#define NUM_TESTS 53
#define SUCCESS								\
	printf("\r\033[1;%dmTests successful: " ZD "/%d\033[0m",	\
	    (i == NUM_TESTS - 1 ? 32 : 33), i + 1, NUM_TESTS);		\
	fflush(stdout);
#define FAIL								\
	printf("\r\033[K\033[1;31mTest " ZD "/%d failed!\033[m\n",	\
	    i + 1, NUM_TESTS);						\







|







20
21
22
23
24
25
26
27
28
29
30
31
32
33
34

#ifndef _WIN32
#define ZD "%zd"
#else
#define ZD "%u"
#endif

#define NUM_TESTS 60
#define SUCCESS								\
	printf("\r\033[1;%dmTests successful: " ZD "/%d\033[0m",	\
	    (i == NUM_TESTS - 1 ? 32 : 33), i + 1, NUM_TESTS);		\
	fflush(stdout);
#define FAIL								\
	printf("\r\033[K\033[1;31mTest " ZD "/%d failed!\033[m\n",	\
	    i + 1, NUM_TESTS);						\
197
198
199
200
201
202
203











204
205
206
207
208

	h = [[EntityHandler alloc] init];
	s1 = [@"x&foo;y" stringByXMLUnescapingWithHandler: h];
	CHECK([s1 isEqual: @"xbary"]);

	CHECK_EXCEPT([@"x&amp" stringByXMLUnescaping],
	    OFInvalidEncodingException)












	puts("");

	return 0;
}







>
>
>
>
>
>
>
>
>
>
>





197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219

	h = [[EntityHandler alloc] init];
	s1 = [@"x&foo;y" stringByXMLUnescapingWithHandler: h];
	CHECK([s1 isEqual: @"xbary"]);

	CHECK_EXCEPT([@"x&amp" stringByXMLUnescaping],
	    OFInvalidEncodingException)

	CHECK([[@"&#x79;" stringByXMLUnescaping] isEqual: @"y"]);
	CHECK([[@"&#xE4;" stringByXMLUnescaping] isEqual: @"ä"]);
	CHECK([[@"&#8364;" stringByXMLUnescaping] isEqual: @"€"]);
	CHECK([[@"&#x1D11E;" stringByXMLUnescaping] isEqual: @"𝄞"]);

	CHECK_EXCEPT([@"&#;" stringByXMLUnescaping], OFInvalidEncodingException)
	CHECK_EXCEPT([@"&#x;" stringByXMLUnescaping],
	    OFInvalidEncodingException)
	CHECK_EXCEPT([@"&#xg;" stringByXMLUnescaping],
	    OFInvalidEncodingException)

	puts("");

	return 0;
}