ObjFW  Check-in [9b1f138b09]

Overview
Comment:OFXMLParser: Skip BOM.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: 9b1f138b096565993559a11de2c97761fcdd89dec4cea6ef6346646075a448c2
User & Date: js on 2013-04-12 07:47:57
Other Links: manifest | tags
Context
2013-04-15
12:52
vasprintf: Create a copy of arguments. check-in: c90e101c7c user: js tags: trunk
2013-04-12
07:47
OFXMLParser: Skip BOM. check-in: 9b1f138b09 user: js tags: trunk
2013-04-10
20:56
OFHTTPServer: Fix an evil typo. check-in: 9ea5f2723f user: js tags: trunk
Changes

Modified src/OFXMLParser.h from [586e50cb7b] to [947e0975da].

123
124
125
126
127
128
129

130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
 * OFXMLParser is an event-based XML parser which calls the delegate's callbacks
 * as soon as it finds something, thus suitable for streams as well.
 */
@interface OFXMLParser: OFObject <OFStringXMLUnescapingDelegate>
{
	id <OFXMLParserDelegate> _delegate;
	enum {

		OF_XMLPARSER_OUTSIDE_TAG,
		OF_XMLPARSER_TAG_OPENED,
		OF_XMLPARSER_IN_PROCESSING_INSTRUCTIONS,
		OF_XMLPARSER_IN_TAG_NAME,
		OF_XMLPARSER_IN_CLOSE_TAG_NAME,
		OF_XMLPARSER_IN_TAG,
		OF_XMLPARSER_IN_ATTR_NAME,
		OF_XMLPARSER_EXPECT_DELIM,
		OF_XMLPARSER_IN_ATTR_VALUE,
		OF_XMLPARSER_EXPECT_CLOSE,
		OF_XMLPARSER_EXPECT_SPACE_OR_CLOSE,
		OF_XMLPARSER_IN_EXCLAMATIONMARK,
		OF_XMLPARSER_IN_CDATA_OPENING,
		OF_XMLPARSER_IN_CDATA_1,
		OF_XMLPARSER_IN_CDATA_2,







>







|







123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
 * OFXMLParser is an event-based XML parser which calls the delegate's callbacks
 * as soon as it finds something, thus suitable for streams as well.
 */
@interface OFXMLParser: OFObject <OFStringXMLUnescapingDelegate>
{
	id <OFXMLParserDelegate> _delegate;
	enum {
		OF_XMLPARSER_IN_BYTE_ORDER_MARK,
		OF_XMLPARSER_OUTSIDE_TAG,
		OF_XMLPARSER_TAG_OPENED,
		OF_XMLPARSER_IN_PROCESSING_INSTRUCTIONS,
		OF_XMLPARSER_IN_TAG_NAME,
		OF_XMLPARSER_IN_CLOSE_TAG_NAME,
		OF_XMLPARSER_IN_TAG,
		OF_XMLPARSER_IN_ATTR_NAME,
		OF_XMLPARSER_EXPECT_DELIMITER,
		OF_XMLPARSER_IN_ATTR_VALUE,
		OF_XMLPARSER_EXPECT_CLOSE,
		OF_XMLPARSER_EXPECT_SPACE_OR_CLOSE,
		OF_XMLPARSER_IN_EXCLAMATIONMARK,
		OF_XMLPARSER_IN_CDATA_OPENING,
		OF_XMLPARSER_IN_CDATA_1,
		OF_XMLPARSER_IN_CDATA_2,

Modified src/OFXMLParser.m from [3a91a054b9] to [3bb79c6f6f].

135
136
137
138
139
140
141
142

143
144
145
146
147
148
149
}

@implementation OFXMLParser
+ (void)initialize
{
	size_t i;

	const SEL selectors_[] = {

		@selector(OF_parseOutsideTagWithBuffer:i:last:),
		@selector(OF_parseTagOpenedWithBuffer:i:last:),
		@selector(OF_parseInProcessingInstructionsWithBuffer:i:last:),
		@selector(OF_parseInTagNameWithBuffer:i:last:),
		@selector(OF_parseInCloseTagNameWithBuffer:i:last:),
		@selector(OF_parseInTagWithBuffer:i:last:),
		@selector(OF_parseInAttributeNameWithBuffer:i:last:),







|
>







135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
}

@implementation OFXMLParser
+ (void)initialize
{
	size_t i;

	const SEL selectors_[OF_XMLPARSER_NUM_STATES] = {
		@selector(OF_parseInByteOrderMarkWithBuffer:i:last:),
		@selector(OF_parseOutsideTagWithBuffer:i:last:),
		@selector(OF_parseTagOpenedWithBuffer:i:last:),
		@selector(OF_parseInProcessingInstructionsWithBuffer:i:last:),
		@selector(OF_parseInTagNameWithBuffer:i:last:),
		@selector(OF_parseInCloseTagNameWithBuffer:i:last:),
		@selector(OF_parseInTagWithBuffer:i:last:),
		@selector(OF_parseInAttributeNameWithBuffer:i:last:),
307
308
309
310
311
312
313





















314
315
316
317
318
319
320
}

/*
 * The following methods handle the different states of the parser. They are
 * looked up in +[initialize] and put in a lookup table to speed things up.
 * One dispatch for every character would be way too slow!
 */






















/* Not in a tag */
- (void)OF_parseOutsideTagWithBuffer: (const char*)buffer
				   i: (size_t*)i
				last: (size_t*)last
{
	size_t length;







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
}

/*
 * The following methods handle the different states of the parser. They are
 * looked up in +[initialize] and put in a lookup table to speed things up.
 * One dispatch for every character would be way too slow!
 */

/*
 * Skipping an optional UTF-8 byte order mark (EF BB BF).
 *
 * _level counts how many bytes of the mark have been matched so far. Once all
 * three bytes have been seen, the parser moves on to the "outside tag" state
 * (and _level is left at 3 by this method).
 *
 * If the very first byte does not belong to a byte order mark, the state is
 * switched to "outside tag" and *i is moved back by one - presumably so that
 * the caller's loop hands the same byte to the new state; confirm against the
 * calling loop. A mark that starts correctly but breaks off after one or two
 * bytes results in an OFMalformedXMLException.
 */
- (void)OF_parseInByteOrderMarkWithBuffer: (const char*)buffer
					i: (size_t*)i
				     last: (size_t*)last
{
	static const char byteOrderMark[] = "\xEF\xBB\xBF";

	if (buffer[*i] == byteOrderMark[_level]) {
		/* One more byte of the mark matched */
		if (++_level == 3)
			_state = OF_XMLPARSER_OUTSIDE_TAG;

		/* Advance *last past the byte that was just consumed */
		*last = *i + 1;
		return;
	}

	/* Mismatch in the middle of the mark: the input is malformed */
	if (_level != 0)
		@throw [OFMalformedXMLException exceptionWithClass: [self class]
							    parser: self];

	/* No byte order mark at all: step back and switch states */
	_state = OF_XMLPARSER_OUTSIDE_TAG;
	(*i)--;
}

/* Not in a tag */
- (void)OF_parseOutsideTagWithBuffer: (const char*)buffer
				   i: (size_t*)i
				last: (size_t*)last
{
	size_t length;
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
	}

	objc_autoreleasePoolPop(pool);

	[_buffer removeAllItems];

	*last = *i + 1;
	_state = OF_XMLPARSER_EXPECT_DELIM;
}

/* Expecting delimiter */
- (void)OF_parseExpectDelimiterWithBuffer: (const char*)buffer
					i: (size_t*)i
				     last: (size_t*)last
{







|







825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
	}

	objc_autoreleasePoolPop(pool);

	[_buffer removeAllItems];

	*last = *i + 1;
	_state = OF_XMLPARSER_EXPECT_DELIMITER;
}

/* Expecting delimiter */
- (void)OF_parseExpectDelimiterWithBuffer: (const char*)buffer
					i: (size_t*)i
				     last: (size_t*)last
{

Modified tests/OFXMLParserTests.m from [33a2fe7163] to [4892d444ce].

319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350

	return nil;
}

- (void)XMLParserTests
{
	OFAutoreleasePool *pool = [[OFAutoreleasePool alloc] init];
	const char *str = "<?xml version='1.0'?><?p?i?>"
	    "<!DOCTYPE <<><<>>>><root>\r\r"
	    " <![CDATA[f<]]]oo]]><bar/>\n"
	    " <foobar xmlns='urn:objfw:test:foobar'>\r\n"
	    "  <qux xmlns:foo='urn:objfw:test:foo'>\n"
	    "   <foo:bla foo:bla = '&#x62;&#x6c;&#x61;' blafoo='foo'>\n"
	    "    <blup foo:qux='asd' quxqux='test'/>\n"
	    "    <bla:bla\r\rxmlns:bla\r=\t\"urn:objfw:test:bla\" qux='qux'\r\n"
	    "     bla:foo='blafoo'/>\n"
	    "    <abc xmlns='urn:objfw:test:abc' abc='abc' foo:abc='abc'/>\n"
	    "   </foo:bla>\n"
	    "   <!-- commänt -->\n"
	    "  </qux>\n"
	    " </foobar>\n"
	    "</root>";
	size_t j, len;

	TEST(@"+[xmlParser]", (parser = [OFXMLParser parser]))

	TEST(@"-[setDelegate:]", R([parser setDelegate: self]))

	/* Simulate a stream where we only get chunks */
	len = strlen(str);

	for (j = 0; j < len; j+= 2) {







|
















|







319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350

	return nil;
}

- (void)XMLParserTests
{
	OFAutoreleasePool *pool = [[OFAutoreleasePool alloc] init];
	const char *str = "\xEF\xBB\xBF<?xml version='1.0'?><?p?i?>"
	    "<!DOCTYPE <<><<>>>><root>\r\r"
	    " <![CDATA[f<]]]oo]]><bar/>\n"
	    " <foobar xmlns='urn:objfw:test:foobar'>\r\n"
	    "  <qux xmlns:foo='urn:objfw:test:foo'>\n"
	    "   <foo:bla foo:bla = '&#x62;&#x6c;&#x61;' blafoo='foo'>\n"
	    "    <blup foo:qux='asd' quxqux='test'/>\n"
	    "    <bla:bla\r\rxmlns:bla\r=\t\"urn:objfw:test:bla\" qux='qux'\r\n"
	    "     bla:foo='blafoo'/>\n"
	    "    <abc xmlns='urn:objfw:test:abc' abc='abc' foo:abc='abc'/>\n"
	    "   </foo:bla>\n"
	    "   <!-- commänt -->\n"
	    "  </qux>\n"
	    " </foobar>\n"
	    "</root>";
	size_t j, len;

	TEST(@"+[parser]", (parser = [OFXMLParser parser]))

	TEST(@"-[setDelegate:]", R([parser setDelegate: self]))

	/* Simulate a stream where we only get chunks */
	len = strlen(str);

	for (j = 0; j < len; j+= 2) {