Index: src/OFString.h
==================================================================
--- src/OFString.h
+++ src/OFString.h
@@ -14,11 +14,11 @@
 
 #import "OFObject.h"
 #import "OFArray.h"
 
 extern int of_string_check_utf8(const char*, size_t);
-extern size_t of_string_unicode_to_utf8(uint32_t, uint8_t*);
+extern size_t of_string_unicode_to_utf8(uint32_t, char*);
 
 /**
  * A class for managing strings.
  */
 @interface OFString: OFObject
Index: src/OFString.m
==================================================================
--- src/OFString.m
+++ src/OFString.m
@@ -104,11 +104,11 @@
 
 	return utf8;
 }
 
 size_t
-of_string_unicode_to_utf8(uint32_t c, uint8_t *buf)
+of_string_unicode_to_utf8(uint32_t c, char *buf)
 {
 	if (c < 0x80) {
 		buf[0] = c;
 		return 1;
 	}
Index: src/OFXMLParser.m
==================================================================
--- src/OFXMLParser.m
+++ src/OFXMLParser.m
@@ -14,12 +14,76 @@
 #include <string.h>
 
 #import "OFXMLParser.h"
 #import "OFAutoreleasePool.h"
 #import "OFExceptions.h"
+#import "OFMacros.h"
 
 int _OFXMLParser_reference;
+
+/*
+ * Parses a numeric character reference and returns it as a UTF-8 string.
+ *
+ * entity points to the text between '&' and ';', e.g. "#8364" or "#x20AC",
+ * and length is the number of bytes in it. Returns nil if there are no
+ * digits, if a character is not a digit of the selected base, if the value
+ * exceeds U+10FFFF, or if of_string_unicode_to_utf8() returns 0.
+ */
+static OF_INLINE OFString*
+parse_numeric_entity(char *entity, size_t length)
+{
+	uint32_t c;
+	size_t i;
+	char buf[4];
+
+	if (length == 1 || *entity != '#')
+		return nil;
+
+	c = 0;
+	entity++;
+	length--;
+
+	if (entity[0] == 'x') {
+		if (length == 1)
+			return nil;
+
+		entity++;
+		length--;
+
+		for (i = 0; i < length; i++) {
+			if (entity[i] >= '0' && entity[i] <= '9')
+				c = (c * 0x10) + (entity[i] - '0');
+			else if (entity[i] >= 'A' && entity[i] <= 'F')
+				c = (c * 0x10) + (entity[i] - 'A' + 10);
+			else if (entity[i] >= 'a' && entity[i] <= 'f')
+				c = (c * 0x10) + (entity[i] - 'a' + 10);
+			else
+				return nil;
+
+			/* Past the last code point; also keeps c from wrapping */
+			if (c > 0x10FFFF)
+				return nil;
+		}
+	} else {
+		for (i = 0; i < length; i++) {
+			if (entity[i] >= '0' && entity[i] <= '9')
+				c = (c * 10) + (entity[i] - '0');
+			else
+				return nil;
+
+			/* Past the last code point; also keeps c from wrapping */
+			if (c > 0x10FFFF)
+				return nil;
+		}
+	}
+
+	if ((i = of_string_unicode_to_utf8(c, buf)) == 0)
+		return nil;
+
+	return [OFString stringWithCString: buf
+				 andLength: i];
+}
 
 @implementation OFXMLParser
 + xmlParser
 {
 	return [[[self alloc] init] autorelease];
@@ -61,29 +125,43 @@
 							  andLength: i - last];
 
 			last = i + 1;
 			in_entity = YES;
 		} else if (in_entity && string[i] == ';') {
+			char *entity = string + last;
 			size_t len = i - last;
 
-			if (len == 2 && !memcmp(string + last, "lt", 2))
+			if (len == 2 && !memcmp(entity, "lt", 2))
 				[ret appendString: @"<"];
-			else if (len == 2 && !memcmp(string + last, "gt", 2))
+			else if (len == 2 && !memcmp(entity, "gt", 2))
 				[ret appendString: @">"];
-			else if (len == 4 && !memcmp(string + last, "quot", 4))
+			else if (len == 4 && !memcmp(entity, "quot", 4))
 				[ret appendString: @"\""];
-			else if (len == 4 && !memcmp(string + last, "apos", 4))
+			else if (len == 4 && !memcmp(entity, "apos", 4))
 				[ret appendString: @"'"];
-			else if (len == 3 && !memcmp(string + last, "amp", 3))
+			else if (len == 3 && !memcmp(entity, "amp", 3))
 				[ret appendString: @"&"];
-			else if (h != nil) {
+			else if (entity[0] == '#') {
+				OFAutoreleasePool *pool;
+				OFString *tmp;
+
+				pool = [[OFAutoreleasePool alloc] init];
+				tmp = parse_numeric_entity(entity, len);
+
+				if (tmp == nil)
+					@throw [OFInvalidEncodingException
+					    newWithClass: isa];
+
+				[ret appendString: tmp];
+				[pool release];
+			} else if (h != nil) {
 				OFAutoreleasePool *pool;
 				OFString *n, *tmp;
 
 				pool = [[OFAutoreleasePool alloc] init];
 
-				n = [OFString stringWithCString: string + last
+				n = [OFString stringWithCString: entity
 						      andLength: len];
 				tmp = [h foundUnknownEntityNamed: n];
 
 				if (tmp == nil)
 					@throw [OFInvalidEncodingException
Index: tests/OFString/OFString.m
==================================================================
--- tests/OFString/OFString.m
+++ tests/OFString/OFString.m
@@ -22,11 +22,11 @@
 #define ZD "%zd"
 #else
 #define ZD "%u"
 #endif
 
-#define NUM_TESTS 53
+#define NUM_TESTS 60
 #define SUCCESS \
 	printf("\r\033[1;%dmTests successful: " ZD "/%d\033[0m", \
 	    (i == NUM_TESTS - 1 ? 32 : 33), i + 1, NUM_TESTS); \
 	fflush(stdout);
 #define FAIL \
@@ -199,10 +199,21 @@
 	s1 = [@"x&foo;y" stringByXMLUnescapingWithHandler: h];
 	CHECK([s1 isEqual: @"xbary"]);
 
 	CHECK_EXCEPT([@"x&" stringByXMLUnescaping],
 	    OFInvalidEncodingException)
+
+	CHECK([[@"&#x79;" stringByXMLUnescaping] isEqual: @"y"]);
+	CHECK([[@"&#xe4;" stringByXMLUnescaping] isEqual: @"ä"]);
+	CHECK([[@"&#8364;" stringByXMLUnescaping] isEqual: @"€"]);
+	CHECK([[@"&#x1D11E;" stringByXMLUnescaping] isEqual: @"𝄞"]);
+
+	CHECK_EXCEPT([@"&#;" stringByXMLUnescaping], OFInvalidEncodingException)
+	CHECK_EXCEPT([@"&#x;" stringByXMLUnescaping],
+	    OFInvalidEncodingException)
+	CHECK_EXCEPT([@"&#xg;" stringByXMLUnescaping],
+	    OFInvalidEncodingException)
 
 	puts("");
 
 	return 0;
 }