Overview
Comment: | Also parse &#NNNN; and &#xHHHH; in -[stringByXMLUnescaping]. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA3-256: |
a80e9b948d0a26416f5fd37987241f4c |
User & Date: | js on 2009-07-14 21:58:22 |
Other Links: | manifest | tags |
Context
2009-07-16
| ||
23:02 | Also handle '+' in -[stringByURLDecoding]. check-in: 92d8754e02 user: js tags: trunk | |
2009-07-14
| ||
21:58 | Also parse &#NNNN; and &#xHHHH; in -[stringByXMLUnescaping]. check-in: a80e9b948d user: js tags: trunk | |
21:22 | Add of_string_unicode_to_utf8 which converts unicode to UTF-8. check-in: de937a62e4 user: js tags: trunk | |
Changes
Modified src/OFString.h from [42972211ab] to [292d36265a].
︙ | ︙ | |||
12 13 14 15 16 17 18 | #include <stdio.h> #include <stdarg.h> #import "OFObject.h" #import "OFArray.h" extern int of_string_check_utf8(const char*, size_t); | | | 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 | #include <stdio.h> #include <stdarg.h> #import "OFObject.h" #import "OFArray.h" extern int of_string_check_utf8(const char*, size_t); extern size_t of_string_unicode_to_utf8(uint32_t, char*); /** * A class for managing strings. */ @interface OFString: OFObject <OFCopying, OFMutableCopying> { char *string; |
︙ | ︙ |
Modified src/OFString.m from [2bac621bba] to [c4e41efb24].
︙ | ︙ | |||
102 103 104 105 106 107 108 | madvise((void*)str, len, MADV_NORMAL); return utf8; } size_t | | | 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 | madvise((void*)str, len, MADV_NORMAL); return utf8; } size_t of_string_unicode_to_utf8(uint32_t c, char *buf) { if (c < 0x80) { buf[0] = c; return 1; } if (c < 0x800) { buf[0] = 0xC0 | (c >> 6); |
︙ | ︙ |
Modified src/OFXMLParser.m from [b77468664e] to [8929707b71].
︙ | ︙ | |||
#include "config.h"

#include <string.h>

#import "OFXMLParser.h"
#import "OFAutoreleasePool.h"
#import "OFExceptions.h"
#import "OFMacros.h"

int _OFXMLParser_reference;

/**
 * Converts the body of a numeric character reference into a string.
 *
 * The caller passes the text found between '&' and ';', i.e. "#NNNN" for a
 * decimal reference or "#xHHHH" for a hexadecimal one. Hexadecimal digits are
 * accepted in both upper and lower case.
 *
 * \param entity The reference body, starting at the '#'. It does not need to
 *		 be NUL-terminated; only the first length bytes are read.
 * \param length The number of bytes in entity
 * \return A string containing the UTF-8 encoding of the referenced code
 *	   point, or nil if the body does not start with '#', contains no
 *	   digits, contains a character that is not a digit of the selected
 *	   base, denotes a value above U+10FFFF, or cannot be encoded by
 *	   of_string_unicode_to_utf8 (which signals that by returning 0)
 */
static OF_INLINE OFString*
parse_numeric_entity(char *entity, size_t length)
{
	uint32_t c, base;
	size_t i;
	char buf[4];

	/* At least the '#' and one more character are required. */
	if (length < 2 || *entity != '#')
		return nil;

	c = 0;
	base = 10;
	entity++;
	length--;

	if (entity[0] == 'x') {
		/* "#x" without any digit after it. */
		if (length == 1)
			return nil;

		base = 0x10;
		entity++;
		length--;
	}

	for (i = 0; i < length; i++) {
		uint32_t digit;

		if (entity[i] >= '0' && entity[i] <= '9')
			digit = entity[i] - '0';
		else if (base == 0x10 && entity[i] >= 'A' && entity[i] <= 'F')
			digit = entity[i] - 'A' + 10;
		else if (base == 0x10 && entity[i] >= 'a' && entity[i] <= 'f')
			/* Must be relative to 'a', not 'A'. */
			digit = entity[i] - 'a' + 10;
		else
			return nil;

		c = (c * base) + digit;

		/*
		 * Nothing above U+10FFFF is a code point. Rejecting it right
		 * here also keeps c from ever wrapping around, as c is at most
		 * 0x10FFFF before the next multiplication.
		 */
		if (c > 0x10FFFF)
			return nil;
	}

	if ((i = of_string_unicode_to_utf8(c, buf)) == 0)
		return nil;

	return [OFString stringWithCString: buf
				 andLength: i];
}

@implementation OFXMLParser
+ xmlParser
{
	return [[[self alloc] init] autorelease];
}
︙ | ︙ | |||
59 60 61 62 63 64 65 66 67 | if (!in_entity && string[i] == '&') { [ret appendCStringWithoutUTF8Checking: string + last andLength: i - last]; last = i + 1; in_entity = YES; } else if (in_entity && string[i] == ';') { size_t len = i - last; | > | | | | | > > > > > > > > > > > > > | | | 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 | if (!in_entity && string[i] == '&') { [ret appendCStringWithoutUTF8Checking: string + last andLength: i - last]; last = i + 1; in_entity = YES; } else if (in_entity && string[i] == ';') { char *entity = string + last; size_t len = i - last; if (len == 2 && !memcmp(entity, "lt", 2)) [ret appendString: @"<"]; else if (len == 2 && !memcmp(entity, "gt", 2)) [ret appendString: @">"]; else if (len == 4 && !memcmp(entity, "quot", 4)) [ret appendString: @"\""]; else if (len == 4 && !memcmp(entity, "apos", 4)) [ret appendString: @"'"]; else if (len == 3 && !memcmp(entity, "amp", 3)) [ret appendString: @"&"]; else if (entity[0] == '#') { OFAutoreleasePool *pool; OFString *tmp; pool = [[OFAutoreleasePool alloc] init]; tmp = parse_numeric_entity(entity, len); if (tmp == nil) @throw [OFInvalidEncodingException newWithClass: isa]; [ret appendString: tmp]; [pool release]; } else if (h != nil) { OFAutoreleasePool *pool; OFString *n, *tmp; pool = [[OFAutoreleasePool alloc] init]; n = [OFString stringWithCString: entity andLength: len]; tmp = [h foundUnknownEntityNamed: n]; if (tmp == nil) @throw [OFInvalidEncodingException newWithClass: isa]; |
︙ | ︙ |
Modified tests/OFString/OFString.m from [b49062f26b] to [2dbe4533ed].
︙ | ︙ | |||
20 21 22 23 24 25 26 | #ifndef _WIN32 #define ZD "%zd" #else #define ZD "%u" #endif | | | 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 | #ifndef _WIN32 #define ZD "%zd" #else #define ZD "%u" #endif #define NUM_TESTS 60 #define SUCCESS \ printf("\r\033[1;%dmTests successful: " ZD "/%d\033[0m", \ (i == NUM_TESTS - 1 ? 32 : 33), i + 1, NUM_TESTS); \ fflush(stdout); #define FAIL \ printf("\r\033[K\033[1;31mTest " ZD "/%d failed!\033[m\n", \ i + 1, NUM_TESTS); \ |
︙ | ︙ | |||
197 198 199 200 201 202 203 204 205 206 207 208 | h = [[EntityHandler alloc] init]; s1 = [@"x&foo;y" stringByXMLUnescapingWithHandler: h]; CHECK([s1 isEqual: @"xbary"]); CHECK_EXCEPT([@"x&" stringByXMLUnescaping], OFInvalidEncodingException) puts(""); return 0; } | > > > > > > > > > > > | 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 | h = [[EntityHandler alloc] init]; s1 = [@"x&foo;y" stringByXMLUnescapingWithHandler: h]; CHECK([s1 isEqual: @"xbary"]); CHECK_EXCEPT([@"x&" stringByXMLUnescaping], OFInvalidEncodingException) CHECK([[@"&#x79;" stringByXMLUnescaping] isEqual: @"y"]); CHECK([[@"&#xE4;" stringByXMLUnescaping] isEqual: @"ä"]); CHECK([[@"&#8364;" stringByXMLUnescaping] isEqual: @"€"]); CHECK([[@"&#x1D11E;" stringByXMLUnescaping] isEqual: @"𝄞"]); CHECK_EXCEPT([@"&#;" stringByXMLUnescaping], OFInvalidEncodingException) CHECK_EXCEPT([@"&#x;" stringByXMLUnescaping], OFInvalidEncodingException) CHECK_EXCEPT([@"&#xg;" stringByXMLUnescaping], OFInvalidEncodingException) puts(""); return 0; } |