Overview
Comment: | OFString: Zero-terminate UTF-16 strings.
This partly reverts e8502c7. The rationale behind this is that, on some OSes, native APIs (on e.g. However, forcing zero-termination for every string so that -[characters] |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA3-256: |
9d3cd5e5fe7a26dc10fe5687da330eea |
User & Date: | js on 2013-01-07 22:07:58 |
Other Links: | manifest | tags |
Context
2013-01-07
| ||
23:42 | Win32: Correctly handle Unicode in environment. check-in: 1fb00cc3b4 user: js tags: trunk | |
22:07 | OFString: Zero-terminate UTF-16 strings. check-in: 9d3cd5e5fe user: js tags: trunk | |
14:49 | OFProcess: Implement environment passing on Win32. check-in: f51bceaa35 user: js tags: trunk | |
Changes
Modified src/OFString.h from [ad8cbdf010] to [31ec44e41b].
︙ | ︙ | |||
158 159 160 161 162 163 164 | * @return A new autoreleased OFString */ + (instancetype)stringWithCharacters: (const of_unichar_t*)characters length: (size_t)length byteOrder: (of_byte_order_t)byteOrder; /*! | | > > > > > > > > | > > > > > > > > > > > | 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 | * @return A new autoreleased OFString */ + (instancetype)stringWithCharacters: (const of_unichar_t*)characters length: (size_t)length byteOrder: (of_byte_order_t)byteOrder; /*! * @brief Creates a new OFString from a UTF-16 encoded string. * * @param string The UTF-16 string * @return A new autoreleased OFString */ + (instancetype)stringWithUTF16String: (const uint16_t*)string; /*! * @brief Creates a new OFString from a UTF-16 encoded string with the specified * length. * * @param string The UTF-16 string * @param length The length of the unicode string * @return A new autoreleased OFString */ + (instancetype)stringWithUTF16String: (const uint16_t*)string length: (size_t)length; /*! * @brief Creates a new OFString from a UTF-16 encoded string, assuming the * specified byte order if no BOM is found. * * @param string The UTF-16 string * @param byteOrder The byte order to assume if there is no BOM * @return A new autoreleased OFString */ + (instancetype)stringWithUTF16String: (const uint16_t*)string byteOrder: (of_byte_order_t)byteOrder; /*! * @brief Creates a new OFString from a UTF-16 encoded string with the * specified length, assuming the specified byte order if no BOM is * found. * * @param string The UTF-16 string * @param length The length of the unicode string |
︙ | ︙ | |||
337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 | * @param byteOrder The byte order to assume if there is no BOM * @return An initialized OFString */ - initWithCharacters: (const of_unichar_t*)characters length: (size_t)length byteOrder: (of_byte_order_t)byteOrder; /*! * @brief Initializes an already allocated OFString with a UTF-16 string with * the specified length. * * @param string The UTF-16 string * @param length The length of the UTF-16 string * @return An initialized OFString */ - initWithUTF16String: (const uint16_t*)string length: (size_t)length; /*! * @brief Initializes an already allocated OFString with a UTF-16 string with * the specified length, assuming the specified byte order if no BOM is * found. * * @param string The UTF-16 string * @param length The length of the UTF-16 string | > > > > > > > > > > > > > > > > > > > | 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 | * @param byteOrder The byte order to assume if there is no BOM * @return An initialized OFString */ - initWithCharacters: (const of_unichar_t*)characters length: (size_t)length byteOrder: (of_byte_order_t)byteOrder; /*! * @brief Initializes an already allocated OFString with a UTF-16 string. * * @param string The UTF-16 string * @return An initialized OFString */ - initWithUTF16String: (const uint16_t*)string; /*! * @brief Initializes an already allocated OFString with a UTF-16 string with * the specified length. * * @param string The UTF-16 string * @param length The length of the UTF-16 string * @return An initialized OFString */ - initWithUTF16String: (const uint16_t*)string length: (size_t)length; /*! * @brief Initializes an already allocated OFString with a UTF-16 string, * assuming the specified byte order if no BOM is found. * * @param string The UTF-16 string * @param byteOrder The byte order to assume if there is no BOM * @return An initialized OFString */ - initWithUTF16String: (const uint16_t*)string byteOrder: (of_byte_order_t)byteOrder; /*! * @brief Initializes an already allocated OFString with a UTF-16 string with * the specified length, assuming the specified byte order if no BOM is * found. * * @param string The UTF-16 string * @param length The length of the UTF-16 string |
︙ | ︙ | |||
898 899 900 901 902 903 904 905 906 907 | #endif #ifdef __cplusplus extern "C" { #endif extern size_t of_string_utf8_encode(of_unichar_t, char*); extern size_t of_string_utf8_decode(const char*, size_t, of_unichar_t*); #ifdef __cplusplus } #endif | > | 936 937 938 939 940 941 942 943 944 945 946 | #endif #ifdef __cplusplus extern "C" { #endif extern size_t of_string_utf8_encode(of_unichar_t, char*); extern size_t of_string_utf8_decode(const char*, size_t, of_unichar_t*); extern size_t of_string_utf16_length(const uint16_t*); #ifdef __cplusplus } #endif |
Modified src/OFString.m from [9b71ee7ecc] to [6044846057].
︙ | ︙ | |||
137 138 139 140 141 142 143 144 145 146 147 148 149 150 | *ret = ((buffer[0] & 0x07) << 18) | ((buffer[1] & 0x3F) << 12) | ((buffer[2] & 0x3F) << 6) | (buffer[3] & 0x3F); return 4; } return 0; } static OFString* standardize_path(OFArray *components, OFString *currentDirectory, OFString *parentDirectory, OFString *joinString) { void *pool = objc_autoreleasePoolPush(); OFMutableArray *array; | > > > > > > > > > > > | 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 | *ret = ((buffer[0] & 0x07) << 18) | ((buffer[1] & 0x3F) << 12) | ((buffer[2] & 0x3F) << 6) | (buffer[3] & 0x3F); return 4; } return 0; } size_t of_string_utf16_length(const uint16_t *string) { size_t length = 0; while (*string++ != 0) length++; return length; } static OFString* standardize_path(OFArray *components, OFString *currentDirectory, OFString *parentDirectory, OFString *joinString) { void *pool = objc_autoreleasePoolPush(); OFMutableArray *array; |
︙ | ︙ | |||
296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 | length: (size_t)length byteOrder: (of_byte_order_t)byteOrder { return (id)[[OFString_UTF8 alloc] initWithCharacters: string length: length byteOrder: byteOrder]; } - initWithUTF16String: (const uint16_t*)string length: (size_t)length { return (id)[[OFString_UTF8 alloc] initWithUTF16String: string length: length]; } - initWithUTF16String: (const uint16_t*)string length: (size_t)length byteOrder: (of_byte_order_t)byteOrder { return (id)[[OFString_UTF8 alloc] initWithUTF16String: string length: length | > > > > > > > > > > > > | 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 | length: (size_t)length byteOrder: (of_byte_order_t)byteOrder { return (id)[[OFString_UTF8 alloc] initWithCharacters: string length: length byteOrder: byteOrder]; } - initWithUTF16String: (const uint16_t*)string { return (id)[[OFString_UTF8 alloc] initWithUTF16String: string]; } - initWithUTF16String: (const uint16_t*)string length: (size_t)length { return (id)[[OFString_UTF8 alloc] initWithUTF16String: string length: length]; } - initWithUTF16String: (const uint16_t*)string byteOrder: (of_byte_order_t)byteOrder { return (id)[[OFString_UTF8 alloc] initWithUTF16String: string byteOrder: byteOrder]; } - initWithUTF16String: (const uint16_t*)string length: (size_t)length byteOrder: (of_byte_order_t)byteOrder { return (id)[[OFString_UTF8 alloc] initWithUTF16String: string length: length |
︙ | ︙ | |||
483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 | length: (size_t)length byteOrder: (of_byte_order_t)byteOrder { return [[[self alloc] initWithCharacters: string length: length byteOrder: byteOrder] autorelease]; } + (instancetype)stringWithUTF16String: (const uint16_t*)string length: (size_t)length { return [[[self alloc] initWithUTF16String: string length: length] autorelease]; } + (instancetype)stringWithUTF16String: (const uint16_t*)string length: (size_t)length byteOrder: (of_byte_order_t)byteOrder { return [[[self alloc] initWithUTF16String: string length: length | > > > > > > > > > > > > | 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 | length: (size_t)length byteOrder: (of_byte_order_t)byteOrder { return [[[self alloc] initWithCharacters: string length: length byteOrder: byteOrder] autorelease]; } + (instancetype)stringWithUTF16String: (const uint16_t*)string { return [[[self alloc] initWithUTF16String: string] autorelease]; } + (instancetype)stringWithUTF16String: (const uint16_t*)string length: (size_t)length { return [[[self alloc] initWithUTF16String: string length: length] autorelease]; } + (instancetype)stringWithUTF16String: (const uint16_t*)string byteOrder: (of_byte_order_t)byteOrder { return [[[self alloc] initWithUTF16String: string byteOrder: byteOrder] autorelease]; } + (instancetype)stringWithUTF16String: (const uint16_t*)string length: (size_t)length byteOrder: (of_byte_order_t)byteOrder { return [[[self alloc] initWithUTF16String: string length: length |
︙ | ︙ | |||
638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 | [self doesNotRecognizeSelector: _cmd]; abort(); } @catch (id e) { [self release]; @throw e; } } - initWithUTF16String: (const uint16_t*)string length: (size_t)length { return [self initWithUTF16String: string length: length byteOrder: OF_BYTE_ORDER_NATIVE]; } - initWithUTF16String: (const uint16_t*)string length: (size_t)length byteOrder: (of_byte_order_t)byteOrder { @try { [self doesNotRecognizeSelector: _cmd]; | > > > > > > > > > > > > > > > | 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 | [self doesNotRecognizeSelector: _cmd]; abort(); } @catch (id e) { [self release]; @throw e; } } - initWithUTF16String: (const uint16_t*)string { return [self initWithUTF16String: string length: of_string_utf16_length(string) byteOrder: OF_BYTE_ORDER_NATIVE]; } - initWithUTF16String: (const uint16_t*)string length: (size_t)length { return [self initWithUTF16String: string length: length byteOrder: OF_BYTE_ORDER_NATIVE]; } - initWithUTF16String: (const uint16_t*)string byteOrder: (of_byte_order_t)byteOrder { return [self initWithUTF16String: string length: of_string_utf16_length(string) byteOrder: byteOrder]; } - initWithUTF16String: (const uint16_t*)string length: (size_t)length byteOrder: (of_byte_order_t)byteOrder { @try { [self doesNotRecognizeSelector: _cmd]; |
︙ | ︙ | |||
2009 2010 2011 2012 2013 2014 2015 | size_t length = [self length]; uint16_t *ret; size_t i, j; BOOL swap = (byteOrder != OF_BYTE_ORDER_NATIVE); /* Allocate memory for the worst case */ ret = [object allocMemoryWithSize: sizeof(uint16_t) | | | 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 | size_t length = [self length]; uint16_t *ret; size_t i, j; BOOL swap = (byteOrder != OF_BYTE_ORDER_NATIVE); /* Allocate memory for the worst case */ ret = [object allocMemoryWithSize: sizeof(uint16_t) count: (length + 1) * 2]; j = 0; for (i = 0; i < length; i++) { of_unichar_t c = characters[i]; if (c > 0x10FFFF) |
︙ | ︙ | |||
2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 | c -= 0x10000; ret[j++] = 0xD800 | (c >> 10); ret[j++] = 0xDC00 | (c & 0x3FF); } else ret[j++] = c; } } @try { ret = [object resizeMemory: ret size: sizeof(uint16_t) | > | | 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 | c -= 0x10000; ret[j++] = 0xD800 | (c >> 10); ret[j++] = 0xDC00 | (c & 0x3FF); } else ret[j++] = c; } } ret[j] = 0; @try { ret = [object resizeMemory: ret size: sizeof(uint16_t) count: j + 1]; } @catch (OFOutOfMemoryException *e) { /* We don't care, as we only tried to make it smaller */ } objc_autoreleasePoolPop(pool); return ret; |
︙ | ︙ |
Modified tests/OFStringTests.m from [101fcea2a5] to [7f9159fd57].
︙ | ︙ | |||
43 44 45 46 47 48 49 | 0xFEFF, 'f', 0xF6, 0xF6, 'b', 0xE4, 'r', 0x1F03A }; static of_unichar_t sucstr[] = { 0xFFFE0000, 0x66000000, 0xF6000000, 0xF6000000, 0x62000000, 0xE4000000, 0x72000000, 0x3AF00100 }; static uint16_t utf16str[] = { | | | > | 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 | 0xFEFF, 'f', 0xF6, 0xF6, 'b', 0xE4, 'r', 0x1F03A }; static of_unichar_t sucstr[] = { 0xFFFE0000, 0x66000000, 0xF6000000, 0xF6000000, 0x62000000, 0xE4000000, 0x72000000, 0x3AF00100 }; static uint16_t utf16str[] = { 0xFEFF, 'f', 0xF6, 0xF6, 'b', 0xE4, 'r', 0xD83C, 0xDC3A, 0 }; static uint16_t sutf16str[] = { 0xFFFE, 0x6600, 0xF600, 0xF600, 0x6200, 0xE400, 0x7200, 0x3CD8, 0x3ADC, 0 }; @interface EntityHandler: OFObject <OFStringXMLUnescapingDelegate> @end @implementation EntityHandler - (OFString*)string: (OFString*)string |
︙ | ︙ | |||
162 163 164 165 166 167 168 | sizeof(*ucstr)]) && [is isEqual: @"fööbär🀺"] && (is = [OFString stringWithCharacters: sucstr length: sizeof(sucstr) / sizeof(*sucstr)]) && [is isEqual: @"fööbär🀺"]) | | | < < | < < | 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 | sizeof(*ucstr)]) && [is isEqual: @"fööbär🀺"] && (is = [OFString stringWithCharacters: sucstr length: sizeof(sucstr) / sizeof(*sucstr)]) && [is isEqual: @"fööbär🀺"]) TEST(@"+[stringWithUTF16String:]", (is = [OFString stringWithUTF16String: utf16str]) && [is isEqual: @"fööbär🀺"] && (is = [OFString stringWithUTF16String: sutf16str]) && [is isEqual: @"fööbär🀺"]) TEST(@"+[stringWithContentsOfFile:encoding]", (is = [OFString stringWithContentsOfFile: @"testfile.txt" encoding: OF_STRING_ENCODING_ISO_8859_1]) && [is isEqual: @"testäöü"]) |
︙ | ︙ |