Differences From Artifact [d68d151905]:
- File src/OFString.m — part of check-in [1c65e229e8] at 2011-07-09 12:04:52 on branch trunk — Optimize +[stringWithPath:]. (user: js, size: 42707) [annotate] [blame] [check-ins using]
To Artifact [fee37530e3]:
- File src/OFString.m — part of check-in [36e8a94f34] at 2011-07-09 14:45:44 on branch trunk — Cache the length of a string. Also removes -[appendCStringWithoutUTF8Checking:] as it is dangerous and not compatible with this optimization. (user: js, size: 43190) [annotate] [blame] [check-ins using]
︙ | ︙ | |||
78 79 80 81 82 83 84 | return OF_ORDERED_ASCENDING; } return OF_ORDERED_SAME; } int | | | | | | | | | | | | | | > | | | | > | | | | | > > | > > | 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 | return OF_ORDERED_ASCENDING; } return OF_ORDERED_SAME; } int of_string_check_utf8(const char *cString, size_t cStringLength, size_t *length) { size_t i, tmpLength = cStringLength; int isUTF8 = 0; madvise((void*)cString, cStringLength, MADV_SEQUENTIAL); for (i = 0; i < cStringLength; i++) { /* No sign of UTF-8 here */ if (OF_LIKELY(!(cString[i] & 0x80))) continue; isUTF8 = 1; /* We're missing a start byte here */ if (OF_UNLIKELY(!(cString[i] & 0x40))) { madvise((void*)cString, cStringLength, MADV_NORMAL); return -1; } /* 2 byte sequences for code points 0 - 127 are forbidden */ if (OF_UNLIKELY((cString[i] & 0x7E) == 0x40)) { madvise((void*)cString, cStringLength, MADV_NORMAL); return -1; } /* We have at minimum a 2 byte character -> check next byte */ if (OF_UNLIKELY(cStringLength <= i + 1 || (cString[i + 1] & 0xC0) != 0x80)) { madvise((void*)cString, cStringLength, MADV_NORMAL); return -1; } /* Check if we have at minimum a 3 byte character */ if (OF_LIKELY(!(cString[i] & 0x20))) { i++; tmpLength--; continue; } /* We have at minimum a 3 byte char -> check second next byte */ if (OF_UNLIKELY(cStringLength <= i + 2 || (cString[i + 2] & 0xC0) != 0x80)) { madvise((void*)cString, cStringLength, MADV_NORMAL); return -1; } /* Check if we have a 4 byte character */ if (OF_LIKELY(!(cString[i] & 0x10))) { i += 2; tmpLength -= 2; continue; } /* We have a 4 byte character -> check third next byte */ if (OF_UNLIKELY(cStringLength <= i + 3 || (cString[i + 3] & 0xC0) != 0x80)) { 
madvise((void*)cString, cStringLength, MADV_NORMAL); return -1; } /* * Just in case, check if there's a 5th character, which is * forbidden by UTF-8 */ if (OF_UNLIKELY(cString[i] & 0x08)) { madvise((void*)cString, cStringLength, MADV_NORMAL); return -1; } i += 3; tmpLength -= 3; } madvise((void*)cString, cStringLength, MADV_NORMAL); if (length != NULL) *length = tmpLength; return isUTF8; } size_t of_string_unicode_to_utf8(of_unichar_t character, char *buffer) { |
︙ | ︙ | |||
472 473 474 475 476 477 478 | s = [self allocMemoryWithSize: sizeof(*s)]; memset(s, 0, sizeof(*s)); s->cString = [self allocMemoryWithSize: cStringLength + 1]; s->cStringLength = cStringLength; if (encoding == OF_STRING_ENCODING_UTF_8) { | | > > > > | 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 | s = [self allocMemoryWithSize: sizeof(*s)]; memset(s, 0, sizeof(*s)); s->cString = [self allocMemoryWithSize: cStringLength + 1]; s->cStringLength = cStringLength; if (encoding == OF_STRING_ENCODING_UTF_8) { switch (of_string_check_utf8(cString, cStringLength, &s->length)) { case 1: s->isUTF8 = YES; break; case -1: @throw [OFInvalidEncodingException newWithClass: isa]; } memcpy(s->cString, cString, cStringLength); s->cString[cStringLength] = 0; return self; } /* All other encodings we support are single byte encodings */ s->length = cStringLength; if (encoding == OF_STRING_ENCODING_ISO_8859_1) { for (i = j = 0; i < cStringLength; i++) { char buffer[4]; size_t bytes; if (!(cString[i] & 0x80)) { |
︙ | ︙ | |||
587 588 589 590 591 592 593 594 595 596 597 | { self = [super init]; @try { s = [self allocMemoryWithSize: sizeof(*s)]; memset(s, 0, sizeof(*s)); s->cStringLength = [string cStringLength]; s->isUTF8 = string->s->isUTF8; s->cString = [self allocMemoryWithSize: s->cStringLength + 1]; | > > > > > | | 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 | { self = [super init]; @try { s = [self allocMemoryWithSize: sizeof(*s)]; memset(s, 0, sizeof(*s)); /* * We need one call to make sure it's initialized (in case it's * a constant string). */ s->cStringLength = [string cStringLength]; s->isUTF8 = string->s->isUTF8; s->length = string->s->length; s->cString = [self allocMemoryWithSize: s->cStringLength + 1]; memcpy(s->cString, string->s->cString, s->cStringLength + 1); } @catch (id e) { [self release]; @throw e; } return self; } |
︙ | ︙ | |||
648 649 650 651 652 653 654 655 656 657 658 659 660 661 | swap = YES; s = [self allocMemoryWithSize: sizeof(*s)]; memset(s, 0, sizeof(*s)); s->cStringLength = length; s->cString = [self allocMemoryWithSize: (length * 4) + 1]; for (i = 0; i < length; i++) { char buffer[4]; size_t characterLen = of_string_unicode_to_utf8( (swap ? of_bswap32(string[i]) : string[i]), buffer); | > | 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 | swap = YES; s = [self allocMemoryWithSize: sizeof(*s)]; memset(s, 0, sizeof(*s)); s->cStringLength = length; s->cString = [self allocMemoryWithSize: (length * 4) + 1]; s->length = length; for (i = 0; i < length; i++) { char buffer[4]; size_t characterLen = of_string_unicode_to_utf8( (swap ? of_bswap32(string[i]) : string[i]), buffer); |
︙ | ︙ | |||
754 755 756 757 758 759 760 761 762 763 764 765 766 767 | swap = YES; s = [self allocMemoryWithSize: sizeof(*s)]; memset(s, 0, sizeof(*s)); s->cStringLength = length; s->cString = [self allocMemoryWithSize: (length * 4) + 1]; for (i = 0; i < length; i++) { char buffer[4]; of_unichar_t character = (swap ? of_bswap16(string[i]) : string[i]); size_t characterLen; | > | 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 | swap = YES; s = [self allocMemoryWithSize: sizeof(*s)]; memset(s, 0, sizeof(*s)); s->cStringLength = length; s->cString = [self allocMemoryWithSize: (length * 4) + 1]; s->length = length; for (i = 0; i < length; i++) { char buffer[4]; of_unichar_t character = (swap ? of_bswap16(string[i]) : string[i]); size_t characterLen; |
︙ | ︙ | |||
781 782 783 784 785 786 787 788 789 790 791 792 793 794 | ? of_bswap16(string[i + 1]) : string[i + 1]); character = (((character & 0x3FF) << 10) | (nextCharacter & 0x3FF)) + 0x10000; i++; s->cStringLength--; } characterLen = of_string_unicode_to_utf8( character, buffer); switch (characterLen) { case 1: | > | 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 | ? of_bswap16(string[i + 1]) : string[i + 1]); character = (((character & 0x3FF) << 10) | (nextCharacter & 0x3FF)) + 0x10000; i++; s->cStringLength--; s->length--; } characterLen = of_string_unicode_to_utf8( character, buffer); switch (characterLen) { case 1: |
︙ | ︙ | |||
873 874 875 876 877 878 879 | arguments)) == -1) @throw [OFInvalidFormatException newWithClass: isa]; s->cStringLength = cStringLength; @try { switch (of_string_check_utf8(s->cString, | | | 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 | arguments)) == -1) @throw [OFInvalidFormatException newWithClass: isa]; s->cStringLength = cStringLength; @try { switch (of_string_check_utf8(s->cString, cStringLength, &s->length)) { case 1: s->isUTF8 = YES; break; case -1: @throw [OFInvalidEncodingException newWithClass: isa]; } |
︙ | ︙ | |||
926 927 928 929 930 931 932 933 934 935 936 937 | /* * First needs to be a call to be sure it is initialized, in * case it's a constant string. */ s->cStringLength = [firstComponent cStringLength]; s->isUTF8 = firstComponent->s->isUTF8; /* Calculate length and see if we need UTF-8 */ va_copy(argumentsCopy, arguments); while ((component = va_arg(argumentsCopy, OFString*)) != nil) { /* First needs to be a call, see above */ | > | | | 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 | /* * First needs to be a call to be sure it is initialized, in * case it's a constant string. */ s->cStringLength = [firstComponent cStringLength]; s->isUTF8 = firstComponent->s->isUTF8; s->length = firstComponent->s->length; /* Calculate length and see if we need UTF-8 */ va_copy(argumentsCopy, arguments); while ((component = va_arg(argumentsCopy, OFString*)) != nil) { /* First needs to be a call, see above */ s->cStringLength += 1 + [component cStringLength]; s->length += 1 + component->s->length; if (component->s->isUTF8) s->isUTF8 = YES; } s->cString = [self allocMemoryWithSize: s->cStringLength + 1]; |
︙ | ︙ | |||
1108 1109 1110 1111 1112 1113 1114 | - (const char*)cString { return s->cString; } - (size_t)length { | < < < | < < > > > > | > | | 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 | - (const char*)cString { return s->cString; } - (size_t)length { return s->length; } - (size_t)cStringLength { return s->cStringLength; } - (BOOL)isEqual: (id)object { OFString *otherString; if (![object isKindOfClass: [OFString class]]) return NO; otherString = object; if ([otherString cStringLength] != s->cStringLength || otherString->s->length != s->length) return NO; if (strcmp(s->cString, otherString->s->cString)) return NO; return YES; } - copy { |
︙ | ︙ | |||
1301 1302 1303 1304 1305 1306 1307 | return element; } - (of_unichar_t)characterAtIndex: (size_t)index { of_unichar_t character; | < | | > < < < < | 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 | return element; } - (of_unichar_t)characterAtIndex: (size_t)index { of_unichar_t character; if (index >= s->length) @throw [OFOutOfRangeException newWithClass: isa]; if (!s->isUTF8) return s->cString[index]; index = of_string_index_to_position(s->cString, index, s->cStringLength); if (!of_string_utf8_to_unicode(s->cString + index, s->cStringLength - index, &character)) @throw [OFInvalidEncodingException newWithClass: isa]; return character; } |
︙ | ︙ | |||
1364 1365 1366 1367 1368 1369 1370 | return OF_INVALID_INDEX; } } - (BOOL)containsString: (OFString*)string { const char *cString = [string cString]; | | > > > > > > > < < < < < < < | 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 | return OF_INVALID_INDEX; } } - (BOOL)containsString: (OFString*)string { const char *cString = [string cString]; size_t i, cStringLength = string->s->cStringLength; if (cStringLength == 0) return YES; if (cStringLength > s->cStringLength) return NO; for (i = 0; i <= s->cStringLength - cStringLength; i++) if (!memcmp(s->cString + i, cString, cStringLength)) return YES; return NO; } - (OFString*)substringFromIndex: (size_t)start toIndex: (size_t)end { if (start > end) @throw [OFInvalidArgumentException newWithClass: isa selector: _cmd]; if (end > s->length) @throw [OFOutOfRangeException newWithClass: isa]; if (s->isUTF8) { start = of_string_index_to_position(s->cString, start, s->cStringLength); end = of_string_index_to_position(s->cString, end, s->cStringLength); } return [OFString stringWithCString: s->cString + start length: end - start]; } - (OFString*)substringWithRange: (of_range_t)range { return [self substringFromIndex: range.start |
︙ | ︙ | |||
1858 1859 1860 1861 1862 1863 1864 | - (of_unichar_t*)unicodeString { OFObject *object = [[[OFObject alloc] init] autorelease]; of_unichar_t *ret; size_t i, j; | | | 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 | - (of_unichar_t*)unicodeString { OFObject *object = [[[OFObject alloc] init] autorelease]; of_unichar_t *ret; size_t i, j; ret = [object allocMemoryForNItems: s->length + 2 withSize: sizeof(of_unichar_t)]; i = 0; j = 0; ret[j++] = 0xFEFF; |
︙ | ︙ |