Differences From Artifact [035a617a1b]:
- File
src/OFString_UTF8.m
— part of check-in
[2a27cf3000]
at
2016-01-03 00:41:26
on branch trunk
— Update copyright
While at it, also update the mail address. (user: js, size: 28015) [annotate] [blame] [check-ins using] [more...]
To Artifact [f9779f2392]:
- File src/OFString_UTF8.m — part of check-in [e0b9167693] at 2016-02-21 15:37:42 on branch trunk — Make use of C99-style for loops (user: js, size: 28082) [annotate] [blame] [check-ins using]
︙ | ︙ | |||
41 42 43 44 45 46 47 | extern const of_char16_t of_iso_8859_15[128]; extern const of_char16_t of_windows_1252[128]; extern const of_char16_t of_codepage_437[128]; static inline int memcasecmp(const char *first, const char *second, size_t length) { | < < | | | | 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 | extern const of_char16_t of_iso_8859_15[128]; extern const of_char16_t of_windows_1252[128]; extern const of_char16_t of_codepage_437[128]; static inline int memcasecmp(const char *first, const char *second, size_t length) { for (size_t i = 0; i < length; i++) { if (tolower((int)first[i]) > tolower((int)second[i])) return OF_ORDERED_DESCENDING; if (tolower((int)first[i]) < tolower((int)second[i])) return OF_ORDERED_ASCENDING; } return OF_ORDERED_SAME; } int of_string_utf8_check(const char *UTF8String, size_t UTF8Length, size_t *length) { size_t tmpLength = UTF8Length; int isUTF8 = 0; for (size_t i = 0; i < UTF8Length; i++) { /* No sign of UTF-8 here */ if OF_LIKELY (!(UTF8String[i] & 0x80)) continue; isUTF8 = 1; /* We're missing a start byte here */ |
︙ | ︙ | |||
123 124 125 126 127 128 129 | return isUTF8; } size_t of_string_utf8_get_index(const char *string, size_t position) { | | | < < | | 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 | return isUTF8; } size_t of_string_utf8_get_index(const char *string, size_t position) { size_t index = position; for (size_t i = 0; i < position; i++) if OF_UNLIKELY ((string[i] & 0xC0) == 0x80) index--; return index; } size_t of_string_utf8_get_position(const char *string, size_t index, size_t length) { for (size_t i = 0; i <= index; i++) if OF_UNLIKELY ((string[i] & 0xC0) == 0x80) if (++index > length) return OF_NOT_FOUND; return index; } |
︙ | ︙ | |||
207 208 209 210 211 212 213 | - initWithCString: (const char*)cString encoding: (of_string_encoding_t)encoding length: (size_t)cStringLength { self = [super init]; @try { | < > | 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 | - initWithCString: (const char*)cString encoding: (of_string_encoding_t)encoding length: (size_t)cStringLength { self = [super init]; @try { const of_char16_t *table; size_t j; if (encoding == OF_STRING_ENCODING_UTF_8 && cStringLength >= 3 && memcmp(cString, "\xEF\xBB\xBF", 3) == 0) { cString += 3; cStringLength -= 3; } |
︙ | ︙ | |||
247 248 249 250 251 252 253 | return self; } /* All other encodings we support are single byte encodings */ _s->length = cStringLength; if (encoding == OF_STRING_ENCODING_ISO_8859_1) { | > | | 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 | return self; } /* All other encodings we support are single byte encodings */ _s->length = cStringLength; if (encoding == OF_STRING_ENCODING_ISO_8859_1) { j = 0; for (size_t i = 0; i < cStringLength; i++) { char buffer[4]; size_t bytes; if (!(cString[i] & 0x80)) { _s->cString[j++] = cString[i]; continue; } |
︙ | ︙ | |||
292 293 294 295 296 297 298 | case OF_STRING_ENCODING_CODEPAGE_437: table = of_codepage_437; break; default: @throw [OFInvalidEncodingException exception]; } | > | | 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 | case OF_STRING_ENCODING_CODEPAGE_437: table = of_codepage_437; break; default: @throw [OFInvalidEncodingException exception]; } j = 0; for (size_t i = 0; i < cStringLength; i++) { char buffer[4]; of_unichar_t character; size_t characterBytes; if (!(cString[i] & 0x80)) { _s->cString[j++] = cString[i]; continue; |
︙ | ︙ | |||
403 404 405 406 407 408 409 | - initWithCharacters: (const of_unichar_t*)characters length: (size_t)length { self = [super init]; @try { | | > | | 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 | - initWithCharacters: (const of_unichar_t*)characters length: (size_t)length { self = [super init]; @try { size_t j; _s = &_storage; _s->cString = [self allocMemoryWithSize: (length * 4) + 1]; _s->length = length; j = 0; for (size_t i = 0; i < length; i++) { size_t len = of_string_utf8_encode(characters[i], _s->cString + j); if (len == 0) @throw [OFInvalidEncodingException exception]; if (len > 1) |
︙ | ︙ | |||
447 448 449 450 451 452 453 | - initWithUTF16String: (const of_char16_t*)string length: (size_t)length byteOrder: (of_byte_order_t)byteOrder { self = [super init]; @try { | | > | | 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 | - initWithUTF16String: (const of_char16_t*)string length: (size_t)length byteOrder: (of_byte_order_t)byteOrder { self = [super init]; @try { size_t j; bool swap = false; if (length > 0 && *string == 0xFEFF) { string++; length--; } else if (length > 0 && *string == 0xFFFE) { swap = true; string++; length--; } else if (byteOrder != OF_BYTE_ORDER_NATIVE) swap = true; _s = &_storage; _s->cString = [self allocMemoryWithSize: (length * 4) + 1]; _s->length = length; j = 0; for (size_t i = 0; i < length; i++) { of_unichar_t character = (swap ? OF_BSWAP16(string[i]) : string[i]); size_t len; /* Missing high surrogate */ if ((character & 0xFC00) == 0xDC00) @throw [OFInvalidEncodingException exception]; |
︙ | ︙ | |||
531 532 533 534 535 536 537 | - initWithUTF32String: (const of_char32_t*)characters length: (size_t)length byteOrder: (of_byte_order_t)byteOrder { self = [super init]; @try { | | > | | 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 | - initWithUTF32String: (const of_char32_t*)characters length: (size_t)length byteOrder: (of_byte_order_t)byteOrder { self = [super init]; @try { size_t j; bool swap = false; if (length > 0 && *characters == 0xFEFF) { characters++; length--; } else if (length > 0 && *characters == 0xFFFE0000) { swap = true; characters++; length--; } else if (byteOrder != OF_BYTE_ORDER_NATIVE) swap = true; _s = &_storage; _s->cString = [self allocMemoryWithSize: (length * 4) + 1]; _s->length = length; j = 0; for (size_t i = 0; i < length; i++) { char buffer[4]; size_t len = of_string_utf8_encode( (swap ? OF_BSWAP32(characters[i]) : characters[i]), buffer); switch (len) { case 1: |
︙ | ︙ | |||
848 849 850 851 852 853 854 | return OF_ORDERED_ASCENDING; return OF_ORDERED_SAME; } - (uint32_t)hash { | < | | 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 | return OF_ORDERED_ASCENDING; return OF_ORDERED_SAME; } - (uint32_t)hash { uint32_t hash; if (_s->hashed) return _s->hash; OF_HASH_INIT(hash); for (size_t i = 0; i < _s->cStringLength; i++) { of_unichar_t c; size_t length; if ((length = of_string_utf8_decode(_s->cString + i, _s->cStringLength - i, &c)) == 0) @throw [OFInvalidEncodingException exception]; |
︙ | ︙ | |||
921 922 923 924 925 926 927 | } - (of_range_t)rangeOfString: (OFString*)string options: (int)options range: (of_range_t)range { const char *cString = [string UTF8String]; | | | 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 | } - (of_range_t)rangeOfString: (OFString*)string options: (int)options range: (of_range_t)range { const char *cString = [string UTF8String]; size_t cStringLength = [string UTF8StringLength]; size_t rangeLocation, rangeLength; if (range.length > SIZE_MAX - range.location || range.location + range.length > _s->length) @throw [OFOutOfRangeException exception]; if (_s->isUTF8) { |
︙ | ︙ | |||
946 947 948 949 950 951 952 | if (cStringLength == 0) return of_range(0, 0); if (cStringLength > rangeLength) return of_range(OF_NOT_FOUND, 0); if (options & OF_STRING_SEARCH_BACKWARDS) { | | | | | | 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 | if (cStringLength == 0) return of_range(0, 0); if (cStringLength > rangeLength) return of_range(OF_NOT_FOUND, 0); if (options & OF_STRING_SEARCH_BACKWARDS) { for (size_t i = rangeLength - cStringLength;; i--) { if (memcmp(_s->cString + rangeLocation + i, cString, cStringLength) == 0) { range.location += of_string_utf8_get_index( _s->cString + rangeLocation, i); range.length = [string length]; return range; } /* Did not match and we're at the last char */ if (i == 0) return of_range(OF_NOT_FOUND, 0); } } else { for (size_t i = 0; i <= rangeLength - cStringLength; i++) { if (memcmp(_s->cString + rangeLocation + i, cString, cStringLength) == 0) { range.location += of_string_utf8_get_index( _s->cString + rangeLocation, i); range.length = [string length]; return range; } } } return of_range(OF_NOT_FOUND, 0); } - (bool)containsString: (OFString*)string { const char *cString = [string UTF8String]; size_t cStringLength = [string UTF8StringLength]; if (cStringLength == 0) return true; if (cStringLength > _s->cStringLength) return false; for (size_t i = 0; i <= _s->cStringLength - cStringLength; i++) if (memcmp(_s->cString + i, cString, cStringLength) == 0) return true; return false; } - (OFString*)substringWithRange: (of_range_t)range |
︙ | ︙ | |||
1042 1043 1044 1045 1046 1047 1048 | options: (int)options { void *pool; OFMutableArray *array; const char *cString = [delimiter UTF8String]; size_t cStringLength = [delimiter UTF8StringLength]; bool skipEmpty = (options & OF_STRING_SKIP_EMPTY); | | > | | 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 | options: (int)options { void *pool; OFMutableArray *array; const char *cString = [delimiter UTF8String]; size_t cStringLength = [delimiter UTF8StringLength]; bool skipEmpty = (options & OF_STRING_SKIP_EMPTY); size_t last; OFString *component; array = [OFMutableArray array]; pool = objc_autoreleasePoolPush(); if (cStringLength > _s->cStringLength) { [array addObject: [[self copy] autorelease]]; objc_autoreleasePoolPop(pool); return array; } last = 0; for (size_t i = 0; i <= _s->cStringLength - cStringLength; i++) { if (memcmp(_s->cString + i, cString, cStringLength) != 0) continue; component = [OFString stringWithUTF8String: _s->cString + last length: i - last]; if (!skipEmpty || [component length] > 0) [array addObject: component]; |
︙ | ︙ | |||
1150 1151 1152 1153 1154 1155 1156 | return [OFString stringWithUTF8String: _s->cString + i length: pathCStringLength - i]; } - (OFString*)stringByDeletingLastPathComponent { | | | | 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 | return [OFString stringWithUTF8String: _s->cString + i length: pathCStringLength - i]; } - (OFString*)stringByDeletingLastPathComponent { size_t pathCStringLength = _s->cStringLength; if (pathCStringLength == 0) return @""; if (OF_IS_PATH_DELIMITER(_s->cString[pathCStringLength - 1])) pathCStringLength--; if (pathCStringLength == 0) return [OFString stringWithUTF8String: _s->cString length: 1]; for (size_t i = pathCStringLength - 1; i >= 1; i--) if (OF_IS_PATH_DELIMITER(_s->cString[i])) return [OFString stringWithUTF8String: _s->cString length: i]; if (OF_IS_PATH_DELIMITER(_s->cString[0])) return [OFString stringWithUTF8String: _s->cString length: 1]; |
︙ | ︙ |