Overview
Comment: | Change of_string_utf8_decode() API
It now returns <= 0 on error, with negative values being the negated number of bytes the UTF-8 sequence requires. This can be used to detect a cut-off sequence and how many bytes are missing. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA3-256: |
5e2ef97c35ba3911a76fe45afb629244 |
User & Date: | js on 2016-03-13 11:19:13 |
Other Links: | manifest | tags |
Context
2016-03-13
| ||
12:39 | OFRunLoop: Tiny documentation improvement check-in: d4555b9c1a user: js tags: trunk | |
11:19 | Change of_string_utf8_decode() API check-in: 5e2ef97c35 user: js tags: trunk | |
10:24 | OFStdIOStream_Win32Console: Improve reading check-in: 566d4df603 user: js tags: trunk | |
Changes
Modified src/OFMutableString_UTF8.m from [3b7cce9749] to [f55475cd25].
︙ | ︙ | |||
105 106 107 108 109 110 111 | i = j = 0; newCStringLength = 0; while (i < _s->cStringLength) { const of_unichar_t *const *table; size_t tableSize; of_unichar_t c; | | | | 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 | i = j = 0; newCStringLength = 0; while (i < _s->cStringLength) { const of_unichar_t *const *table; size_t tableSize; of_unichar_t c; ssize_t cLen; if (isStart) { table = startTable; tableSize = middleTableSize; } else { table = middleTable; tableSize = middleTableSize; } cLen = of_string_utf8_decode(_s->cString + i, _s->cStringLength - i, &c); if (cLen <= 0 || c > 0x10FFFF) { [self freeMemory: unicodeString]; @throw [OFInvalidEncodingException exception]; } switch (c) { case ' ': case '\t': |
︙ | ︙ | |||
200 201 202 203 204 205 206 | } - (void)setCharacter: (of_unichar_t)character atIndex: (size_t)index { char buffer[4]; of_unichar_t c; | | > | | | | | 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 | } - (void)setCharacter: (of_unichar_t)character atIndex: (size_t)index { char buffer[4]; of_unichar_t c; size_t lenNew; ssize_t lenOld; if (_s->isUTF8) index = of_string_utf8_get_position(_s->cString, index, _s->cStringLength); if (index > _s->cStringLength) @throw [OFOutOfRangeException exception]; /* Shortcut if old and new character both are ASCII */ if (!(character & 0x80) && !(_s->cString[index] & 0x80)) { _s->hashed = false; _s->cString[index] = character; return; } if ((lenNew = of_string_utf8_encode(character, buffer)) == 0) @throw [OFInvalidEncodingException exception]; if ((lenOld = of_string_utf8_decode(_s->cString + index, _s->cStringLength - index, &c)) <= 0) @throw [OFInvalidEncodingException exception]; _s->hashed = false; if (lenNew == (size_t)lenOld) memcpy(_s->cString + index, buffer, lenNew); else if (lenNew > (size_t)lenOld) { _s->cString = [self resizeMemory: _s->cString size: _s->cStringLength - lenOld + lenNew + 1]; memmove(_s->cString + index + lenNew, _s->cString + index + lenOld, _s->cStringLength - index - lenOld); memcpy(_s->cString + index, buffer, lenNew); _s->cStringLength -= lenOld; _s->cStringLength += lenNew; _s->cString[_s->cStringLength] = '\0'; if (character & 0x80) _s->isUTF8 = true; } else if (lenNew < (size_t)lenOld) { memmove(_s->cString + index + lenNew, _s->cString + index + lenOld, _s->cStringLength - index - lenOld); memcpy(_s->cString + index, buffer, lenNew); _s->cStringLength -= lenOld; _s->cStringLength += lenNew; |
︙ | ︙ |
Modified src/OFStdIOStream_Win32Console.m from [88dea4ced5] to [f55d77f8b8].
︙ | ︙ | |||
39 40 41 42 43 44 45 46 47 48 49 50 51 52 | #define OF_STDIO_STREAM_WIN32_CONSOLE_M #include "config.h" #import "OFStdIOStream_Win32Console.h" #import "OFStdIOStream+Private.h" #import "OFDataArray.h" #import "OFInvalidArgumentException.h" #import "OFInvalidEncodingException.h" #import "OFOutOfRangeException.h" #import "OFReadFailedException.h" #import "OFWriteFailedException.h" | > | 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 | #define OF_STDIO_STREAM_WIN32_CONSOLE_M #include "config.h" #import "OFStdIOStream_Win32Console.h" #import "OFStdIOStream+Private.h" #import "OFString.h" #import "OFDataArray.h" #import "OFInvalidArgumentException.h" #import "OFInvalidEncodingException.h" #import "OFOutOfRangeException.h" #import "OFReadFailedException.h" #import "OFWriteFailedException.h" |
︙ | ︙ | |||
219 220 221 222 223 224 225 | count: length * 2]; @try { size_t i = 0, j = 0; DWORD written; while (i < length) { of_unichar_t c; | | | | | | 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 | count: length * 2]; @try { size_t i = 0, j = 0; DWORD written; while (i < length) { of_unichar_t c; size_t UTF8Len; UTF8Len = of_string_utf8_decode(buffer + i, length - i, &c); if (UTF8Len <= 0 || c > 0x10FFFF) @throw [OFInvalidEncodingException exception]; if (c > 0xFFFF) { c -= 0x10000; tmp[j++] = 0xD800 | (c >> 10); tmp[j++] = 0xDC00 | (c & 0x3FF); } else tmp[j++] = c; i += UTF8Len; } if (!WriteConsoleW(_handle, tmp, j, &written, NULL) || written != j) @throw [OFWriteFailedException exceptionWithObject: self requestedLength: j]; } @finally { [self freeMemory: tmp]; } } @end |
Modified src/OFString.h from [a580fc1ea1] to [f6c039afa4].
︙ | ︙ | |||
1096 1097 1098 1099 1100 1101 1102 | #endif @end #ifdef __cplusplus extern "C" { #endif extern size_t of_string_utf8_encode(of_unichar_t, char*); | | | 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 | #endif @end #ifdef __cplusplus extern "C" { #endif extern size_t of_string_utf8_encode(of_unichar_t, char*); extern ssize_t of_string_utf8_decode(const char*, size_t, of_unichar_t*); extern size_t of_string_utf16_length(const of_char16_t*); extern size_t of_string_utf32_length(const of_char32_t*); #ifdef __cplusplus } #endif OF_ASSUME_NONNULL_END |
︙ | ︙ |
Modified src/OFString.m from [e3e6e72034] to [b713fffc4b].
︙ | ︙ | |||
124 125 126 127 128 129 130 | buffer[i] = 0x80 | (character & 0x3F); return 4; } return 0; } | | > | | | | 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 | buffer[i] = 0x80 | (character & 0x3F); return 4; } return 0; } ssize_t of_string_utf8_decode(const char *buffer_, size_t length, of_unichar_t *ret) { /* FIXME: Check if the following bytes are indeed surrogates */ const uint8_t *buffer = (const uint8_t*)buffer_; if (!(*buffer & 0x80)) { *ret = buffer[0]; return 1; } if ((*buffer & 0xE0) == 0xC0) { if OF_UNLIKELY (length < 2) return -2; *ret = ((buffer[0] & 0x1F) << 6) | (buffer[1] & 0x3F); return 2; } if ((*buffer & 0xF0) == 0xE0) { if OF_UNLIKELY (length < 3) return -3; *ret = ((buffer[0] & 0x0F) << 12) | ((buffer[1] & 0x3F) << 6) | (buffer[2] & 0x3F); return 3; } if ((*buffer & 0xF8) == 0xF0) { if OF_UNLIKELY (length < 4) return -4; *ret = ((buffer[0] & 0x07) << 18) | ((buffer[1] & 0x3F) << 12) | ((buffer[2] & 0x3F) << 6) | (buffer[3] & 0x3F); return 4; } return 0; |
︙ | ︙ |
Modified src/OFString_UTF8.m from [f9779f2392] to [c460dbfe91].
︙ | ︙ | |||
804 805 806 807 808 809 810 | return OF_ORDERED_ASCENDING; } i = j = 0; while (i < _s->cStringLength && j < otherCStringLength) { of_unichar_t c1, c2; | | | | 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 | return OF_ORDERED_ASCENDING; } i = j = 0; while (i < _s->cStringLength && j < otherCStringLength) { of_unichar_t c1, c2; ssize_t l1, l2; l1 = of_string_utf8_decode(_s->cString + i, _s->cStringLength - i, &c1); l2 = of_string_utf8_decode(otherCString + j, otherCStringLength - j, &c2); if (l1 <= 0 || l2 <= 0 || c1 > 0x10FFFF || c2 > 0x10FFFF) @throw [OFInvalidEncodingException exception]; if (c1 >> 8 < OF_UNICODE_CASEFOLDING_TABLE_SIZE) { of_unichar_t tc = of_unicode_casefolding_table[c1 >> 8][c1 & 0xFF]; if (tc) |
︙ | ︙ | |||
858 859 860 861 862 863 864 | if (_s->hashed) return _s->hash; OF_HASH_INIT(hash); for (size_t i = 0; i < _s->cStringLength; i++) { of_unichar_t c; | | | | 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 | if (_s->hashed) return _s->hash; OF_HASH_INIT(hash); for (size_t i = 0; i < _s->cStringLength; i++) { of_unichar_t c; ssize_t length; if ((length = of_string_utf8_decode(_s->cString + i, _s->cStringLength - i, &c)) <= 0) @throw [OFInvalidEncodingException exception]; OF_HASH_ADD(hash, (c & 0xFF0000) >> 16); OF_HASH_ADD(hash, (c & 0x00FF00) >> 8); OF_HASH_ADD(hash, c & 0x0000FF); i += length - 1; |
︙ | ︙ | |||
892 893 894 895 896 897 898 | if (!_s->isUTF8) return _s->cString[index]; index = of_string_utf8_get_position(_s->cString, index, _s->cStringLength); | | | | 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 | if (!_s->isUTF8) return _s->cString[index]; index = of_string_utf8_get_position(_s->cString, index, _s->cStringLength); if (of_string_utf8_decode(_s->cString + index, _s->cStringLength - index, &character) <= 0) @throw [OFInvalidEncodingException exception]; return character; } - (void)getCharacters: (of_unichar_t*)buffer inRange: (of_range_t)range |
︙ | ︙ | |||
1188 1189 1190 1191 1192 1193 1194 | ret = [object allocMemoryWithSize: sizeof(of_unichar_t) count: _s->length]; i = j = 0; while (i < _s->cStringLength) { of_unichar_t c; | | | | 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 | ret = [object allocMemoryWithSize: sizeof(of_unichar_t) count: _s->length]; i = j = 0; while (i < _s->cStringLength) { of_unichar_t c; ssize_t cLen; cLen = of_string_utf8_decode(_s->cString + i, _s->cStringLength - i, &c); if (cLen <= 0 || c > 0x10FFFF) @throw [OFInvalidEncodingException exception]; ret[j++] = c; i += cLen; } return ret; |
︙ | ︙ | |||
1216 1217 1218 1219 1220 1221 1222 | ret = [object allocMemoryWithSize: sizeof(of_unichar_t) count: _s->length + 1]; i = j = 0; while (i < _s->cStringLength) { of_unichar_t c; | | | | 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 | ret = [object allocMemoryWithSize: sizeof(of_unichar_t) count: _s->length + 1]; i = j = 0; while (i < _s->cStringLength) { of_unichar_t c; ssize_t cLen; cLen = of_string_utf8_decode(_s->cString + i, _s->cStringLength - i, &c); if (cLen <= 0 || c > 0x10FFFF) @throw [OFInvalidEncodingException exception]; if (byteOrder != OF_BYTE_ORDER_NATIVE) ret[j++] = OF_BSWAP32(c); else ret[j++] = c; |
︙ | ︙ |