Overview
Comment: | Add -[OFString lossyCStringWithEncoding:]. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA3-256: | 1b0a5cf8296c4ceda311a7943e07d934 |
User & Date: | js on 2014-01-17 03:12:30 |
Other Links: | manifest | tags |
Context
2014-01-17
| ||
16:41 | Move Unicode -> * conversions to separate files. check-in: a1bcdc2ff6 user: js tags: trunk | |
03:12 | Add -[OFString lossyCStringWithEncoding:]. check-in: 1b0a5cf829 user: js tags: trunk | |
2014-01-16
| ||
23:38 | Add +[OFString nativeOSEncoding]. check-in: e54c8c0368 user: js tags: trunk | |
Changes
Modified src/OFString.h from [0173474bc3] to [e9c1948895].
︙ | ︙ | |||
582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 | * @return The number of bytes written into the C string, without the * terminating zero */ - (size_t)getCString: (char*)cString maxLength: (size_t)maxLength encoding: (of_string_encoding_t)encoding; /*! * @brief Returns the OFString as a C string in the specified encoding. * * The result is valid until the autorelease pool is released. If you want to * use the result outside the scope of the current autorelease pool, you have to * copy it. * * @param encoding The encoding for the C string * @return The OFString as a C string in the specified encoding */ - (const char*)cStringWithEncoding: (of_string_encoding_t)encoding OF_RETURNS_INNER_POINTER; /*! * @brief Returns the OFString as a UTF-8 encoded C string. * * The result is valid until the autorelease pool is released. If you want to * use the result outside the scope of the current autorelease pool, you have to * copy it. * | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 | * @return The number of bytes written into the C string, without the * terminating zero */ - (size_t)getCString: (char*)cString maxLength: (size_t)maxLength encoding: (of_string_encoding_t)encoding; /*! * @brief Writes the OFString into the specified C string with the specified * encoding, replacing characters that cannot be represented in the * specified encoding with a question mark. 
* * @param cString The C string to write into * @param maxLength The maximum number of bytes to write into the C string, * including the terminating zero * @param encoding The encoding to use for writing into the C string * @return The number of bytes written into the C string, without the * terminating zero */ - (size_t)getLossyCString: (char*)cString maxLength: (size_t)maxLength encoding: (of_string_encoding_t)encoding; /*! * @brief Returns the OFString as a C string in the specified encoding. * * The result is valid until the autorelease pool is released. If you want to * use the result outside the scope of the current autorelease pool, you have to * copy it. * * @param encoding The encoding for the C string * @return The OFString as a C string in the specified encoding */ - (const char*)cStringWithEncoding: (of_string_encoding_t)encoding OF_RETURNS_INNER_POINTER; /*! * @brief Returns the OFString as a C string in the specified encoding, * replacing characters that cannot be represented in the specified * encoding with a question mark. * * The result is valid until the autorelease pool is released. If you want to * use the result outside the scope of the current autorelease pool, you have to * copy it. * * @param encoding The encoding for the C string * @return The OFString as a C string in the specified encoding */ - (const char*)lossyCStringWithEncoding: (of_string_encoding_t)encoding OF_RETURNS_INNER_POINTER; /*! * @brief Returns the OFString as a UTF-8 encoded C string. * * The result is valid until the autorelease pool is released. If you want to * use the result outside the scope of the current autorelease pool, you have to * copy it. * |
︙ | ︙ |
Modified src/OFString.m from [548eca2146] to [48162efaaf].
︙ | ︙ | |||
61 62 63 64 65 66 67 68 69 70 71 72 73 74 | /* * It seems strtod is buggy on Win32. * However, the MinGW version __strtod seems to be ok. */ #ifdef _WIN32 # define strtod __strtod #endif /* References for static linking */ void _references_to_categories_of_OFString(void) { _OFString_Hashing_reference = 1; _OFString_JSONValue_reference = 1; _OFString_Serialization_reference = 1; | > > > > > > > > > | 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 | /* * It seems strtod is buggy on Win32. * However, the MinGW version __strtod seems to be ok. */ #ifdef _WIN32 # define strtod __strtod #endif @interface OFString (OF_PRIVATE_CATEGORY) - (size_t)OF_getCString: (char*)cString maxLength: (size_t)maxLength encoding: (of_string_encoding_t)encoding lossy: (bool)lossy; - (const char*)OF_cStringWithEncoding: (of_string_encoding_t)encoding lossy: (bool)lossy; @end /* References for static linking */ void _references_to_categories_of_OFString(void) { _OFString_Hashing_reference = 1; _OFString_JSONValue_reference = 1; _OFString_Serialization_reference = 1; |
︙ | ︙ | |||
957 958 959 960 961 962 963 | [self release]; @throw e; } return self; } | | | | > | 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 | [self release]; @throw e; } return self; } - (size_t)OF_getCString: (char*)cString maxLength: (size_t)maxLength encoding: (of_string_encoding_t)encoding lossy: (bool)lossy { const of_unichar_t *characters = [self characters]; size_t i, length = [self length]; switch (encoding) { case OF_STRING_ENCODING_UTF_8:; size_t j = 0; |
︙ | ︙ | |||
993 994 995 996 997 998 999 | case 3: case 4: memcpy(cString + j, buffer, len); j += len; break; default: | > > > | > > > | > > > | | > | | > > > | | > | | 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 | case 3: case 4: memcpy(cString + j, buffer, len); j += len; break; default: if (lossy) cString[j++] = '?'; else @throw [OFInvalidEncodingException exception]; break; } } cString[j] = '\0'; return j; case OF_STRING_ENCODING_ASCII: if (length + 1 > maxLength) @throw [OFOutOfRangeException exception]; for (i = 0; i < length; i++) { if OF_UNLIKELY (characters[i] > 0x80) { if (lossy) cString[i] = '?'; else @throw [OFInvalidEncodingException exception]; } else cString[i] = (char)characters[i]; } cString[i] = '\0'; return length; case OF_STRING_ENCODING_ISO_8859_1: if (length + 1 > maxLength) @throw [OFOutOfRangeException exception]; for (i = 0; i < length; i++) { if OF_UNLIKELY (characters[i] > 0xFF) { if (lossy) cString[i] = '?'; else @throw [OFInvalidEncodingException exception]; } else cString[i] = (uint8_t)characters[i]; } cString[i] = '\0'; return length; case OF_STRING_ENCODING_ISO_8859_15: if (length + 1 > maxLength) |
︙ | ︙ | |||
1044 1045 1046 1047 1048 1049 1050 | case 0xA6: case 0xA8: case 0xB4: case 0xB8: case 0xBC: case 0xBD: case 0xBE: | > > > | > > > | 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 | case 0xA6: case 0xA8: case 0xB4: case 0xB8: case 0xBC: case 0xBD: case 0xBE: if (lossy) cString[i] = '?'; else @throw [OFInvalidEncodingException exception]; break; } if OF_UNLIKELY (c > 0xFF) { switch (c) { case 0x20AC: cString[i] = 0xA4; break; |
︙ | ︙ | |||
1074 1075 1076 1077 1078 1079 1080 | case 0x153: cString[i] = 0xBD; break; case 0x178: cString[i] = 0xBE; break; default: | > > > > | | > > | > > > | > > | 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 | case 0x153: cString[i] = 0xBD; break; case 0x178: cString[i] = 0xBE; break; default: if (lossy) cString[i] = '?'; else @throw [OFInvalidEncodingException exception]; break; } } else cString[i] = (uint8_t)c; } cString[i] = '\0'; return length; case OF_STRING_ENCODING_WINDOWS_1252: if (length + 1 > maxLength) @throw [OFOutOfRangeException exception]; for (i = 0; i < length; i++) { of_unichar_t c = characters[i]; if OF_UNLIKELY (c >= 0x80 && c <= 0x9F) { if (lossy) cString[i] = '?'; else @throw [OFInvalidEncodingException exception]; } if OF_UNLIKELY (c > 0xFF) { switch (c) { case 0x20AC: cString[i] = 0x80; break; case 0x201A: |
︙ | ︙ | |||
1178 1179 1180 1181 1182 1183 1184 | case 0x17E: cString[i] = 0x9E; break; case 0x178: cString[i] = 0x9F; break; default: | > > > > | | > > > > > > > > > > > > > > > > > > > > > > | > | | | > | | | > > > > > > > > > > > > > | 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 | case 0x17E: cString[i] = 0x9E; break; case 0x178: cString[i] = 0x9F; break; default: if (lossy) cString[i] = '?'; else @throw [OFInvalidEncodingException exception]; break; } } else cString[i] = (uint8_t)c; } cString[i] = '\0'; return length; default: @throw [OFNotImplementedException exceptionWithSelector: _cmd object: self]; } } - (size_t)getCString: (char*)cString maxLength: (size_t)maxLength encoding: (of_string_encoding_t)encoding { return [self OF_getCString: cString maxLength: maxLength encoding: encoding lossy: false]; } - (size_t)getLossyCString: (char*)cString maxLength: (size_t)maxLength encoding: (of_string_encoding_t)encoding { return [self OF_getCString: cString maxLength: maxLength encoding: encoding lossy: true]; } - (const char*)OF_cStringWithEncoding: (of_string_encoding_t)encoding lossy: (bool)lossy { OFObject *object = [[[OFObject alloc] init] autorelease]; size_t length = [self length]; char *cString; switch (encoding) { case OF_STRING_ENCODING_UTF_8:; size_t cStringLength; cString = [object allocMemoryWithSize: (length * 4) + 1]; cStringLength = [self OF_getCString: cString maxLength: (length * 4) + 1 encoding: OF_STRING_ENCODING_UTF_8 lossy: lossy]; @try { cString 
= [object resizeMemory: cString size: cStringLength + 1]; } @catch (OFOutOfMemoryException *e) { /* We don't care, as we only tried to make it smaller */ } break; case OF_STRING_ENCODING_ASCII: case OF_STRING_ENCODING_ISO_8859_1: case OF_STRING_ENCODING_ISO_8859_15: case OF_STRING_ENCODING_WINDOWS_1252: cString = [object allocMemoryWithSize: length + 1]; [self OF_getCString: cString maxLength: length + 1 encoding: encoding lossy: lossy]; break; default: @throw [OFNotImplementedException exceptionWithSelector: _cmd object: self]; } return cString; } - (const char*)cStringWithEncoding: (of_string_encoding_t)encoding { return [self OF_cStringWithEncoding: encoding lossy: false]; } - (const char*)lossyCStringWithEncoding: (of_string_encoding_t)encoding { return [self OF_cStringWithEncoding: encoding lossy: true]; } - (const char*)UTF8String { return [self cStringWithEncoding: OF_STRING_ENCODING_UTF_8]; } - (size_t)length |
︙ | ︙ |
Modified tests/OFStringTests.m from [d490274d6f] to [b2e299ece7].
︙ | ︙ | |||
215 216 217 218 219 220 221 222 223 224 225 226 227 228 | [[OFString stringWithCString: "\x80\x82\x83\x84\x85\x86\x87\x88" "\x89\x8A\x8B\x8C\x8E\x91\x92\x93" "\x94\x95\x96\x97\x98\x99\x9A\x9B" "\x9C\x9E\x9F" encoding: OF_STRING_ENCODING_WINDOWS_1252] isEqual: @"€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ"]) TEST(@"+[stringWithFormat:]", [(s[0] = [OFMutableString stringWithFormat: @"%@:%d", @"test", 123]) isEqual: @"test:123"]) TEST(@"-[appendFormat:]", R(([s[0] appendFormat: @"%02X", 15])) && [s[0] isEqual: @"test:1230F"]) | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 | [[OFString stringWithCString: "\x80\x82\x83\x84\x85\x86\x87\x88" "\x89\x8A\x8B\x8C\x8E\x91\x92\x93" "\x94\x95\x96\x97\x98\x99\x9A\x9B" "\x9C\x9E\x9F" encoding: OF_STRING_ENCODING_WINDOWS_1252] isEqual: @"€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ"]) TEST(@"Conversion of Codepage 437 to UTF-8", [[OFString stringWithCString: "\xB0\xB1\xB2\xDB" encoding: OF_STRING_ENCODING_CODEPAGE_437] isEqual: @"░▒▓█"]) TEST(@"Conversion of UTF-8 to ASCII #1", !strcmp([@"This is a test" cStringWithEncoding: OF_STRING_ENCODING_ASCII], "This is a test")) EXPECT_EXCEPTION(@"Conversion of UTF-8 to ASCII #2", OFInvalidEncodingException, [@"This is a tést" cStringWithEncoding: OF_STRING_ENCODING_ASCII]) TEST(@"Conversion of UTF-8 to ISO-8859-1 #1", !strcmp([@"This is ä test" cStringWithEncoding: OF_STRING_ENCODING_ISO_8859_1], "This is \xE4 test")) EXPECT_EXCEPTION(@"Conversion of UTF-8 to ISO-8859-1 #2", OFInvalidEncodingException, [@"This is ä t€st" cStringWithEncoding: OF_STRING_ENCODING_ISO_8859_1]) TEST(@"Conversion of UTF-8 to ISO-8859-15 #1", !strcmp([@"This is ä t€st" cStringWithEncoding: 
OF_STRING_ENCODING_ISO_8859_15], "This is \xE4 t\xA4st")) EXPECT_EXCEPTION(@"Conversion of UTF-8 to ISO-8859-15 #2", OFInvalidEncodingException, [@"This is ä t€st…" cStringWithEncoding: OF_STRING_ENCODING_ISO_8859_15]) TEST(@"Conversion of UTF-8 to Windows-1252 #1", !strcmp([@"This is ä t€st…" cStringWithEncoding: OF_STRING_ENCODING_WINDOWS_1252], "This is \xE4 t\x80st\x85")) EXPECT_EXCEPTION(@"Conversion of UTF-8 to Windows-1252 #2", OFInvalidEncodingException, [@"This is ä t€st…‼" cStringWithEncoding: OF_STRING_ENCODING_WINDOWS_1252]) TEST(@"Lossy conversion of UTF-8 to ASCII", !strcmp([@"This is a tést" lossyCStringWithEncoding: OF_STRING_ENCODING_ASCII], "This is a t?st")) TEST(@"Lossy conversion of UTF-8 to ISO-8859-1", !strcmp([@"This is ä t€st" lossyCStringWithEncoding: OF_STRING_ENCODING_ISO_8859_1], "This is \xE4 t?st")) TEST(@"Lossy conversion of UTF-8 to ISO-8859-15", !strcmp([@"This is ä t€st…" lossyCStringWithEncoding: OF_STRING_ENCODING_ISO_8859_15], "This is \xE4 t\xA4st?")) TEST(@"Lossy conversion of UTF-8 to Windows-1252", !strcmp([@"This is ä t€st…‼" lossyCStringWithEncoding: OF_STRING_ENCODING_WINDOWS_1252], "This is \xE4 t\x80st\x85?")) TEST(@"+[stringWithFormat:]", [(s[0] = [OFMutableString stringWithFormat: @"%@:%d", @"test", 123]) isEqual: @"test:123"]) TEST(@"-[appendFormat:]", R(([s[0] appendFormat: @"%02X", 15])) && [s[0] isEqual: @"test:1230F"]) |
︙ | ︙ |