Index: src/OFFile.m
==================================================================
--- src/OFFile.m
+++ src/OFFile.m
@@ -161,11 +161,11 @@
 	return self;
 }
 
 - (void)dealloc
 {
-	if (close == YES && fp != NULL)
+	if (close && fp != NULL)
 		fclose(fp);
 
 	[super dealloc];
 }
 
Index: src/OFMutableString.m
==================================================================
--- src/OFMutableString.m
+++ src/OFMutableString.m
@@ -14,11 +14,11 @@
 #define _GNU_SOURCE
 #include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <ctype.h>
+#include <assert.h>
 
 #ifdef HAVE_MADVISE
 #include <sys/mman.h>
 #else
 #define madvise(addr, len, advise)
@@ -27,10 +27,13 @@
 #import "OFMutableString.h"
 #import "OFExceptions.h"
 #import "OFMacros.h"
 
 #import "asprintf.h"
+
+extern const of_unichar_t* const of_unicode_upper_table[0x1100];
+extern const of_unichar_t* const of_unicode_lower_table[0x1100];
 
 @implementation OFMutableString
 - setToCString: (const char*)str
 {
 	size_t len;
@@ -274,30 +277,204 @@
 	return self;
 }
 
 - upper
 {
-	char *p = string + length;
+	of_unichar_t c, uc;
+	of_unichar_t *ustr;
+	size_t ulen, nlen;
+	size_t i, j, d;
+	char *nstr;
+
+	if (!is_utf8) {
+		uint8_t *p = (uint8_t*)string + length;
+		uint8_t t;
+
+		while (--p >= (uint8_t*)string) {
+			t = of_unicode_upper_table[0][*p];
+			if (t != 0)
+				*p = t;
+		}
+
+		return self;
+	}
+
+	/*
+	 * The buffer holds one of_unichar_t per character, so the item size
+	 * has to be sizeof(of_unichar_t) rather than the character count.
+	 */
+	ulen = [self length];
+	ustr = [self allocMemoryForNItems: ulen
+				 withSize: sizeof(of_unichar_t)];
+
+	j = 0;
+	nlen = 0;
+
+	for (i = 0; i < length; i++) {
+		c = of_string_utf8_to_unicode(string + i, length - i);
+
+		if (c == OF_INVALID_UNICHAR || c > 0x10FFFF) {
+			[self freeMemory: ustr];
+			@throw [OFInvalidEncodingException newWithClass: isa];
+		}
+
+		uc = of_unicode_upper_table[c >> 8][c & 0xFF];
+		if (uc == 0)
+			uc = c;
+		ustr[j++] = uc;
+
+		if (uc < 0x80)
+			nlen++;
+		else if (uc < 0x800)
+			nlen += 2;
+		else if (uc < 0x10000)
+			nlen += 3;
+		else if (uc < 0x110000)
+			nlen += 4;
+		else {
+			[self freeMemory: ustr];
+			@throw [OFInvalidEncodingException newWithClass: isa];
+		}
+
+		if (c < 0x80);
+		else if (c < 0x800)
+			i++;
+		else if (c < 0x10000)
+			i += 2;
+		else if (c < 0x110000)
+			i += 3;
+		else {
+			[self freeMemory: ustr];
+			@throw [OFInvalidEncodingException newWithClass: isa];
+		}
+	}
+
+	@try {
+		nstr = [self allocMemoryWithSize: nlen + 1];
+	} @catch (OFException *e) {
+		[self freeMemory: ustr];
+		@throw e;
+	}
+
+	j = 0;
+
+	for (i = 0; i < ulen; i++) {
+		if ((d = of_string_unicode_to_utf8(ustr[i], nstr + j)) == 0) {
+			[self freeMemory: ustr];
+			[self freeMemory: nstr];
+			@throw [OFInvalidEncodingException newWithClass: isa];
+		}
+		j += d;
+	}
 
-	if (is_utf8)
-		@throw [OFInvalidEncodingException newWithClass: isa];
+	assert(j == nlen);
+	nstr[j] = 0;
+	[self freeMemory: ustr];
 
-	while (--p >= string)
-		*p = toupper((int)*p);
+	[self freeMemory: string];
+	string = nstr;
+	length = nlen;
 
 	return self;
 }
 
 - lower
 {
-	char *p = string + length;
+	of_unichar_t c, lc;
+	of_unichar_t *ustr;
+	size_t ulen, nlen;
+	size_t i, j, d;
+	char *nstr;
+
+	if (!is_utf8) {
+		uint8_t *p = (uint8_t*)string + length;
+		uint8_t t;
+
+		while (--p >= (uint8_t*)string) {
+			t = of_unicode_lower_table[0][*p];
+			if (t != 0)
+				*p = t;
+		}
+
+		return self;
+	}
+
+	/*
+	 * The buffer holds one of_unichar_t per character, so the item size
+	 * has to be sizeof(of_unichar_t) rather than the character count.
+	 */
+	ulen = [self length];
+	ustr = [self allocMemoryForNItems: ulen
+				 withSize: sizeof(of_unichar_t)];
+
+	j = 0;
+	nlen = 0;
+
+	for (i = 0; i < length; i++) {
+		c = of_string_utf8_to_unicode(string + i, length - i);
+
+		if (c == OF_INVALID_UNICHAR || c > 0x10FFFF) {
+			[self freeMemory: ustr];
+			@throw [OFInvalidEncodingException newWithClass: isa];
+		}
+
+		lc = of_unicode_lower_table[c >> 8][c & 0xFF];
+		if (lc == 0)
+			lc = c;
+		ustr[j++] = lc;
+
+		if (lc < 0x80)
+			nlen++;
+		else if (lc < 0x800)
+			nlen += 2;
+		else if (lc < 0x10000)
+			nlen += 3;
+		else if (lc < 0x110000)
+			nlen += 4;
+		else {
+			[self freeMemory: ustr];
+			@throw [OFInvalidEncodingException newWithClass: isa];
+		}
+
+		if (c < 0x80);
+		else if (c < 0x800)
+			i++;
+		else if (c < 0x10000)
+			i += 2;
+		else if (c < 0x110000)
+			i += 3;
+		else {
+			[self freeMemory: ustr];
+			@throw [OFInvalidEncodingException newWithClass: isa];
+		}
+	}
+
+	@try {
+		nstr = [self allocMemoryWithSize: nlen + 1];
+	} @catch (OFException *e) {
+		[self freeMemory: ustr];
+		@throw e;
+	}
+
+	j = 0;
+
+	for (i = 0; i < ulen; i++) {
+		if ((d = of_string_unicode_to_utf8(ustr[i], nstr + j)) == 0) {
+			[self freeMemory: ustr];
+			[self freeMemory: nstr];
+			@throw [OFInvalidEncodingException newWithClass: isa];
+		}
+		j += d;
+	}
 
-	if (is_utf8)
-		@throw [OFInvalidEncodingException newWithClass: isa];
+	assert(j == nlen);
+	nstr[j] = 0;
+	[self freeMemory: ustr];
 
-	while (--p >= string)
-		*p = tolower((int)*p);
+	[self freeMemory: string];
+	string = nstr;
+	length = nlen;
 
 	return self;
 }
 
 - removeCharactersFromIndex: (size_t)start
Index: src/OFString.m
==================================================================
--- src/OFString.m
+++ src/OFString.m
@@ -143,27 +143,29 @@
 of_unichar_t
 of_string_utf8_to_unicode(const char *buf_, size_t len)
 {
 	const uint8_t *buf = (const uint8_t*)buf_;
 
-	if (*buf < 0x80)
+	if (!(*buf & 0x80))
 		return buf[0];
 
-	switch (*buf & 0xF0) {
-	case 0xC0:
-	case 0xD0:
+	if ((*buf & 0xE0) == 0xC0) {
 		if (OF_UNLIKELY(len < 2))
 			return OF_INVALID_UNICHAR;
 
 		return ((buf[0] & 0x1F) << 6) | (buf[1] & 0x3F);
-	case 0xE0:
+	}
+
+	if ((*buf & 0xF0) == 0xE0) {
 		if (OF_UNLIKELY(len < 3))
 			return OF_INVALID_UNICHAR;
 
 		return ((buf[0] & 0x0F) << 12) | ((buf[1] & 0x3F) << 6) |
 		    (buf[2] & 0x3F);
-	case 0xF0:
+	}
+
+	if ((*buf & 0xF8) == 0xF0) {
 		if (OF_UNLIKELY(len < 4))
 			return OF_INVALID_UNICHAR;
 
 		return ((buf[0] & 0x07) << 18) | ((buf[1] & 0x3F) << 12) |
 		    ((buf[2] & 0x3F) << 6) | (buf[3] & 0x3F);
@@ -325,11 +327,11 @@
 		break;
 	case OF_STRING_ENCODING_ISO_8859_1:
 	case OF_STRING_ENCODING_ISO_8859_15:
 	case OF_STRING_ENCODING_WINDOWS_1252:
 		for (i = j = 0; i < len; i++) {
-			if ((uint8_t)str[i] < 0x80)
+			if (!(str[i] & 0x80))
 				string[j++] = str[i];
 			else {
 				char buf[4];
 				of_unichar_t chr;
 				size_t chr_bytes;
Index: tests/string.m
==================================================================
--- tests/string.m
+++ tests/string.m
@@ -75,13 +75,17 @@
 	EXPECT_EXCEPTION(@"Detect out of range in -[characterAtIndex:]",
 	    OFOutOfRangeException, [s[0] characterAtIndex: 7])
 
 	TEST(@"-[reverse]", [[s[0] reverse] isEqual: @"3𝄞1€sät"])
 
-	s[0] = [OFMutableString stringWithString: @"321tset"];
-	TEST(@"-[upper]", [[s[0] upper] isEqual: @"321TSET"])
-	TEST(@"-[lower]", [[s[0] lower] isEqual: @"321tset"])
+	s[1] = [OFMutableString stringWithString: @"abc"];
+
+	TEST(@"-[upper]", [[s[0] upper] isEqual: @"3𝄞1€SÄT"] &&
+	    [[s[1] upper] isEqual: @"ABC"])
+
+	TEST(@"-[lower]", [[s[0] lower] isEqual: @"3𝄞1€sät"] &&
+	    [[s[1] lower] isEqual: @"abc"])
 
 	TEST(@"+[stringWithCString:length:]",
 	    (s[0] = [OFMutableString stringWithCString: "foobar"
 						length: 3]) &&
 	    [s[0] isEqual: @"foo"])