@@ -63,15 +63,14 @@ return; } ulen = [self length]; - ustr = [self allocMemoryForNItems: [self length] - withSize: ulen]; + ustr = [self allocMemoryForNItems: ulen + withSize: sizeof(of_unichar_t)]; - i = 0; - j = 0; + i = j = 0; nlen = 0; while (i < length) { clen = of_string_utf8_to_unicode(string + i, length - i, &c); @@ -136,10 +135,15 @@ size_t len; [self freeMemory: string]; len = strlen(str); + + if (len >= 3 && !memcmp(str, "\xEF\xBB\xBF", 3)) { + str += 3; + len -= 3; + } switch (of_string_check_utf8(str, len)) { case 0: isUTF8 = NO; break; @@ -162,10 +166,15 @@ - (void)appendCString: (const char*)str { size_t strlength; strlength = strlen(str); + + if (strlength >= 3 && !memcmp(str, "\xEF\xBB\xBF", 3)) { + str += 3; + strlength -= 3; + } switch (of_string_check_utf8(str, strlength)) { case 1: isUTF8 = YES; break; @@ -180,10 +189,15 @@ } - (void)appendCString: (const char*)str withLength: (size_t)len { + if (len >= 3 && !memcmp(str, "\xEF\xBB\xBF", 3)) { + str += 3; + len -= 3; + } + switch (of_string_check_utf8(str, len)) { case 1: isUTF8 = YES; break; case -1: @@ -266,21 +280,21 @@ - (void)reverse { size_t i, j, len = length / 2; - madvise(string, len, MADV_SEQUENTIAL); + madvise(string, length, MADV_SEQUENTIAL); /* We reverse all bytes and restore UTF-8 later, if necessary */ for (i = 0, j = length - 1; i < len; i++, j--) { string[i] ^= string[j]; string[j] ^= string[i]; string[i] ^= string[j]; } if (!isUTF8) { - madvise(string, len, MADV_NORMAL); + madvise(string, length, MADV_NORMAL); return; } for (i = 0; i < length; i++) { /* ASCII */ @@ -287,17 +301,17 @@ if (OF_LIKELY(!(string[i] & 0x80))) continue; /* A start byte can't happen first as we reversed everything */ if (OF_UNLIKELY(string[i] & 0x40)) { - madvise(string, len, MADV_NORMAL); + madvise(string, length, MADV_NORMAL); @throw [OFInvalidEncodingException newWithClass: isa]; } /* Next byte must not be ASCII */ if (OF_UNLIKELY(length < i + 1 || !(string[i + 1] & 0x80))) { - madvise(string, len, MADV_NORMAL); + madvise(string, length, MADV_NORMAL); @throw [OFInvalidEncodingException newWithClass: isa]; } /* Next byte is the start byte */ if (OF_LIKELY(string[i + 1] & 0x40)) { @@ -309,11 +323,11 @@ continue; } /* Second next byte must not be ASCII */ if (OF_UNLIKELY(length < i + 2 || !(string[i + 2] & 0x80))) { - madvise(string, len, MADV_NORMAL); + madvise(string, length, MADV_NORMAL); @throw [OFInvalidEncodingException newWithClass: isa]; } /* Second next byte is the start byte */ if (OF_LIKELY(string[i + 2] & 0x40)) { @@ -325,11 +339,11 @@ continue; } /* Third next byte must not be ASCII */ if (OF_UNLIKELY(length < i + 3 || !(string[i + 3] & 0x80))) { - madvise(string, len, MADV_NORMAL); + madvise(string, length, MADV_NORMAL); @throw [OFInvalidEncodingException newWithClass: isa]; } /* Third next byte is the start byte */ if (OF_LIKELY(string[i + 3] & 0x40)) { @@ -344,15 +358,15 @@ i += 3; continue; } /* UTF-8 does not allow more than 4 bytes per character */ - madvise(string, len, MADV_NORMAL); + madvise(string, length, MADV_NORMAL); @throw [OFInvalidEncodingException newWithClass: isa]; } - madvise(string, len, MADV_NORMAL); + madvise(string, length, MADV_NORMAL); } - (void)upper { [self _applyTable: of_unicode_upper_table