@@ -13,10 +13,12 @@ #import #import #import +#import + #import "OFString.h" #import "OFExceptions.h" #import "OFMacros.h" static OF_INLINE int @@ -25,57 +27,68 @@ size_t i; BOOL utf8; utf8 = NO; + madvise((void*)str, len, MADV_SEQUENTIAL); + for (i = 0; i < len; i++) { /* No sign of UTF-8 here */ if (OF_LIKELY(~str[i] & 0x80)) continue; utf8 = YES; /* We're missing a start byte here */ - if (OF_UNLIKELY(~str[i] & 0x40)) + if (OF_UNLIKELY(~str[i] & 0x40)) { + madvise((void*)str, len, MADV_NORMAL); return -1; + } /* We have at minimum a 2 byte character -> check next byte */ - if (OF_UNLIKELY(len < i + 1 || ~str[i + 1] & 0x80 || - str[i + 1] & 0x40)) + if (OF_UNLIKELY(len < i + 1 || (str[i + 1] & 0xC0) != 0x80)) { + madvise((void*)str, len, MADV_NORMAL); return -1; + } /* Check if we have at minimum a 3 byte character */ if (OF_LIKELY(~str[i] & 0x20)) { i++; continue; } /* We have at minimum a 3 byte char -> check second next byte */ - if (OF_UNLIKELY(len < i + 2 || ~str[i + 2] & 0x80 || - str[i + 2] & 0x40)) + if (OF_UNLIKELY(len < i + 2 || (str[i + 2] & 0xC0) != 0x80)) { + madvise((void*)str, len, MADV_NORMAL); return -1; + } /* Check if we have a 4 byte character */ if (OF_LIKELY(~str[i] & 0x10)) { i += 2; continue; } /* We have a 4 byte character -> check third next byte */ - if (OF_UNLIKELY(len < i + 3 || ~str[i + 3] & 0x80 || - str[i + 3] & 0x40)) + if (OF_UNLIKELY(len < i + 3 || (str[i + 3] & 0xC0) != 0x80)) { + madvise((void*)str, len, MADV_NORMAL); return -1; + } /* * Just in case, check if there's a 5th character, which is * forbidden by UTF-8 */ - if (OF_UNLIKELY(str[i] & 0x08)) + if (OF_UNLIKELY(str[i] & 0x08)) { + madvise((void*)str, len, MADV_NORMAL); return -1; + } i += 3; } + + madvise((void*)str, len, MADV_NORMAL); return (utf8 ? 1 : 0); } @implementation OFString @@ -186,33 +199,41 @@ } - reverse { size_t i, j, len = length / 2; + + madvise(string, len, MADV_SEQUENTIAL); /* We reverse all bytes and restore UTF-8 later, if necessary */ for (i = 0, j = length - 1; i < len; i++, j--) { string[i] ^= string[j]; string[j] ^= string[i]; string[i] ^= string[j]; } - if (!is_utf8) + if (!is_utf8) { + madvise(string, len, MADV_NORMAL); return self; + } for (i = 0; i < length; i++) { /* ASCII */ if (OF_LIKELY(~string[i] & 0x80)) continue; /* A start byte can't happen first as we reversed everything */ - if (OF_UNLIKELY(string[i] & 0x40)) + if (OF_UNLIKELY(string[i] & 0x40)) { + madvise(string, len, MADV_NORMAL); @throw [OFInvalidEncodingException newWithObject: self]; + } /* Next byte must not be ASCII */ - if (OF_UNLIKELY(length < i + 1 || ~string[i + 1] & 0x80)) + if (OF_UNLIKELY(length < i + 1 || ~string[i + 1] & 0x80)) { + madvise(string, len, MADV_NORMAL); @throw [OFInvalidEncodingException newWithObject: self]; + } /* Next byte is the start byte */ if (OF_LIKELY(string[i + 1] & 0x40)) { string[i] ^= string[i + 1]; string[i + 1] ^= string[i]; @@ -221,12 +242,14 @@ i++; continue; } /* Second next byte must not be ASCII */ - if (OF_UNLIKELY(length < i + 2 || ~string[i + 2] & 0x80)) + if (OF_UNLIKELY(length < i + 2 || ~string[i + 2] & 0x80)) { + madvise(string, len, MADV_NORMAL); @throw [OFInvalidEncodingException newWithObject: self]; + } /* Second next byte is the start byte */ if (OF_LIKELY(string[i + 2] & 0x40)) { string[i] ^= string[i + 2]; string[i + 2] ^= string[i]; @@ -235,12 +258,14 @@ i += 2; continue; } /* Third next byte must not be ASCII */ - if (OF_UNLIKELY(length < i + 3 || ~string[i + 3] & 0x80)) + if (OF_UNLIKELY(length < i + 3 || ~string[i + 3] & 0x80)) { + madvise(string, len, MADV_NORMAL); @throw [OFInvalidEncodingException newWithObject: self]; + } /* Third next byte is the start byte */ if (OF_LIKELY(string[i + 3] & 0x40)) { string[i] ^= string[i + 3]; string[i + 3] ^= string[i]; @@ -253,12 +278,15 @@ i += 3; continue; } /* UTF-8 does not allow more than 4 bytes per character */ + madvise(string, len, MADV_NORMAL); @throw [OFInvalidEncodingException newWithObject: self]; } + + madvise(string, len, MADV_NORMAL); return self; } - upper