@@ -9,232 +9,55 @@ * the packaging of this file. */ #import "config.h" -#define _GNU_SOURCE -#include -#include #include #include -#include - -#ifdef HAVE_SYS_MMAN_H -#include -#else -#define madvise(addr, len, advise) -#endif #import "OFString.h" -#import "OFConstString.h" +#import "OFMutableString.h" #import "OFExceptions.h" #import "OFMacros.h" -#ifndef HAVE_ASPRINTF -#import "asprintf.h" -#endif - -static OF_INLINE int -check_utf8(const char *str, size_t len) -{ - size_t i; - BOOL utf8; - - utf8 = NO; - - madvise((void*)str, len, MADV_SEQUENTIAL); - - for (i = 0; i < len; i++) { - /* No sign of UTF-8 here */ - if (OF_LIKELY(!(str[i] & 0x80))) - continue; - - utf8 = YES; - - /* We're missing a start byte here */ - if (OF_UNLIKELY(!(str[i] & 0x40))) { - madvise((void*)str, len, MADV_NORMAL); - return -1; - } - - /* We have at minimum a 2 byte character -> check next byte */ - if (OF_UNLIKELY(len < i + 1 || (str[i + 1] & 0xC0) != 0x80)) { - madvise((void*)str, len, MADV_NORMAL); - return -1; - } - - /* Check if we have at minimum a 3 byte character */ - if (OF_LIKELY(!(str[i] & 0x20))) { - i++; - continue; - } - - /* We have at minimum a 3 byte char -> check second next byte */ - if (OF_UNLIKELY(len < i + 2 || (str[i + 2] & 0xC0) != 0x80)) { - madvise((void*)str, len, MADV_NORMAL); - return -1; - } - - /* Check if we have a 4 byte character */ - if (OF_LIKELY(!(str[i] & 0x10))) { - i += 2; - continue; - } - - /* We have a 4 byte character -> check third next byte */ - if (OF_UNLIKELY(len < i + 3 || (str[i + 3] & 0xC0) != 0x80)) { - madvise((void*)str, len, MADV_NORMAL); - return -1; - } - - /* - * Just in case, check if there's a 5th character, which is - * forbidden by UTF-8 - */ - if (OF_UNLIKELY(str[i] & 0x08)) { - madvise((void*)str, len, MADV_NORMAL); - return -1; - } - - i += 3; - } - - madvise((void*)str, len, MADV_NORMAL); - - return (utf8 ? 1 : 0); -} - @implementation OFString + string { - return [[[OFString alloc] init] autorelease]; + return [[[OFMutableString alloc] init] autorelease]; } + stringWithCString: (const char*)str { - return [[[OFString alloc] initWithCString: str] autorelease]; + return [[[OFMutableString alloc] initWithCString: str] autorelease]; } + stringWithFormat: (const char*)fmt, ... { id ret; va_list args; va_start(args, fmt); - ret = [[[OFString alloc] initWithFormat: fmt - andArguments: args] autorelease]; + ret = [[[OFMutableString alloc] initWithFormat: fmt + andArguments: args] autorelease]; va_end(args); return ret; } + stringWithFormat: (const char*)fmt andArguments: (va_list)args { - return [[[OFString alloc] initWithFormat: fmt - andArguments: args] autorelease]; + return [[[OFMutableString alloc] initWithFormat: fmt + andArguments: args] autorelease]; } - init { [super init]; length = 0; string = NULL; - is_utf8 = NO; - - return self; -} - -- initWithCString: (const char*)str -{ - Class c; - - self = [super init]; - - if (str != NULL) { - length = strlen(str); - - switch (check_utf8(str, length)) { - case 1: - is_utf8 = YES; - break; - case -1: - c = isa; - [super free]; - @throw [OFInvalidEncodingException - newWithClass: c]; - } - - @try { - string = [self allocWithSize: length + 1]; - } @catch (OFException *e) { - /* - * We can't use [super free] on OS X here. Compiler bug? - * [self free] will do here as we don't reimplement - * free. - */ - [self free]; - @throw e; - } - memcpy(string, str, length + 1); - } - - return self; -} - -- initWithFormat: (const char*)fmt, ... -{ - id ret; - va_list args; - - va_start(args, fmt); - ret = [self initWithFormat: fmt - andArguments: args]; - va_end(args); - - return ret; -} - -- initWithFormat: (const char*)fmt - andArguments: (va_list)args -{ - int t; - Class c; - - self = [super init]; - - if (fmt == NULL) { - c = isa; - [super free]; - @throw [OFInvalidFormatException newWithClass: c]; - } - - if ((t = vasprintf(&string, fmt, args)) == -1) { - c = isa; - [super free]; - @throw [OFInitializationFailedException newWithClass: c]; - } - length = t; - - switch (check_utf8(string, length)) { - case 1: - is_utf8 = YES; - break; - case -1: - free(string); - c = isa; - [super free]; - @throw [OFInvalidEncodingException newWithClass: c]; - } - - @try { - [self addToMemoryPool: string]; - } @catch (OFException *e) { - free(string); - @throw e; - } return self; } - (const char*)cString @@ -250,53 +73,23 @@ - (id)copy { return [OFString stringWithCString: string]; } -- setTo: (const char*)str -{ - size_t len; - - if (string != NULL) - [self freeMem: string]; - - len = strlen(str); - - switch (check_utf8(str, len)) { - case 1: - is_utf8 = YES; - break; - case -1: - string = NULL; - length = 0; - is_utf8 = NO; - - @throw [OFInvalidEncodingException newWithClass: isa]; - } - - length = len; - string = [self allocWithSize: length + 1]; - memcpy(string, str, length + 1); - - return self; -} - - (BOOL)isEqual: (id)obj { - if (![obj isKindOf: [OFString class]] && - ![obj isKindOf: [OFConstString class]]) + if (![obj isKindOf: [OFString class]]) return NO; if (strcmp(string, [obj cString])) return NO; return YES; } - (int)compare: (id)obj { - if (![obj isKindOf: [OFString class]] && - ![obj isKindOf: [OFConstString class]]) + if (![obj isKindOf: [OFString class]]) @throw [OFInvalidArgumentException newWithClass: isa]; return strcmp(string, [obj cString]); } @@ -310,188 +103,55 @@ OF_HASH_ADD(hash, string[i]); OF_HASH_FINALIZE(hash); return hash; } + +- setTo: (const char*)str +{ + @throw [OFNotImplementedException newWithClass: isa + andSelector: _cmd]; +} - append: (OFString*)str { - return [self appendCString: [str cString]]; + @throw [OFNotImplementedException newWithClass: isa + andSelector: _cmd]; } - appendCString: (const char*)str { - size_t strlength; - - strlength = strlen(str); - - switch (check_utf8(str, strlength)) { - case 1: - is_utf8 = YES; - break; - case -1: - @throw [OFInvalidEncodingException newWithClass: isa]; - } - - string = [self resizeMem: string - toSize: length + strlength + 1]; - memcpy(string + length, str, strlength + 1); - length += strlength; - - return self; + @throw [OFNotImplementedException newWithClass: isa + andSelector: _cmd]; } - appendWithFormatCString: (const char*)fmt, ... { - id ret; - va_list args; - - va_start(args, fmt); - ret = [self appendWithFormatCString: fmt - andArguments: args]; - va_end(args); - - return ret; + @throw [OFNotImplementedException newWithClass: isa + andSelector: _cmd]; } - appendWithFormatCString: (const char*)fmt andArguments: (va_list)args { - char *t; - - if (fmt == NULL) - @throw [OFInvalidFormatException newWithClass: isa]; - - if ((vasprintf(&t, fmt, args)) == -1) - /* - * This is only the most likely error to happen. - * Unfortunately, as errno isn't always thread-safe, there's - * no good way for us to find out what really happened. - */ - @throw [OFNoMemException newWithClass: isa]; - - @try { - [self appendCString: t]; - } @finally { - free(t); - } - - return self; + @throw [OFNotImplementedException newWithClass: isa + andSelector: _cmd]; } - reverse { - size_t i, j, len = length / 2; - - madvise(string, len, MADV_SEQUENTIAL); - - /* We reverse all bytes and restore UTF-8 later, if necessary */ - for (i = 0, j = length - 1; i < len; i++, j--) { - string[i] ^= string[j]; - string[j] ^= string[i]; - string[i] ^= string[j]; - } - - if (!is_utf8) { - madvise(string, len, MADV_NORMAL); - return self; - } - - for (i = 0; i < length; i++) { - /* ASCII */ - if (OF_LIKELY(!(string[i] & 0x80))) - continue; - - /* A start byte can't happen first as we reversed everything */ - if (OF_UNLIKELY(string[i] & 0x40)) { - madvise(string, len, MADV_NORMAL); - @throw [OFInvalidEncodingException newWithClass: isa]; - } - - /* Next byte must not be ASCII */ - if (OF_UNLIKELY(length < i + 1 || !(string[i + 1] & 0x80))) { - madvise(string, len, MADV_NORMAL); - @throw [OFInvalidEncodingException newWithClass: isa]; - } - - /* Next byte is the start byte */ - if (OF_LIKELY(string[i + 1] & 0x40)) { - string[i] ^= string[i + 1]; - string[i + 1] ^= string[i]; - string[i] ^= string[i + 1]; - - i++; - continue; - } - - /* Second next byte must not be ASCII */ - if (OF_UNLIKELY(length < i + 2 || !(string[i + 2] & 0x80))) { - madvise(string, len, MADV_NORMAL); - @throw [OFInvalidEncodingException newWithClass: isa]; - } - - /* Second next byte is the start byte */ - if (OF_LIKELY(string[i + 2] & 0x40)) { - string[i] ^= string[i + 2]; - string[i + 2] ^= string[i]; - string[i] ^= string[i + 2]; - - i += 2; - continue; - } - - /* Third next byte must not be ASCII */ - if (OF_UNLIKELY(length < i + 3 || !(string[i + 3] & 0x80))) { - madvise(string, len, MADV_NORMAL); - @throw [OFInvalidEncodingException newWithClass: isa]; - } - - /* Third next byte is the start byte */ - if (OF_LIKELY(string[i + 3] & 0x40)) { - string[i] ^= string[i + 3]; - string[i + 3] ^= string[i]; - string[i] ^= string[i + 3]; - - string[i + 1] ^= string[i + 2]; - string[i + 2] ^= string[i + 1]; - string[i + 1] ^= string[i + 2]; - - i += 3; - continue; - } - - /* UTF-8 does not allow more than 4 bytes per character */ - madvise(string, len, MADV_NORMAL); - @throw [OFInvalidEncodingException newWithClass: isa]; - } - - madvise(string, len, MADV_NORMAL); - - return self; + @throw [OFNotImplementedException newWithClass: isa + andSelector: _cmd]; } - upper { - char *p = string + length; - - if (is_utf8) - @throw [OFInvalidEncodingException newWithClass: isa]; - - while (--p >= string) - *p = toupper((int)*p); - - return self; + @throw [OFNotImplementedException newWithClass: isa + andSelector: _cmd]; } - lower { - char *p = string + length; - - if (is_utf8) - @throw [OFInvalidEncodingException newWithClass: isa]; - - while (--p >= string) - *p = tolower((int)*p); - - return self; + @throw [OFNotImplementedException newWithClass: isa + andSelector: _cmd]; } @end