Index: src/OFMutableString.h ================================================================== --- src/OFMutableString.h +++ src/OFMutableString.h @@ -16,37 +16,6 @@ /** * A class for storing and modifying strings. */ @interface OFMutableString: OFString -{ - BOOL is_utf8; -} - -/** - * Initializes an already allocated OFMutableString from a C string. - * - * \param str A C string to initialize the OFMutableString with - * \return An initialized OFMutableString - */ -- initWithCString: (const char*)str; - -/** - * Initializes an already allocated OFMutableString from a format C string. - * See printf for the format syntax. - * - * \param fmt A string used as format to initialize the OFMutableString - * \return An initialized OFMutableString - */ -- initWithFormat: (OFString*)fmt, ...; - -/** - * Initializes an already allocated OFMutableString from a format C string. - * See printf for the format syntax. - * - * \param fmt A string used as format to initialize the OFMutableString - * \param args The arguments used in the format string - * \return An initialized OFMutableString - */ -- initWithFormat: (OFString*)fmt - andArguments: (va_list)args; @end Index: src/OFMutableString.m ================================================================== --- src/OFMutableString.m +++ src/OFMutableString.m @@ -10,13 +10,13 @@ */ #import "config.h" #define _GNU_SOURCE +#include #include #include -#include #include #include #ifdef HAVE_SYS_MMAN_H #include @@ -30,183 +30,21 @@ #ifndef HAVE_ASPRINTF #import "asprintf.h" #endif -static OF_INLINE int -check_utf8(const char *str, size_t len) -{ - size_t i; - BOOL utf8; - - utf8 = NO; - - madvise((void*)str, len, MADV_SEQUENTIAL); - - for (i = 0; i < len; i++) { - /* No sign of UTF-8 here */ - if (OF_LIKELY(!(str[i] & 0x80))) - continue; - - utf8 = YES; - - /* We're missing a start byte here */ - if (OF_UNLIKELY(!(str[i] & 0x40))) { - madvise((void*)str, len, MADV_NORMAL); - return -1; - } - - /* We have at minimum a 2 byte character -> check next byte */ - if (OF_UNLIKELY(len < i + 1 || (str[i + 1] & 0xC0) != 0x80)) { - madvise((void*)str, len, MADV_NORMAL); - return -1; - } - - /* Check if we have at minimum a 3 byte character */ - if (OF_LIKELY(!(str[i] & 0x20))) { - i++; - continue; - } - - /* We have at minimum a 3 byte char -> check second next byte */ - if (OF_UNLIKELY(len < i + 2 || (str[i + 2] & 0xC0) != 0x80)) { - madvise((void*)str, len, MADV_NORMAL); - return -1; - } - - /* Check if we have a 4 byte character */ - if (OF_LIKELY(!(str[i] & 0x10))) { - i += 2; - continue; - } - - /* We have a 4 byte character -> check third next byte */ - if (OF_UNLIKELY(len < i + 3 || (str[i + 3] & 0xC0) != 0x80)) { - madvise((void*)str, len, MADV_NORMAL); - return -1; - } - - /* - * Just in case, check if there's a 5th character, which is - * forbidden by UTF-8 - */ - if (OF_UNLIKELY(str[i] & 0x08)) { - madvise((void*)str, len, MADV_NORMAL); - return -1; - } - - i += 3; - } - - madvise((void*)str, len, MADV_NORMAL); - - return (utf8 ? 1 : 0); -} - @implementation OFMutableString -- initWithCString: (const char*)str -{ - Class c; - - self = [super init]; - - if (str != NULL) { - length = strlen(str); - - switch (check_utf8(str, length)) { - case 1: - is_utf8 = YES; - break; - case -1: - c = isa; - [super dealloc]; - @throw [OFInvalidEncodingException - newWithClass: c]; - } - - @try { - string = [self allocWithSize: length + 1]; - } @catch (OFException *e) { - /* - * We can't use [super dealloc] on OS X here. - * Compiler bug? Anyway, [self dealloc] will do here as - * we don't reimplement dealloc. - */ - [self dealloc]; - @throw e; - } - memcpy(string, str, length + 1); - } - - return self; -} - -- initWithFormat: (OFString*)fmt, ... -{ - id ret; - va_list args; - - va_start(args, fmt); - ret = [self initWithFormat: fmt - andArguments: args]; - va_end(args); - - return ret; -} - -- initWithFormat: (OFString*)fmt - andArguments: (va_list)args -{ - int t; - Class c; - - self = [super init]; - - if (fmt == NULL) { - c = isa; - [super dealloc]; - @throw [OFInvalidFormatException newWithClass: c]; - } - - if ((t = vasprintf(&string, [fmt cString], args)) == -1) { - c = isa; - [super dealloc]; - @throw [OFInitializationFailedException newWithClass: c]; - } - length = t; - - switch (check_utf8(string, length)) { - case 1: - is_utf8 = YES; - break; - case -1: - free(string); - c = isa; - [super dealloc]; - @throw [OFInvalidEncodingException newWithClass: c]; - } - - @try { - [self addToMemoryPool: string]; - } @catch (OFException *e) { - free(string); - @throw e; - } - - return self; -} - -- setTo: (const char*)str +- setToCString: (const char*)str { size_t len; if (string != NULL) [self freeMem: string]; len = strlen(str); - switch (check_utf8(str, len)) { + switch (of_string_check_utf8(str, len)) { case 1: is_utf8 = YES; break; case -1: string = NULL; @@ -232,11 +70,11 @@ { size_t strlength; strlength = strlen(str); - switch (check_utf8(str, strlength)) { + switch (of_string_check_utf8(str, strlength)) { case 1: is_utf8 = YES; break; case -1: @throw [OFInvalidEncodingException newWithClass: isa]; Index: src/OFString.h ================================================================== --- src/OFString.h +++ src/OFString.h @@ -13,10 +13,12 @@ #include #import "OFObject.h" #import "OFArray.h" +extern int of_string_check_utf8(const char *str, size_t len); + /** * A class for managing strings. */ @interface OFString: OFObject { @@ -27,36 +29,37 @@ int length; #if __LP64__ int _unused; #endif #endif + BOOL is_utf8; } /** * \return A new autoreleased OFMutableString */ + string; /** - * Creates a new OFMutableString from a C string. + * Creates a new OFString from a C string. * * \param str A C string to initialize the OFMutableString with * \return A new autoreleased OFMutableString */ + stringWithCString: (const char*)str; /** - * Creates a new OFMutableString from a format C string. + * Creates a new OFString from a format C string. * See printf for the format syntax. * * \param fmt A string used as format to initialize the OFMutableString * \return A new autoreleased OFMutableString */ + stringWithFormat: (OFString*)fmt, ...; /** - * Creates a new OFMutableString from a format C string. + * Creates a new OFString from a format C string. * See printf for the format syntax. * * \param fmt A string used as format to initialize the OFMutableString * \param args The arguments used in the format string * \return A new autoreleased OFMutableString @@ -69,10 +72,38 @@ * * \return An initialized OFString */ - init; +/** + * Initializes an already allocated OFMutableString from a C string. + * + * \param str A C string to initialize the OFMutableString with + * \return An initialized OFMutableString + */ +- initWithCString: (const char*)str; + +/** + * Initializes an already allocated OFMutableString from a format C string. + * See printf for the format syntax. + * + * \param fmt A string used as format to initialize the OFMutableString + * \return An initialized OFMutableString + */ +- initWithFormat: (OFString*)fmt, ...; + +/** + * Initializes an already allocated OFMutableString from a format C string. + * See printf for the format syntax. + * + * \param fmt A string used as format to initialize the OFMutableString + * \param args The arguments used in the format string + * \return An initialized OFMutableString + */ +- initWithFormat: (OFString*)fmt + andArguments: (va_list)args; + /** * \return The OFString as a C string */ - (const char*)cString; @@ -93,11 +124,11 @@ /** * Sets the OFString to the specified OFString. * * \param str An OFString to set the OFString to. */ -- setTo: (const char*)str; +- setToCString: (const char*)str; /** * Appends another OFString to the OFString. * * \param str An OFString to append Index: src/OFString.m ================================================================== --- src/OFString.m +++ src/OFString.m @@ -9,62 +9,233 @@ * the packaging of this file. */ #import "config.h" +#define _GNU_SOURCE #include #include #include + +#ifdef HAVE_SYS_MMAN_H +#include +#else +#define madvise(addr, len, advise) +#endif #import "OFString.h" #import "OFAutoreleasePool.h" #import "OFURLEncoding.h" #import "OFExceptions.h" #import "OFMacros.h" + +#ifndef HAVE_ASPRINTF +#import "asprintf.h" +#endif /* Reference for static linking */ void _reference_to_OFURLEncoding_in_OFString() { _OFURLEncoding_reference = 1; }; + +int +of_string_check_utf8(const char *str, size_t len) +{ + size_t i; + int utf8 = 0; + + madvise((void*)str, len, MADV_SEQUENTIAL); + + for (i = 0; i < len; i++) { + /* No sign of UTF-8 here */ + if (OF_LIKELY(!(str[i] & 0x80))) + continue; + + utf8 = 1; + + /* We're missing a start byte here */ + if (OF_UNLIKELY(!(str[i] & 0x40))) { + madvise((void*)str, len, MADV_NORMAL); + return -1; + } + + /* We have at minimum a 2 byte character -> check next byte */ + if (OF_UNLIKELY(len < i + 1 || (str[i + 1] & 0xC0) != 0x80)) { + madvise((void*)str, len, MADV_NORMAL); + return -1; + } + + /* Check if we have at minimum a 3 byte character */ + if (OF_LIKELY(!(str[i] & 0x20))) { + i++; + continue; + } + + /* We have at minimum a 3 byte char -> check second next byte */ + if (OF_UNLIKELY(len < i + 2 || (str[i + 2] & 0xC0) != 0x80)) { + madvise((void*)str, len, MADV_NORMAL); + return -1; + } + + /* Check if we have a 4 byte character */ + if (OF_LIKELY(!(str[i] & 0x10))) { + i += 2; + continue; + } + + /* We have a 4 byte character -> check third next byte */ + if (OF_UNLIKELY(len < i + 3 || (str[i + 3] & 0xC0) != 0x80)) { + madvise((void*)str, len, MADV_NORMAL); + return -1; + } + + /* + * Just in case, check if there's a 5th character, which is + * forbidden by UTF-8 + */ + if (OF_UNLIKELY(str[i] & 0x08)) { + madvise((void*)str, len, MADV_NORMAL); + return -1; + } + + i += 3; + } + + madvise((void*)str, len, MADV_NORMAL); + + return utf8; +} @implementation OFString + string { - return [[[OFMutableString alloc] init] autorelease]; + return [[[self alloc] init] autorelease]; } + stringWithCString: (const char*)str { - return [[[OFMutableString alloc] initWithCString: str] autorelease]; + return [[[self alloc] initWithCString: str] autorelease]; } + stringWithFormat: (OFString*)fmt, ... { id ret; va_list args; va_start(args, fmt); - ret = [[[OFMutableString alloc] initWithFormat: fmt - andArguments: args] autorelease]; + ret = [[[self alloc] initWithFormat: fmt + andArguments: args] autorelease]; va_end(args); return ret; } + stringWithFormat: (OFString*)fmt andArguments: (va_list)args { - return [[[OFMutableString alloc] initWithFormat: fmt - andArguments: args] autorelease]; + return [[[self alloc] initWithFormat: fmt + andArguments: args] autorelease]; } - init { [super init]; string = NULL; + + return self; +} + +- initWithCString: (const char*)str +{ + Class c; + + self = [super init]; + + if (str != NULL) { + length = strlen(str); + + switch (of_string_check_utf8(str, length)) { + case 1: + is_utf8 = YES; + break; + case -1: + c = isa; + [super dealloc]; + @throw [OFInvalidEncodingException + newWithClass: c]; + } + + @try { + string = [self allocWithSize: length + 1]; + } @catch (OFException *e) { + /* + * We can't use [super dealloc] on OS X here. + * Compiler bug? Anyway, [self dealloc] will do here as + * we don't reimplement dealloc. + */ + [self dealloc]; + @throw e; + } + memcpy(string, str, length + 1); + } + + return self; +} + +- initWithFormat: (OFString*)fmt, ... +{ + id ret; + va_list args; + + va_start(args, fmt); + ret = [self initWithFormat: fmt + andArguments: args]; + va_end(args); + + return ret; +} + +- initWithFormat: (OFString*)fmt + andArguments: (va_list)args +{ + int t; + Class c; + + self = [super init]; + + if (fmt == NULL) { + c = isa; + [super dealloc]; + @throw [OFInvalidFormatException newWithClass: c]; + } + + if ((t = vasprintf(&string, [fmt cString], args)) == -1) { + c = isa; + [super dealloc]; + @throw [OFInitializationFailedException newWithClass: c]; + } + length = t; + + switch (of_string_check_utf8(string, length)) { + case 1: + is_utf8 = YES; + break; + case -1: + free(string); + c = isa; + [super dealloc]; + @throw [OFInvalidEncodingException newWithClass: c]; + } + + @try { + [self addToMemoryPool: string]; + } @catch (OFException *e) { + free(string); + @throw e; + } return self; } - (const char*)cString @@ -112,11 +283,11 @@ OF_HASH_FINALIZE(hash); return hash; } -- setTo: (const char*)str +- setToCString: (const char*)str { @throw [OFNotImplementedException newWithClass: isa andSelector: _cmd]; } Index: tests/OFString/OFString.m ================================================================== --- tests/OFString/OFString.m +++ tests/OFString/OFString.m @@ -55,14 +55,14 @@ { size_t i = 0; size_t j = 0; OFAutoreleasePool *pool = [[OFAutoreleasePool alloc] init]; - OFString *s1 = [OFString stringWithCString: "test"]; - OFString *s2 = [OFString stringWithCString: ""]; + OFString *s1 = [OFMutableString stringWithCString: "test"]; + OFString *s2 = [OFMutableString stringWithCString: ""]; OFString *s3; - OFString *s4 = [OFString string]; + OFString *s4 = [OFMutableString string]; OFArray *a; s3 = [s1 copy]; CHECK([s1 isEqual: s3]) @@ -69,11 +69,11 @@ CHECK(![s1 isEqual: [[OFObject alloc] init]]) CHECK([s1 hash] == [s3 hash]) [s2 appendCString: "12"]; [s2 append: @"3"]; - [s4 setTo: [s2 cString]]; + [s4 setToCString: [s2 cString]]; CHECK(![s2 compare: s4]) CHECK(!strcmp([[s1 append: s2] cString], "test123")) CHECK([s1 hash] == 0xC44F49A4) CHECK(strlen([s1 cString]) == [s1 length] && [s1 length] == 7) @@ -88,16 +88,16 @@ CHECK_EXCEPT(s1 = [OFString stringWithCString: "\xE0\x80"], OFInvalidEncodingException) CHECK_EXCEPT(s1 = [OFString stringWithCString: "\xF0\x80\x80\xC0"], OFInvalidEncodingException) - s1 = [OFString stringWithCString: "äöü€𝄞"]; + s1 = [OFMutableString stringWithCString: "äöü€𝄞"]; CHECK(!strcmp([[s1 reverse] cString], "𝄞€üöä")) [s1 dealloc]; /* Format tests */ - s1 = [OFString stringWithFormat: @"%s: %d", "test", 123]; + s1 = [OFMutableString stringWithFormat: @"%s: %d", "test", 123]; CHECK(!strcmp([s1 cString], "test: 123")) [s1 appendWithFormat: @"%02X", 15]; CHECK(!strcmp([s1 cString], "test: 1230F"))