Index: src/OFString.h
==================================================================
--- src/OFString.h
+++ src/OFString.h
@@ -27,11 +27,12 @@
  */
 typedef enum of_string_encoding_t {
 	OF_STRING_ENCODING_UTF_8,
 	OF_STRING_ENCODING_ISO_8859_1,
 	OF_STRING_ENCODING_ISO_8859_15,
-	OF_STRING_ENCODING_WINDOWS_1252
+	OF_STRING_ENCODING_WINDOWS_1252,
+	OF_STRING_ENCODING_AUTODETECT = 0xFF
 } of_string_encoding_t;
 
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -147,12 +148,17 @@
  */
 + stringWithContentsOfFile: (OFString*)path
 		  encoding: (of_string_encoding_t)encoding;
 
 /**
- * Creates a new OFString with the contents of the specified URL, assuming
- * UTF-8 encoding.
+ * Creates a new OFString with the contents of the specified URL.
+ *
+ * If the URL's scheme is file, it tries UTF-8 encoding.
+ *
+ * If the URL's scheme is http(s), it tries to detect the encoding from the HTTP
+ * headers. If it could not detect the encoding using the HTTP headers, it tries
+ * UTF-8 and falls back to ISO-8859-1 if that fails.
  *
  * \param URL The URL to the contents for the string
  * \return A new autoreleased OFString
  */
 + stringWithContentsOfURL: (OFURL*)URL;
@@ -279,11 +285,17 @@
 - initWithContentsOfFile: (OFString*)path
 		encoding: (of_string_encoding_t)encoding;
 
 /**
  * Initializes an already allocated OFString with the contents of the specified
- * URL, assuming UTF-8 encoding.
+ * URL.
+ *
+ * If the URL's scheme is file, it tries UTF-8 encoding.
+ *
+ * If the URL's scheme is http(s), it tries to detect the encoding from the HTTP
+ * headers. If it could not detect the encoding using the HTTP headers, it tries
+ * UTF-8 and falls back to ISO-8859-1 if that fails.
  *
  * \param URL The URL to the contents for the string
  * \return An initialized OFString
  */
 - initWithContentsOfURL: (OFURL*)URL;

Index: src/OFString.m
==================================================================
--- src/OFString.m
+++ src/OFString.m
@@ -30,10 +30,11 @@
 # define madvise(addr, len, advise)
 #endif
 
 #import "OFString.h"
 #import "OFArray.h"
+#import "OFDictionary.h"
 #import "OFFile.h"
 #import "OFURL.h"
 #import "OFHTTPRequest.h"
 #import "OFAutoreleasePool.h"
 
@@ -58,11 +59,11 @@
 {
 	_OFString_Hashing_reference = 1;
 	_OFString_URLEncoding_reference = 1;
 	_OFString_XMLEscaping_reference = 1;
 	_OFString_XMLUnescaping_reference = 1;
-};
+}
 
 static inline int
 memcasecmp(const char *first, const char *second, size_t len)
 {
 	size_t i;
@@ -697,27 +698,31 @@
 }
 
 - initWithContentsOfURL: (OFURL*)URL
 {
 	return [self initWithContentsOfURL: URL
-				  encoding: OF_STRING_ENCODING_UTF_8];
+				  encoding: OF_STRING_ENCODING_AUTODETECT];
 }
 
 - initWithContentsOfURL: (OFURL*)URL
 	       encoding: (of_string_encoding_t)encoding
 {
 	OFAutoreleasePool *pool;
 	OFHTTPRequest *request;
 	OFHTTPRequestResult *result;
+	OFMutableString *contentType;
 	Class c;
 
 	c = isa;
 	[self release];
 
 	pool = [[OFAutoreleasePool alloc] init];
 
 	if ([[URL scheme] isEqual: @"file"]) {
+		if (encoding == OF_STRING_ENCODING_AUTODETECT)
+			encoding = OF_STRING_ENCODING_UTF_8;
+
 		self = [[c alloc] initWithContentsOfFile: [URL path]
 						encoding: encoding];
 		[pool release];
 		return self;
 	}
@@ -729,13 +734,47 @@
 		@throw [OFHTTPRequestFailedException
 		    newWithClass: [request class]
 		     HTTPRequest: request
 		      statusCode: [result statusCode]];
 
-	self = [[c alloc] initWithCString: (char*)[[result data] cArray]
-				 encoding: encoding
-				   length: [[result data] count]];
+	if (encoding == OF_STRING_ENCODING_AUTODETECT &&
+	    (contentType = [[result headers] objectForKey: @"Content-Type"])) {
+		contentType = [[contentType mutableCopy] autorelease];
+		[contentType lower];
+
+		/*
+		 * The value was lowercased above, so only lowercase literals
+		 * can match. HTTP names this media type parameter "charset".
+		 */
+		if ([contentType hasSuffix: @"charset=utf-8"])
+			encoding = OF_STRING_ENCODING_UTF_8;
+		if ([contentType hasSuffix: @"charset=iso-8859-1"])
+			encoding = OF_STRING_ENCODING_ISO_8859_1;
+		if ([contentType hasSuffix: @"charset=iso-8859-15"])
+			encoding = OF_STRING_ENCODING_ISO_8859_15;
+		if ([contentType hasSuffix: @"charset=windows-1252"])
+			encoding = OF_STRING_ENCODING_WINDOWS_1252;
+	}
+
+	if (encoding == OF_STRING_ENCODING_AUTODETECT) {
+		@try {
+			self = [[c alloc]
+			    initWithCString: (char*)[[result data] cArray]
+				   encoding: OF_STRING_ENCODING_UTF_8
+				     length: [[result data] count]];
+		} @catch (OFInvalidEncodingException *e) {
+			self = [[c alloc]
+			    initWithCString: (char*)[[result data] cArray]
+				   encoding: OF_STRING_ENCODING_ISO_8859_1
+				     length: [[result data] count]];
+		}
+	} else {
+		self = [[c alloc] initWithCString: (char*)[[result data] cArray]
+					 encoding: encoding
+					   length: [[result data] count]];
+	}
+
 	[pool release];
 	return self;
 }
 
 - (const char*)cString