/*
* Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017,
* 2018, 2019
* Jonathan Schleifer <js@heap.zone>
*
* All rights reserved.
*
* This file is part of ObjFW. It may be distributed under the terms of the
* Q Public License 1.0, which can be found in the file LICENSE.QPL included in
* the packaging of this file.
*
* Alternatively, it may be distributed under the terms of the GNU General
* Public License, either version 2 or 3, which can be found in the file
* LICENSE.GPLv2 or LICENSE.GPLv3 respectively included in the packaging of this
* file.
*/
/*
* This file tries to make writing UTF-8 strings to the console "just work" on
* Windows.
*
* While Windows does provide a way to change the codepage of the console to
* UTF-8, unfortunately, different Windows versions handle that differently.
* For example, on Windows XP, when using Windows XP's console, changing the
* codepage to UTF-8 mostly breaks write() and completely breaks read():
* write() suddenly returns the number of characters - instead of bytes -
* written and read() just returns 0 as soon as a Unicode character is being
* read.
*
* Therefore, instead of just using the UTF-8 codepage, this captures all reads
* and writes to of_std{in,out,err} on the low level, interprets the buffer as
* UTF-8 and converts to / from UTF-16 to use ReadConsoleW() / WriteConsoleW().
* Doing so is safe, as the console only supports text anyway and thus it does
* not matter if binary gets garbled by the conversion (e.g. because invalid
* UTF-8 gets converted to U+FFFD).
*
* In order to not do this when redirecting input / output to a file (as the
* file would then be read / written in the wrong encoding and break reading /
* writing binary), it checks that the handle is indeed a console.
*/
#define OF_STDIO_STREAM_WIN32_CONSOLE_M
#include "config.h"
#include <assert.h>
#include <errno.h>
#import "OFStdIOStream_Win32Console.h"
#import "OFStdIOStream+Private.h"
#import "OFString.h"
#import "OFData.h"
#import "OFInvalidArgumentException.h"
#import "OFInvalidEncodingException.h"
#import "OFOutOfRangeException.h"
#import "OFReadFailedException.h"
#import "OFWriteFailedException.h"
#include <windows.h>
@implementation OFStdIOStream_Win32Console
/*
 * Runs when the class is loaded and points the global of_stdin, of_stdout and
 * of_stderr at instances of this class for the file descriptors 0, 1 and 2.
 */
+ (void)load
{
	Class consoleStream = [OFStdIOStream_Win32Console class];

	of_stdin = [[consoleStream alloc] of_initWithFileDescriptor: 0];
	of_stdout = [[consoleStream alloc] of_initWithFileDescriptor: 1];
	of_stderr = [[consoleStream alloc] of_initWithFileDescriptor: 2];
}
/*
 * Initializes the stream for one of the three standard file descriptors and
 * looks up the matching Win32 standard handle.
 *
 * If the handle does not refer to a console (GetConsoleMode() fails on it),
 * the class of the object is switched to OFStdIOStream, so that none of the
 * UTF-16 conversion in this class is applied to redirected input / output.
 *
 * @param fd The file descriptor: 0 (stdin), 1 (stdout) or 2 (stderr)
 * @return The initialized stream
 * @throw OFInvalidArgumentException if fd is none of 0, 1 or 2
 */
- (instancetype)of_initWithFileDescriptor: (int)fd
{
	self = [super of_initWithFileDescriptor: fd];

	@try {
		DWORD stdHandleID, consoleMode;

		/* Map the file descriptor to the Win32 standard handle ID */
		if (fd == 0)
			stdHandleID = STD_INPUT_HANDLE;
		else if (fd == 1)
			stdHandleID = STD_OUTPUT_HANDLE;
		else if (fd == 2)
			stdHandleID = STD_ERROR_HANDLE;
		else
			@throw [OFInvalidArgumentException exception];

		_handle = GetStdHandle(stdHandleID);

		/*
		 * The mode itself is not needed: Only whether querying it
		 * succeeds matters. If it fails, this is not a console and
		 * the object behaves like a regular OFStdIOStream instead.
		 */
		if (!GetConsoleMode(_handle, &consoleMode))
			object_setClass(self, [OFStdIOStream class]);
	} @catch (id e) {
		/* Do not leak the half-initialized object */
		[self release];
		@throw e;
	}

	return self;
}
/*
 * Reads from the console using ReadConsoleW() and stores the input, converted
 * from UTF-16 to UTF-8, in the specified buffer.
 *
 * Up to `length` UTF-16 code units are requested from the console. As one
 * UTF-16 code unit can need more than one byte in UTF-8, the converted input
 * can be longer than the buffer: All UTF-8 bytes that do not fit are collected
 * and passed to -[unreadFromBuffer:length:].
 *
 * If the input ends with a high surrogate, it is remembered in
 * _incompleteUTF16Surrogate and combined with the first code unit of the next
 * read.
 *
 * @param buffer_ The buffer to store the UTF-8 bytes in
 * @param length The size of the buffer in bytes
 * @return The number of UTF-8 bytes stored in the buffer
 * @throw OFOutOfRangeException if length does not fit into 32 bits
 * @throw OFReadFailedException if ReadConsoleW() failed
 * @throw OFInvalidEncodingException if the input contains a low surrogate
 *	  without a preceding high surrogate, a high surrogate that is not
 *	  followed by a low surrogate or a character that cannot be encoded as
 *	  UTF-8
 */
- (size_t)lowlevelReadIntoBuffer: (void *)buffer_
			  length: (size_t)length
{
	void *pool = objc_autoreleasePoolPush();
	char *buffer = buffer_;
	of_char16_t *UTF16;
	size_t j = 0;

	/*
	 * The length is passed to ReadConsoleW() as a DWORD below, so it must
	 * fit into 32 bits. The limit is the value UINT32_MAX, not the size of
	 * its type.
	 */
	if (length > UINT32_MAX)
		@throw [OFOutOfRangeException exception];

	UTF16 = [self allocMemoryWithSize: sizeof(of_char16_t)
				    count: length];
	@try {
		DWORD UTF16Len;
		/* UTF-8 bytes that did not fit into the buffer anymore */
		OFMutableData *rest = nil;
		size_t i = 0;

		if (!ReadConsoleW(_handle, UTF16, (DWORD)length, &UTF16Len,
		    NULL))
			@throw [OFReadFailedException
			    exceptionWithObject: self
				requestedLength: length * 2
					  errNo: EIO];

		/*
		 * The previous read ended with a high surrogate: Combine it
		 * with the first code unit of this read.
		 */
		if (UTF16Len > 0 && _incompleteUTF16Surrogate != 0) {
			of_unichar_t c =
			    (((_incompleteUTF16Surrogate & 0x3FF) << 10) |
			    (UTF16[0] & 0x3FF)) + 0x10000;
			char UTF8[4];
			size_t UTF8Len;

			if ((UTF8Len = of_string_utf8_encode(c, UTF8)) == 0)
				@throw [OFInvalidEncodingException exception];

			if (UTF8Len <= length) {
				memcpy(buffer, UTF8, UTF8Len);
				j += UTF8Len;
			} else {
				if (rest == nil)
					rest = [OFMutableData data];

				[rest addItems: UTF8
					 count: UTF8Len];
			}

			_incompleteUTF16Surrogate = 0;
			i++;
		}

		for (; i < UTF16Len; i++) {
			of_unichar_t c = UTF16[i];
			char UTF8[4];
			size_t UTF8Len;

			/* Missing high surrogate */
			if ((c & 0xFC00) == 0xDC00)
				@throw [OFInvalidEncodingException exception];

			if ((c & 0xFC00) == 0xD800) {
				of_char16_t next;

				/*
				 * The high surrogate is the last code unit
				 * that was read: Keep it for the next read and
				 * return what has been converted so far. The
				 * @finally block still frees UTF16.
				 */
				if (UTF16Len <= i + 1) {
					_incompleteUTF16Surrogate = c;

					if (rest != nil) {
						char *items = [rest items];
						size_t count = [rest count];

						[self unreadFromBuffer: items
								length: count];
					}

					objc_autoreleasePoolPop(pool);

					return j;
				}

				next = UTF16[i + 1];

				if ((next & 0xFC00) != 0xDC00)
					@throw [OFInvalidEncodingException
					    exception];

				c = (((c & 0x3FF) << 10) | (next & 0x3FF)) +
				    0x10000;

				/* The low surrogate has been consumed as well */
				i++;
			}

			if ((UTF8Len = of_string_utf8_encode(c, UTF8)) == 0)
				@throw [OFInvalidEncodingException exception];

			if (j + UTF8Len <= length) {
				memcpy(buffer + j, UTF8, UTF8Len);
				j += UTF8Len;
			} else {
				if (rest == nil)
					rest = [OFMutableData data];

				[rest addItems: UTF8
					 count: UTF8Len];
			}
		}

		if (rest != nil)
			[self unreadFromBuffer: [rest items]
					length: [rest count]];
	} @finally {
		[self freeMemory: UTF16];
	}

	objc_autoreleasePoolPop(pool);

	return j;
}
/*
 * Writes the specified UTF-8 buffer to the console using WriteConsoleW(),
 * converting it to UTF-16 first.
 *
 * A UTF-8 sequence that is cut off at the end of the buffer is not written,
 * but kept in _incompleteUTF8Surrogate and completed with the first bytes of
 * the buffer passed to the next call. Bytes that cannot be decoded and code
 * points above U+10FFFF are written as U+FFFD.
 *
 * @param buffer_ The UTF-8 bytes to write
 * @param length The number of bytes in the buffer
 * @return `length`, or 0 if all bytes of the buffer were only appended to a
 *	   kept sequence that is still incomplete afterwards
 * @throw OFOutOfRangeException if length is larger than SIZE_MAX / 2 or if
 *	  more than UINT32_MAX UTF-16 code units would have to be written
 * @throw OFWriteFailedException if WriteConsoleW() failed or wrote fewer
 *	  UTF-16 code units than requested
 */
- (size_t)lowlevelWriteBuffer: (const void *)buffer_
length: (size_t)length
{
const char *buffer = buffer_;
of_char16_t *tmp;
/* i: read position in buffer (bytes), j: write position in tmp (UTF-16) */
size_t i = 0, j = 0;
/* length * 2 is used as the element count for tmp below */
if (length > SIZE_MAX / 2)
@throw [OFOutOfRangeException exception];
/* A previous call ended in the middle of a UTF-8 sequence: complete it */
if (_incompleteUTF8SurrogateLen > 0) {
of_unichar_t c;
of_char16_t UTF16[2];
ssize_t UTF8Len;
size_t toCopy;
DWORD UTF16Len, bytesWritten;
/*
 * The negated return value for the kept (incomplete) bytes is used as the
 * total length of the sequence in the lines below.
 */
UTF8Len = -of_string_utf8_decode(
_incompleteUTF8Surrogate, _incompleteUTF8SurrogateLen, &c);
OF_ENSURE(UTF8Len > 0);
/* Take the missing bytes from the buffer, but not more than it contains */
toCopy = UTF8Len - _incompleteUTF8SurrogateLen;
if (toCopy > length)
toCopy = length;
memcpy(_incompleteUTF8Surrogate + _incompleteUTF8SurrogateLen,
buffer, toCopy);
_incompleteUTF8SurrogateLen += toCopy;
/*
 * Still incomplete: The whole buffer has been appended to the kept bytes
 * and nothing is written to the console yet.
 *
 * NOTE(review): 0 is returned here although `toCopy` bytes of the buffer
 * were consumed - confirm that callers do not treat this as a short write.
 */
if (_incompleteUTF8SurrogateLen < (size_t)UTF8Len)
return 0;
UTF8Len = of_string_utf8_decode(
_incompleteUTF8Surrogate, _incompleteUTF8SurrogateLen, &c);
/* Not decodable or out of the Unicode range: write U+FFFD instead */
if (UTF8Len <= 0 || c > 0x10FFFF) {
assert(UTF8Len == 0 || UTF8Len < -4);
UTF16[0] = 0xFFFD;
UTF16Len = 1;
} else {
/* Code points above U+FFFF need a surrogate pair in UTF-16 */
if (c > 0xFFFF) {
c -= 0x10000;
UTF16[0] = 0xD800 | (c >> 10);
UTF16[1] = 0xDC00 | (c & 0x3FF);
UTF16Len = 2;
} else {
UTF16[0] = c;
UTF16Len = 1;
}
}
/*
 * WriteConsoleW() counts in UTF-16 code units, the exceptions are given
 * the counts multiplied by 2.
 */
if (!WriteConsoleW(_handle, UTF16, UTF16Len, &bytesWritten,
NULL))
@throw [OFWriteFailedException
exceptionWithObject: self
requestedLength: UTF16Len * 2
bytesWritten: 0
errNo: EIO];
if (bytesWritten != UTF16Len)
@throw [OFWriteFailedException
exceptionWithObject: self
requestedLength: UTF16Len * 2
bytesWritten: bytesWritten * 2
errNo: 0];
_incompleteUTF8SurrogateLen = 0;
/* Skip the bytes of the buffer that completed the kept sequence */
i += toCopy;
}
/* Each UTF-8 byte results in at most two UTF-16 code units in the loop below */
tmp = [self allocMemoryWithSize: sizeof(of_char16_t)
count: length * 2];
@try {
DWORD bytesWritten;
/* Convert the remaining bytes of the buffer to UTF-16 in tmp */
while (i < length) {
of_unichar_t c;
ssize_t UTF8Len;
UTF8Len = of_string_utf8_decode(buffer + i, length - i,
&c);
/*
 * A return value from -4 to -1 is handled as a sequence that is cut off
 * at the end of the buffer: The remaining bytes are kept for the next
 * call instead of being written.
 */
if (UTF8Len < 0 && UTF8Len >= -4) {
OF_ENSURE(length - i < 4);
memcpy(_incompleteUTF8Surrogate, buffer + i,
length - i);
_incompleteUTF8SurrogateLen = length - i;
break;
}
/* Not decodable: emit U+FFFD and continue with the next byte */
if (UTF8Len <= 0 || c > 0x10FFFF) {
tmp[j++] = 0xFFFD;
i++;
continue;
}
/* Code points above U+FFFF need a surrogate pair in UTF-16 */
if (c > 0xFFFF) {
c -= 0x10000;
tmp[j++] = 0xD800 | (c >> 10);
tmp[j++] = 0xDC00 | (c & 0x3FF);
} else
tmp[j++] = c;
i += UTF8Len;
}
/* j is passed to WriteConsoleW() as a DWORD */
if (j > UINT32_MAX)
@throw [OFOutOfRangeException exception];
if (!WriteConsoleW(_handle, tmp, (DWORD)j, &bytesWritten, NULL))
@throw [OFWriteFailedException
exceptionWithObject: self
requestedLength: j * 2
bytesWritten: 0
errNo: EIO];
/* A partial write to the console is reported as a failure */
if (bytesWritten != j)
@throw [OFWriteFailedException
exceptionWithObject: self
requestedLength: j * 2
bytesWritten: bytesWritten * 2
errNo: 0];
} @finally {
[self freeMemory: tmp];
}
/*
* We do not count in bytes when writing to the Win32 console. But
* since any incomplete write is an exception here anyway, we can just
* return length.
*/
return length;
}
@end