/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, * 2018 * Jonathan Schleifer <js@heap.zone> * * All rights reserved. * * This file is part of ObjFW. It may be distributed under the terms of the * Q Public License 1.0, which can be found in the file LICENSE.QPL included in * the packaging of this file. * * Alternatively, it may be distributed under the terms of the GNU General * Public License, either version 2 or 3, which can be found in the file * LICENSE.GPLv2 or LICENSE.GPLv3 respectively included in the packaging of this * file. */ /* * This file tries to make writing UTF-8 strings to the console "just work" on * Windows. * * While Windows does provide a way to change the codepage of the console to * UTF-8, unfortunately, different Windows versions handle that differently. * For example, on Windows XP, when using Windows XP's console, changing the * codepage to UTF-8 mostly breaks write() and completely breaks read(): * write() suddenly returns the number of characters - instead of bytes - * written and read() just returns 0 as soon as a Unicode character is being * read. * * Therefore, instead of just using the UTF-8 codepage, this captures all reads * and writes to of_std{in,out,err} on the low level, interprets the buffer as * UTF-8 and converts to / from UTF-16 to use ReadConsoleW() / WriteConsoleW(). * Doing so is safe, as the console only supports text anyway and thus it does * not matter if binary gets garbled by the conversion (e.g. because invalid * UTF-8 gets converted to U+FFFD). * * In order to not do this when redirecting input / output to a file (as the * file would then be read / written in the wrong encoding and break reading / * writing binary), it checks that the handle is indeed a console. */ #define OF_STDIO_STREAM_WIN32_CONSOLE_M #include "config.h" #include <assert.h> #include <errno.h> #import "OFStdIOStream_Win32Console.h" #import "OFStdIOStream+Private.h" #import "OFString.h" #import "OFData.h" #import "OFInvalidArgumentException.h" #import "OFInvalidEncodingException.h" #import "OFOutOfRangeException.h" #import "OFReadFailedException.h" #import "OFWriteFailedException.h" #include <windows.h> @implementation OFStdIOStream_Win32Console + (void)load { of_stdin = [[OFStdIOStream_Win32Console alloc] of_initWithFileDescriptor: 0]; of_stdout = [[OFStdIOStream_Win32Console alloc] of_initWithFileDescriptor: 1]; of_stderr = [[OFStdIOStream_Win32Console alloc] of_initWithFileDescriptor: 2]; } - (instancetype)of_initWithFileDescriptor: (int)fd { self = [super of_initWithFileDescriptor: fd]; @try { DWORD mode; switch (fd) { case 0: _handle = GetStdHandle(STD_INPUT_HANDLE); break; case 1: _handle = GetStdHandle(STD_OUTPUT_HANDLE); break; case 2: _handle = GetStdHandle(STD_ERROR_HANDLE); break; default: @throw [OFInvalidArgumentException exception]; } /* Not a console: Treat it as a regular OFStdIOStream */ if (!GetConsoleMode(_handle, &mode)) object_setClass(self, [OFStdIOStream class]); } @catch (id e) { [self release]; @throw e; } return self; } - (size_t)lowlevelReadIntoBuffer: (void *)buffer_ length: (size_t)length { void *pool = objc_autoreleasePoolPush(); char *buffer = buffer_; of_char16_t *UTF16; size_t j = 0; if (length > sizeof(UINT32_MAX)) @throw [OFOutOfRangeException exception]; UTF16 = [self allocMemoryWithSize: sizeof(of_char16_t) count: length]; @try { DWORD UTF16Len; OFMutableData *rest = nil; size_t i = 0; if (!ReadConsoleW(_handle, UTF16, (DWORD)length, &UTF16Len, NULL)) @throw [OFReadFailedException exceptionWithObject: self requestedLength: length * 2 errNo: EIO]; if (UTF16Len > 0 && _incompleteUTF16Surrogate != 0) { of_unichar_t c = (((_incompleteUTF16Surrogate & 0x3FF) << 10) | (UTF16[0] & 0x3FF)) + 0x10000; char UTF8[4]; size_t UTF8Len; if ((UTF8Len = of_string_utf8_encode(c, UTF8)) == 0) @throw [OFInvalidEncodingException exception]; if (UTF8Len <= length) { memcpy(buffer, UTF8, UTF8Len); j += UTF8Len; } else { if (rest == nil) rest = [OFMutableData data]; [rest addItems: UTF8 count: UTF8Len]; } _incompleteUTF16Surrogate = 0; i++; } for (; i < UTF16Len; i++) { of_unichar_t c = UTF16[i]; char UTF8[4]; size_t UTF8Len; /* Missing high surrogate */ if ((c & 0xFC00) == 0xDC00) @throw [OFInvalidEncodingException exception]; if ((c & 0xFC00) == 0xD800) { of_char16_t next; if (UTF16Len <= i + 1) { _incompleteUTF16Surrogate = c; if (rest != nil) { char *items = [rest items]; size_t count = [rest count]; [self unreadFromBuffer: items length: count]; } objc_autoreleasePoolPop(pool); return j; } next = UTF16[i + 1]; if ((next & 0xFC00) != 0xDC00) @throw [OFInvalidEncodingException exception]; c = (((c & 0x3FF) << 10) | (next & 0x3FF)) + 0x10000; i++; } if ((UTF8Len = of_string_utf8_encode(c, UTF8)) == 0) @throw [OFInvalidEncodingException exception]; if (j + UTF8Len <= length) { memcpy(buffer + j, UTF8, UTF8Len); j += UTF8Len; } else { if (rest == nil) rest = [OFMutableData data]; [rest addItems: UTF8 count: UTF8Len]; } } if (rest != nil) [self unreadFromBuffer: [rest items] length: [rest count]]; } @finally { [self freeMemory: UTF16]; } objc_autoreleasePoolPop(pool); return j; } - (size_t)lowlevelWriteBuffer: (const void *)buffer_ length: (size_t)length { const char *buffer = buffer_; of_char16_t *tmp; size_t i = 0, j = 0; if (length > SIZE_MAX / 2) @throw [OFOutOfRangeException exception]; if (_incompleteUTF8SurrogateLen > 0) { of_unichar_t c; of_char16_t UTF16[2]; ssize_t UTF8Len; size_t toCopy; DWORD UTF16Len, bytesWritten; UTF8Len = -of_string_utf8_decode( _incompleteUTF8Surrogate, _incompleteUTF8SurrogateLen, &c); OF_ENSURE(UTF8Len > 0); toCopy = UTF8Len - _incompleteUTF8SurrogateLen; if (toCopy > length) toCopy = length; memcpy(_incompleteUTF8Surrogate + _incompleteUTF8SurrogateLen, buffer, toCopy); _incompleteUTF8SurrogateLen += toCopy; if (_incompleteUTF8SurrogateLen < (size_t)UTF8Len) return 0; UTF8Len = of_string_utf8_decode( _incompleteUTF8Surrogate, _incompleteUTF8SurrogateLen, &c); if (UTF8Len <= 0 || c > 0x10FFFF) { assert(UTF8Len == 0 || UTF8Len < -4); UTF16[0] = 0xFFFD; UTF16Len = 1; } else { if (c > 0xFFFF) { c -= 0x10000; UTF16[0] = 0xD800 | (c >> 10); UTF16[1] = 0xDC00 | (c & 0x3FF); UTF16Len = 2; } else { UTF16[0] = c; UTF16Len = 1; } } if (!WriteConsoleW(_handle, UTF16, UTF16Len, &bytesWritten, NULL)) @throw [OFWriteFailedException exceptionWithObject: self requestedLength: UTF16Len * 2 bytesWritten: 0 errNo: EIO]; if (bytesWritten != UTF16Len) @throw [OFWriteFailedException exceptionWithObject: self requestedLength: UTF16Len * 2 bytesWritten: bytesWritten * 2 errNo: 0]; _incompleteUTF8SurrogateLen = 0; i += toCopy; } tmp = [self allocMemoryWithSize: sizeof(of_char16_t) count: length * 2]; @try { DWORD bytesWritten; while (i < length) { of_unichar_t c; ssize_t UTF8Len; UTF8Len = of_string_utf8_decode(buffer + i, length - i, &c); if (UTF8Len < 0 && UTF8Len >= -4) { OF_ENSURE(length - i < 4); memcpy(_incompleteUTF8Surrogate, buffer + i, length - i); _incompleteUTF8SurrogateLen = length - i; break; } if (UTF8Len <= 0 || c > 0x10FFFF) { tmp[j++] = 0xFFFD; i++; continue; } if (c > 0xFFFF) { c -= 0x10000; tmp[j++] = 0xD800 | (c >> 10); tmp[j++] = 0xDC00 | (c & 0x3FF); } else tmp[j++] = c; i += UTF8Len; } if (j > UINT32_MAX) @throw [OFOutOfRangeException exception]; if (!WriteConsoleW(_handle, tmp, (DWORD)j, &bytesWritten, NULL)) @throw [OFWriteFailedException exceptionWithObject: self requestedLength: j * 2 bytesWritten: 0 errNo: EIO]; if (bytesWritten != j) @throw [OFWriteFailedException exceptionWithObject: self requestedLength: j * 2 bytesWritten: bytesWritten * 2 errNo: 0]; } @finally { [self freeMemory: tmp]; } /* * We do not count in bytes when writing to the Win32 console. But * since any incomplete write is an exception here anyway, we can just * return length. */ return length; } @end