ObjFW  Artifact [88e2ee5d8c]

Artifact 88e2ee5d8cb865fd7c89dc194b000b3ace5cb276e519328781fab500e38c32dc:


     1  /*
     2   * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017,
     3   *               2018, 2019, 2020
     4   *   Jonathan Schleifer <js@nil.im>
     5   *
     6   * All rights reserved.
     7   *
     8   * This file is part of ObjFW. It may be distributed under the terms of the
     9   * Q Public License 1.0, which can be found in the file LICENSE.QPL included in
    10   * the packaging of this file.
    11   *
    12   * Alternatively, it may be distributed under the terms of the GNU General
    13   * Public License, either version 2 or 3, which can be found in the file
    14   * LICENSE.GPLv2 or LICENSE.GPLv3 respectively included in the packaging of this
    15   * file.
    16   */
    17  
/*
 * This file tries to make writing UTF-8 strings to the console "just work" on
 * Windows.
 *
 * While Windows does provide a way to change the codepage of the console to
 * UTF-8, unfortunately, different Windows versions handle that differently.
 * For example, on Windows XP, when using Windows XP's console, changing the
 * codepage to UTF-8 mostly breaks write() and completely breaks read():
 * write() suddenly returns the number of characters - instead of bytes -
 * written and read() just returns 0 as soon as a Unicode character is being
 * read.
 *
 * Therefore, instead of just using the UTF-8 codepage, this captures all reads
 * and writes to of_std{in,out,err} on the low level, interprets the buffer as
 * UTF-8 and converts to / from UTF-16 to use ReadConsoleW() / WriteConsoleW().
 * Doing so is safe, as the console only supports text anyway and thus it does
 * not matter if binary gets garbled by the conversion (e.g. because invalid
 * UTF-8 gets converted to U+FFFD).
 *
 * In order to not do this when redirecting input / output to a file (as the
 * file would then be read / written in the wrong encoding and break reading /
 * writing binary), it checks that the handle is indeed a console.
 */
#define OF_STDIO_STREAM_WIN32_CONSOLE_M

#include "config.h"

#include <assert.h>
#include <errno.h>
#include <io.h>

#import "OFWin32ConsoleStdIOStream.h"
#import "OFData.h"
#import "OFStdIOStream+Private.h"
#import "OFString.h"
#import "OFSystemInfo.h"

#import "OFInvalidArgumentException.h"
#import "OFInvalidEncodingException.h"
#import "OFOutOfRangeException.h"
#import "OFReadFailedException.h"
#import "OFWriteFailedException.h"

#include <windows.h>

/*
 * Maps a Windows console codepage number to the matching string encoding.
 *
 * Only the codepages listed in the switch are supported; any other codepage
 * results in an OFInvalidEncodingException.
 */
static of_string_encoding_t
codepageToEncoding(UINT codepage)
{
	switch (codepage) {
	case 437:
		return OF_STRING_ENCODING_CODEPAGE_437;
	case 850:
		return OF_STRING_ENCODING_CODEPAGE_850;
	case 858:
		return OF_STRING_ENCODING_CODEPAGE_858;
	case 1251:
		return OF_STRING_ENCODING_WINDOWS_1251;
	case 1252:
		return OF_STRING_ENCODING_WINDOWS_1252;
	default:
		@throw [OFInvalidEncodingException exception];
	}
}

/*
 * Writes UTF16Len UTF-16 code units to the console referred to by handle.
 *
 * On Windows NT, WriteConsoleW() is used directly. Otherwise, the text is
 * converted to the console's output codepage and written using
 * WriteConsoleA().
 *
 * Throws an OFOutOfRangeException if the length does not fit into a DWORD and
 * an OFWriteFailedException (with stream as the object) if the console write
 * fails or if the count reported by the console differs from UTF16Len.
 */
static void
writeUTF16ToConsole(OFWin32ConsoleStdIOStream *stream, HANDLE handle,
    const of_char16_t *UTF16, size_t UTF16Len)
{
	/*
	 * Initialized so that the exceptions below never report an
	 * indeterminate value if the console call fails before storing a
	 * count.
	 */
	DWORD written = 0;

	if (UTF16Len > UINT32_MAX)
		@throw [OFOutOfRangeException exception];

	if ([OFSystemInfo isWindowsNT]) {
		if (!WriteConsoleW(handle, UTF16, (DWORD)UTF16Len, &written,
		    NULL))
			@throw [OFWriteFailedException
			    exceptionWithObject: stream
				requestedLength: UTF16Len * 2
				   bytesWritten: written * 2
					  errNo: EIO];
	} else {
		void *pool = objc_autoreleasePoolPush();
		OFString *string = [OFString stringWithUTF16String: UTF16
							    length: UTF16Len];
		of_string_encoding_t encoding =
		    codepageToEncoding(GetConsoleOutputCP());
		size_t nativeLen = [string
		    cStringLengthWithEncoding: encoding];

		if (nativeLen > UINT32_MAX)
			@throw [OFOutOfRangeException exception];

		if (!WriteConsoleA(handle,
		    [string cStringWithEncoding: encoding],
		    (DWORD)nativeLen, &written, NULL))
			@throw [OFWriteFailedException
			    exceptionWithObject: stream
				requestedLength: nativeLen
				   bytesWritten: written
					  errNo: EIO];

		objc_autoreleasePoolPop(pool);
	}

	/* The console call succeeded, but did not write everything. */
	if (written != UTF16Len)
		@throw [OFWriteFailedException
		    exceptionWithObject: stream
			requestedLength: UTF16Len * 2
			   bytesWritten: written * 2
				  errNo: 0];
}

@implementation OFWin32ConsoleStdIOStream
/*
 * Replaces of_stdin, of_stdout and of_stderr with instances of this class for
 * every standard stream that has a valid file descriptor.
 */
+ (void)load
{
	int fd;

	/* Only do this once, for this exact class and not for subclasses. */
	if (self != [OFWin32ConsoleStdIOStream class])
		return;

	if ((fd = _fileno(stdin)) >= 0)
		of_stdin = [[OFWin32ConsoleStdIOStream alloc]
		    of_initWithFileDescriptor: fd];
	if ((fd = _fileno(stdout)) >= 0)
		of_stdout = [[OFWin32ConsoleStdIOStream alloc]
		    of_initWithFileDescriptor: fd];
	if ((fd = _fileno(stderr)) >= 0)
		of_stderr = [[OFWin32ConsoleStdIOStream alloc]
		    of_initWithFileDescriptor: fd];
}

/*
 * Initializes the stream for the specified file descriptor.
 *
 * If the descriptor's OS handle is not a console, the object's class is
 * changed to OFStdIOStream, so that no encoding conversion is performed.
 * Throws an OFInvalidArgumentException if the descriptor has no valid handle.
 */
- (instancetype)of_initWithFileDescriptor: (int)fd
{
	self = [super of_initWithFileDescriptor: fd];

	@try {
		DWORD mode;

		_handle = (HANDLE)_get_osfhandle(fd);
		if (_handle == INVALID_HANDLE_VALUE)
			@throw [OFInvalidArgumentException exception];

		/* Not a console: Treat it as a regular OFStdIOStream */
		if (!GetConsoleMode(_handle, &mode))
			object_setClass(self, [OFStdIOStream class]);
	} @catch (id e) {
		[self release];
		@throw e;
	}

	return self;
}

/*
 * Reads text from the console and stores it as UTF-8 in buffer.
 *
 * Up to length UTF-16 code units (or, on non-NT Windows, up to length bytes
 * in the console's input codepage) are read and converted to UTF-8. UTF-8
 * bytes that do not fit into the buffer are handed to -[unreadFromBuffer:
 * length:]. A high surrogate at the very end of the console data is kept in
 * _incompleteUTF16Surrogate and combined with the first code unit of the next
 * read.
 *
 * Returns the number of UTF-8 bytes stored in buffer.
 *
 * Throws an OFOutOfRangeException if length does not fit into 32 bits, an
 * OFReadFailedException if reading from the console fails and an
 * OFInvalidEncodingException for unpaired surrogates or an unsupported
 * codepage.
 */
- (size_t)lowlevelReadIntoBuffer: (void *)buffer_
			  length: (size_t)length
{
	void *pool = objc_autoreleasePoolPush();
	char *buffer = buffer_;
	of_char16_t *UTF16;
	size_t j = 0;

	if (length > UINT32_MAX)
		@throw [OFOutOfRangeException exception];

	UTF16 = [self allocMemoryWithSize: sizeof(of_char16_t)
				    count: length];
	@try {
		DWORD UTF16Len;
		OFMutableData *rest = nil;
		size_t i = 0;

		if ([OFSystemInfo isWindowsNT]) {
			if (!ReadConsoleW(_handle, UTF16, (DWORD)length,
			    &UTF16Len, NULL))
				@throw [OFReadFailedException
				    exceptionWithObject: self
					requestedLength: length * 2
						  errNo: EIO];
		} else {
			of_string_encoding_t encoding;
			OFString *string;
			size_t stringLen;

			/*
			 * The UTF-16 buffer is temporarily used as a byte
			 * buffer for the text in the console's codepage.
			 */
			if (!ReadConsoleA(_handle, (char *)UTF16,
			    (DWORD)length, &UTF16Len, NULL))
				@throw [OFReadFailedException
				    exceptionWithObject: self
					requestedLength: length
						  errNo: EIO];

			encoding = codepageToEncoding(GetConsoleCP());
			string = [OFString stringWithCString: (char *)UTF16
						    encoding: encoding
						      length: UTF16Len];
			stringLen = string.UTF16StringLength;

			if (stringLen > length)
				@throw [OFOutOfRangeException exception];

			UTF16Len = (DWORD)stringLen;
			/*
			 * stringLen counts UTF-16 code units, while memcpy()
			 * expects a size in bytes.
			 */
			memcpy(UTF16, string.UTF16String,
			    stringLen * sizeof(of_char16_t));
		}

		/*
		 * A previous read ended with a high surrogate: Combine it
		 * with the first code unit of this read.
		 */
		if (UTF16Len > 0 && _incompleteUTF16Surrogate != 0) {
			of_unichar_t high = _incompleteUTF16Surrogate;
			of_unichar_t c;
			char UTF8[4];
			size_t UTF8Len;

			_incompleteUTF16Surrogate = 0;

			/* Missing low surrogate */
			if ((UTF16[0] & 0xFC00) != 0xDC00)
				@throw [OFInvalidEncodingException exception];

			c = (((high & 0x3FF) << 10) | (UTF16[0] & 0x3FF)) +
			    0x10000;

			if ((UTF8Len = of_string_utf8_encode(c, UTF8)) == 0)
				@throw [OFInvalidEncodingException exception];

			if (UTF8Len <= length) {
				memcpy(buffer, UTF8, UTF8Len);
				j += UTF8Len;
			} else {
				if (rest == nil)
					rest = [OFMutableData data];

				[rest addItems: UTF8
					 count: UTF8Len];
			}

			i++;
		}

		for (; i < UTF16Len; i++) {
			of_unichar_t c = UTF16[i];
			char UTF8[4];
			size_t UTF8Len;

			/* Missing high surrogate */
			if ((c & 0xFC00) == 0xDC00)
				@throw [OFInvalidEncodingException exception];

			if ((c & 0xFC00) == 0xD800) {
				of_char16_t next;

				/*
				 * The low surrogate has not been read yet:
				 * Remember the high surrogate for the next
				 * read and stop converting.
				 */
				if (UTF16Len <= i + 1) {
					_incompleteUTF16Surrogate = c;
					break;
				}

				next = UTF16[i + 1];

				if ((next & 0xFC00) != 0xDC00)
					@throw [OFInvalidEncodingException
					    exception];

				c = (((c & 0x3FF) << 10) | (next & 0x3FF)) +
				    0x10000;

				i++;
			}

			if ((UTF8Len = of_string_utf8_encode(c, UTF8)) == 0)
				@throw [OFInvalidEncodingException exception];

			/*
			 * Once a character did not fit into the buffer, all
			 * following characters must go to the rest as well.
			 * Otherwise a shorter character that still fits would
			 * end up in front of a longer one that did not.
			 */
			if (rest == nil && j + UTF8Len <= length) {
				memcpy(buffer + j, UTF8, UTF8Len);
				j += UTF8Len;
			} else {
				if (rest == nil)
					rest = [OFMutableData data];

				[rest addItems: UTF8
					 count: UTF8Len];
			}
		}

		if (rest != nil)
			[self unreadFromBuffer: rest.items
					length: rest.count];
	} @finally {
		[self freeMemory: UTF16];
	}

	objc_autoreleasePoolPop(pool);

	return j;
}

/*
 * Interprets buffer as UTF-8 and writes it to the console as UTF-16.
 *
 * A UTF-8 sequence that is cut off at the end of the buffer is stored in
 * _incompleteUTF8Surrogate and completed using the beginning of the buffer of
 * the next call. Invalid UTF-8 is replaced with U+FFFD.
 *
 * Returns length, as all bytes have either been written or stored for the
 * next call when this method returns without an exception.
 *
 * Throws an OFOutOfRangeException if a length is out of range and an
 * OFWriteFailedException if writing to the console fails or is incomplete.
 */
- (size_t)lowlevelWriteBuffer: (const void *)buffer_
		       length: (size_t)length
{
	const char *buffer = buffer_;
	of_char16_t *tmp;
	size_t i = 0, j = 0;

	if (length > SIZE_MAX / 2)
		@throw [OFOutOfRangeException exception];

	/* Complete the sequence that was cut off by the previous write. */
	if (_incompleteUTF8SurrogateLen > 0) {
		of_unichar_t c;
		of_char16_t UTF16[2];
		ssize_t UTF8Len;
		size_t toCopy, UTF16Len;

		/*
		 * For the stored incomplete sequence, the negated return
		 * value is used as the total length of the sequence.
		 */
		UTF8Len = -of_string_utf8_decode(
		    _incompleteUTF8Surrogate, _incompleteUTF8SurrogateLen, &c);

		OF_ENSURE(UTF8Len > 0);

		toCopy = UTF8Len - _incompleteUTF8SurrogateLen;
		if (toCopy > length)
			toCopy = length;

		memcpy(_incompleteUTF8Surrogate + _incompleteUTF8SurrogateLen,
		    buffer, toCopy);
		_incompleteUTF8SurrogateLen += toCopy;

		/*
		 * Still incomplete. This is only possible if toCopy was
		 * limited to length, which means the entire buffer has been
		 * stored and hence consumed.
		 */
		if (_incompleteUTF8SurrogateLen < (size_t)UTF8Len)
			return length;

		UTF8Len = of_string_utf8_decode(
		    _incompleteUTF8Surrogate, _incompleteUTF8SurrogateLen, &c);

		if (UTF8Len <= 0 || c > 0x10FFFF) {
			/* The sequence must no longer be incomplete. */
			assert(UTF8Len >= 0 || UTF8Len < -4);

			UTF16[0] = 0xFFFD;
			UTF16Len = 1;
		} else {
			if (c > 0xFFFF) {
				c -= 0x10000;
				UTF16[0] = 0xD800 | (c >> 10);
				UTF16[1] = 0xDC00 | (c & 0x3FF);
				UTF16Len = 2;
			} else {
				UTF16[0] = c;
				UTF16Len = 1;
			}
		}

		/*
		 * Reset before writing, so that a failed console write does
		 * not leave a complete sequence stored for the next call.
		 */
		_incompleteUTF8SurrogateLen = 0;

		writeUTF16ToConsole(self, _handle, UTF16, UTF16Len);

		i += toCopy;
	}

	tmp = [self allocMemoryWithSize: sizeof(of_char16_t)
				  count: length * 2];
	@try {
		while (i < length) {
			of_unichar_t c;
			ssize_t UTF8Len;

			UTF8Len = of_string_utf8_decode(buffer + i, length - i,
			    &c);

			/*
			 * The sequence is cut off at the end of the buffer:
			 * Store it for the next write.
			 */
			if (UTF8Len < 0 && UTF8Len >= -4) {
				OF_ENSURE(length - i < 4);

				memcpy(_incompleteUTF8Surrogate, buffer + i,
				    length - i);
				_incompleteUTF8SurrogateLen = length - i;

				break;
			}

			/* Invalid: Replace the byte with U+FFFD. */
			if (UTF8Len <= 0 || c > 0x10FFFF) {
				tmp[j++] = 0xFFFD;
				i++;
				continue;
			}

			if (c > 0xFFFF) {
				c -= 0x10000;
				tmp[j++] = 0xD800 | (c >> 10);
				tmp[j++] = 0xDC00 | (c & 0x3FF);
			} else
				tmp[j++] = c;

			i += UTF8Len;
		}

		writeUTF16ToConsole(self, _handle, tmp, j);
	} @finally {
		[self freeMemory: tmp];
	}

	/*
	 * We do not count in bytes when writing to the Win32 console. But
	 * since any incomplete write is an exception here anyway, we can just
	 * return length.
	 */
	return length;
}
@end