Overview
Comment: | OFStdIOStream_Win32Console: Improve reading
On reads, a surrogate pair that is split across two reads (the high surrogate is the last UTF-16 unit returned by one read) is now handled properly. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA3-256: |
566d4df60303cdfcb2f21ada37ffd196 |
User & Date: | js on 2016-03-13 10:24:22 |
Other Links: | manifest | tags |
Context
2016-03-13
| ||
11:19 | Change of_string_utf8_decode() API check-in: 5e2ef97c35 user: js tags: trunk | |
10:24 | OFStdIOStream_Win32Console: Improve reading check-in: 566d4df603 user: js tags: trunk | |
2016-03-12
| ||
21:19 | Work around Doxygen shortcomings check-in: 2425966b66 user: js tags: trunk | |
Changes
Modified src/OFStdIOStream_Win32Console.h from [5329f7273d] to [34f51c6008].
︙ | ︙ | |||
19 20 21 22 23 24 25 26 27 28 29 | #import "OFStdIOStream.h" OF_ASSUME_NONNULL_BEGIN @interface OFStdIOStream_Win32Console: OFStdIOStream { HANDLE _handle; } @end OF_ASSUME_NONNULL_END | > | 19 20 21 22 23 24 25 26 27 28 29 30 | #import "OFStdIOStream.h" OF_ASSUME_NONNULL_BEGIN @interface OFStdIOStream_Win32Console: OFStdIOStream { HANDLE _handle; of_char16_t _incompleteSurrogate; } @end OF_ASSUME_NONNULL_END |
Modified src/OFStdIOStream_Win32Console.m from [7d8639b924] to [88dea4ced5].
︙ | ︙ | |||
14 15 16 17 18 19 20 | * file. */ /* * This file tries to make writing UTF-8 strings to the console "just work" on * Windows. * | | | | | | | > | | | < | > | > | < < | 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 | * file. */ /* * This file tries to make writing UTF-8 strings to the console "just work" on * Windows. * * While Windows does provide a way to change the codepage of the console to * UTF-8, unfortunately, different Windows versions handle that differently. * For example, on Windows XP, when using Windows XP's console, changing the * codepage to UTF-8 mostly breaks write() and completely breaks read(): * write() suddenly returns the number of characters - instead of bytes - * written and read() just returns 0 as soon as a Unicode character is being * read. * * Therefore, instead of just using the UTF-8 codepage, this captures all reads * and writes to of_std{in,out,err} on the lowlevel, interprets the buffer as * UTF-8 and converts to / from UTF-16 to use ReadConsoleW() / WriteConsoleW(). * Doing so is safe, as the console only supports text anyway and thus it does * not matter if binary gets garbled by the conversion. * * In order to not do this when redirecting input / output to a file (as the * file would then be read / written in the wrong encoding and break reading / * writing binary), it checks that the handle is indeed a console. */ #define OF_STDIO_STREAM_WIN32_CONSOLE_M #include "config.h" #import "OFStdIOStream_Win32Console.h" |
︙ | ︙ | |||
99 100 101 102 103 104 105 | - (size_t)lowlevelReadIntoBuffer: (void*)buffer_ length: (size_t)length { void *pool = objc_autoreleasePoolPush(); char *buffer = buffer_; of_char16_t *UTF16; size_t j = 0; | < > > > > > > > > > > > > > > > > > > > > > > > > > | > | > > > | > > > > > > > > | 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 | - (size_t)lowlevelReadIntoBuffer: (void*)buffer_ length: (size_t)length { void *pool = objc_autoreleasePoolPush(); char *buffer = buffer_; of_char16_t *UTF16; size_t j = 0; UTF16 = [self allocMemoryWithSize: sizeof(of_char16_t) count: length]; @try { DWORD UTF16Len; OFDataArray *rest = nil; if (!ReadConsoleW(_handle, UTF16, length, &UTF16Len, NULL)) @throw [OFReadFailedException exceptionWithObject: self requestedLength: length]; if (UTF16Len > 0 && _incompleteSurrogate != 0) { of_unichar_t c = (((_incompleteSurrogate & 0x3FF) << 10) | (UTF16[0] & 0x3FF)) + 0x10000; char UTF8[4]; size_t UTF8Len; if ((UTF8Len = of_string_utf8_encode(c, UTF8)) == 0) @throw [OFInvalidEncodingException exception]; if (UTF8Len <= length) { memcpy(buffer, UTF8, UTF8Len); j += UTF8Len; } else { if (rest == nil) rest = [OFDataArray dataArray]; [rest addItems: UTF8 count: UTF8Len]; } _incompleteSurrogate = 0; } for (size_t i = 0; i < UTF16Len; i++) { of_unichar_t c = UTF16[i]; char UTF8[4]; size_t UTF8Len; /* Missing high surrogate */ if ((c & 0xFC00) == 0xDC00) @throw [OFInvalidEncodingException exception]; if ((c & 0xFC00) == 0xD800) { of_char16_t next; if (UTF16Len <= i + 1) { _incompleteSurrogate = c; if (rest != nil) { char *items = [rest items]; size_t count = [rest count]; [self unreadFromBuffer: items length: count]; } objc_autoreleasePoolPop(pool); return j; } next = UTF16[i + 1]; if ((next & 
0xFC00) != 0xDC00) @throw [OFInvalidEncodingException exception]; |
︙ | ︙ |