14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
|
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
|
-
-
-
-
-
-
+
+
+
+
+
+
+
-
-
-
+
+
+
-
-
+
+
-
-
+
+
+
-
-
|
* file.
*/
/*
* This file tries to make writing UTF-8 strings to the console "just work" on
* Windows.
*
* While Windows does provide a way to change the codepage of the console to
* UTF-8, unfortunately, different Windows versions handle that differently.
* For example, on Windows XP, when using Windows XP's console, changing the
* codepage to UTF-8 mostly breaks write() and completely breaks read():
* write() suddenly returns the number of characters - instead of bytes -
* written and read() just returns 0 as soon as a Unicode character is being
* read.
*
* Therefore, instead of just using the UTF-8 codepage, this captures all reads
* and writes to of_std{in,out,err} on the lowlevel, interprets the buffer as
* UTF-8 and converts to / from UTF-16 to use ReadConsoleW() / WriteConsoleW().
* Doing so is safe, as the console only supports text anyway (reading or
* writing binary from / to the console would not make any sense) and thus it
* does not matter if binary gets garbled by the conversion.
*
* In order to not do this when redirecting input / output to a file (as the
* file would then be read / written in the wrong encoding and break reading /
* writing binary), it checks that the handle is indeed a console.
*
* TODO: Properly handle surrogates being cut in the middle
*/
#define OF_STDIO_STREAM_WIN32_CONSOLE_M
#include "config.h"
#import "OFStdIOStream_Win32Console.h"
|
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
|
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
|
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
|
- (size_t)lowlevelReadIntoBuffer: (void*)buffer_
length: (size_t)length
{
void *pool = objc_autoreleasePoolPush();
char *buffer = buffer_;
of_char16_t *UTF16;
size_t j = 0;
OFDataArray *rest = nil;
UTF16 = [self allocMemoryWithSize: sizeof(of_char16_t)
count: length];
@try {
DWORD UTF16Len;
OFDataArray *rest = nil;
if (!ReadConsoleW(_handle, UTF16, length, &UTF16Len, NULL))
@throw [OFReadFailedException
exceptionWithObject: self
requestedLength: length];
if (UTF16Len > 0 && _incompleteSurrogate != 0) {
of_unichar_t c =
(((_incompleteSurrogate & 0x3FF) << 10) |
(UTF16[0] & 0x3FF)) + 0x10000;
char UTF8[4];
size_t UTF8Len;
if ((UTF8Len = of_string_utf8_encode(c, UTF8)) == 0)
@throw [OFInvalidEncodingException exception];
if (UTF8Len <= length) {
memcpy(buffer, UTF8, UTF8Len);
j += UTF8Len;
} else {
if (rest == nil)
rest = [OFDataArray dataArray];
[rest addItems: UTF8
count: UTF8Len];
}
_incompleteSurrogate = 0;
}
for (size_t i = 0; i < UTF16Len; i++) {
of_unichar_t c = UTF16[i];
char UTF8[4];
size_t UTF8Len;
/* Missing high surrogate */
if ((c & 0xFC00) == 0xDC00)
@throw [OFInvalidEncodingException exception];
if ((c & 0xFC00) == 0xD800) {
of_char16_t next;
if (UTF16Len <= i + 1)
@throw [OFInvalidEncodingException
exception];
if (UTF16Len <= i + 1) {
_incompleteSurrogate = c;
if (rest != nil) {
char *items = [rest items];
size_t count = [rest count];
[self unreadFromBuffer: items
length: count];
}
objc_autoreleasePoolPop(pool);
return j;
}
next = UTF16[i + 1];
if ((next & 0xFC00) != 0xDC00)
@throw [OFInvalidEncodingException
exception];
|