ObjFW  Check-in [3a0fdb6701]

Overview
Comment:OFStdIOStream_Win32Console: Improve writing

When writing an incomplete surrogate, it now writes everything up to
that incomplete surrogate, remembers the incomplete surrogate and writes
it as soon as the surrogate is completed by a following write.

Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: 3a0fdb670130629b9fcbdf5e878b9845443a1c4a93ca0c5a773927717cd13767
User & Date: js on 2016-03-13 19:33:09
Other Links: manifest | tags
Context
2016-03-13
20:04
OFStdIOStream_Win32Console: Use U+FFFD, not U+FFFE check-in: 9d70e660ea user: js tags: trunk
19:33
OFStdIOStream_Win32Console: Improve writing check-in: 3a0fdb6701 user: js tags: trunk
13:16
of_string_utf8_decode(): Check all surrogate bytes check-in: a83b261f72 user: js tags: trunk
Changes

Modified src/OFStdIOStream_Win32Console.h from [34f51c6008] to [b8fefbb451].

19
20
21
22
23
24
25
26


27
28
29
30
#import "OFStdIOStream.h"

OF_ASSUME_NONNULL_BEGIN

/*
 * OFStdIOStream subclass holding the state used for reading from / writing to
 * a Win32 console.
 */
@interface OFStdIOStream_Win32Console: OFStdIOStream
{
	/* Win32 handle that is passed to ReadConsoleW() / WriteConsoleW(). */
	HANDLE _handle;
	/*
	 * UTF-16 high surrogate that was the last code unit of a console read
	 * and is therefore still waiting for its low surrogate. It is combined
	 * with the first code unit of the next read; 0 means nothing is
	 * pending.
	 */
	of_char16_t _incompleteSurrogate;


}
@end

OF_ASSUME_NONNULL_END







|
>
>




19
20
21
22
23
24
25
26
27
28
29
30
31
32
#import "OFStdIOStream.h"

OF_ASSUME_NONNULL_BEGIN

/*
 * OFStdIOStream subclass holding the state used for reading from / writing to
 * a Win32 console.
 */
@interface OFStdIOStream_Win32Console: OFStdIOStream
{
	/* Win32 handle that is passed to ReadConsoleW() / WriteConsoleW(). */
	HANDLE _handle;
	/*
	 * UTF-16 high surrogate that was the last code unit of a console read
	 * and is therefore still waiting for its low surrogate. It is combined
	 * with the first code unit of the next read; 0 means nothing is
	 * pending.
	 */
	of_char16_t _incompleteUTF16Surrogate;
	/*
	 * Bytes of a UTF-8 sequence that was cut off at the end of a write.
	 * They are completed with the first bytes of the following write.
	 */
	char _incompleteUTF8Surrogate[4];
	/*
	 * Number of bytes currently stored in _incompleteUTF8Surrogate; 0 means
	 * no incomplete sequence is pending.
	 */
	size_t _incompleteUTF8SurrogateLen;
}
@end

OF_ASSUME_NONNULL_END

Modified src/OFStdIOStream_Win32Console.m from [f55d77f8b8] to [c806288ec6].

26
27
28
29
30
31
32
33

34
35
36
37
38
39
40
41
42


43
44
45
46
47
48
49
 * written and read() just returns 0 as soon as a Unicode character is being
 * read.
 *
 * Therefore, instead of just using the UTF-8 codepage, this captures all reads
 * and writes to of_std{in,out,err} on the lowlevel, interprets the buffer as
 * UTF-8 and converts to / from UTF-16 to use ReadConsoleW() / WriteConsoleW().
 * Doing so is safe, as the console only supports text anyway and thus it does
 * not matter if binary gets garbled by the conversion.

 *
 * In order to not do this when redirecting input / output to a file (as the
 * file would then be read / written in the wrong encoding and break reading /
 * writing binary), it checks that the handle is indeed a console.
 */

#define OF_STDIO_STREAM_WIN32_CONSOLE_M

#include "config.h"



#import "OFStdIOStream_Win32Console.h"
#import "OFStdIOStream+Private.h"
#import "OFString.h"
#import "OFDataArray.h"

#import "OFInvalidArgumentException.h"







|
>









>
>







26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
 * written and read() just returns 0 as soon as a Unicode character is being
 * read.
 *
 * Therefore, instead of just using the UTF-8 codepage, this captures all reads
 * and writes to of_std{in,out,err} on the lowlevel, interprets the buffer as
 * UTF-8 and converts to / from UTF-16 to use ReadConsoleW() / WriteConsoleW().
 * Doing so is safe, as the console only supports text anyway and thus it does
 * not matter if binary gets garbled by the conversion (e.g. because invalid
 * UTF-8 gets converted to a replacement character).
 *
 * In order to not do this when redirecting input / output to a file (as the
 * file would then be read / written in the wrong encoding and break reading /
 * writing binary), it checks that the handle is indeed a console.
 */

#define OF_STDIO_STREAM_WIN32_CONSOLE_M

#include "config.h"

#include <assert.h>

#import "OFStdIOStream_Win32Console.h"
#import "OFStdIOStream+Private.h"
#import "OFString.h"
#import "OFDataArray.h"

#import "OFInvalidArgumentException.h"
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
	@try {
		DWORD UTF16Len;
		OFDataArray *rest = nil;

		if (!ReadConsoleW(_handle, UTF16, length, &UTF16Len, NULL))
			@throw [OFReadFailedException
			    exceptionWithObject: self
				requestedLength: length];

		if (UTF16Len > 0 && _incompleteSurrogate != 0) {
			of_unichar_t c =
			    (((_incompleteSurrogate & 0x3FF) << 10) |
			    (UTF16[0] & 0x3FF)) + 0x10000;
			char UTF8[4];
			size_t UTF8Len;

			if ((UTF8Len = of_string_utf8_encode(c, UTF8)) == 0)
				@throw [OFInvalidEncodingException exception];

			if (UTF8Len <= length) {
				memcpy(buffer, UTF8, UTF8Len);
				j += UTF8Len;
			} else {
				if (rest == nil)
					rest = [OFDataArray dataArray];

				[rest addItems: UTF8
					 count: UTF8Len];
			}

			_incompleteSurrogate = 0;
		}

		for (size_t i = 0; i < UTF16Len; i++) {
			of_unichar_t c = UTF16[i];
			char UTF8[4];
			size_t UTF8Len;

			/* Missing high surrogate */
			if ((c & 0xFC00) == 0xDC00)
				@throw [OFInvalidEncodingException exception];

			if ((c & 0xFC00) == 0xD800) {
				of_char16_t next;

				if (UTF16Len <= i + 1) {
					_incompleteSurrogate = c;

					if (rest != nil) {
						char *items = [rest items];
						size_t count = [rest count];

						[self unreadFromBuffer: items
								length: count];







|

|

|


















|















|







113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
	@try {
		DWORD UTF16Len;
		OFDataArray *rest = nil;

		if (!ReadConsoleW(_handle, UTF16, length, &UTF16Len, NULL))
			@throw [OFReadFailedException
			    exceptionWithObject: self
				requestedLength: length * 2];

		if (UTF16Len > 0 && _incompleteUTF16Surrogate != 0) {
			of_unichar_t c =
			    (((_incompleteUTF16Surrogate & 0x3FF) << 10) |
			    (UTF16[0] & 0x3FF)) + 0x10000;
			char UTF8[4];
			size_t UTF8Len;

			if ((UTF8Len = of_string_utf8_encode(c, UTF8)) == 0)
				@throw [OFInvalidEncodingException exception];

			if (UTF8Len <= length) {
				memcpy(buffer, UTF8, UTF8Len);
				j += UTF8Len;
			} else {
				if (rest == nil)
					rest = [OFDataArray dataArray];

				[rest addItems: UTF8
					 count: UTF8Len];
			}

			_incompleteUTF16Surrogate = 0;
		}

		for (size_t i = 0; i < UTF16Len; i++) {
			of_unichar_t c = UTF16[i];
			char UTF8[4];
			size_t UTF8Len;

			/* Missing high surrogate */
			if ((c & 0xFC00) == 0xDC00)
				@throw [OFInvalidEncodingException exception];

			if ((c & 0xFC00) == 0xD800) {
				of_char16_t next;

				if (UTF16Len <= i + 1) {
					_incompleteUTF16Surrogate = c;

					if (rest != nil) {
						char *items = [rest items];
						size_t count = [rest count];

						[self unreadFromBuffer: items
								length: count];
208
209
210
211
212
213
214

215
216
217
218





















































219
220
221
222
223
224
225
226
227
228
229
230
231










232
233




234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
}

/*
 * Converts the UTF-8 bytes in buffer_ to UTF-16 and writes the result to the
 * console using WriteConsoleW().
 *
 * buffer_: The UTF-8 encoded bytes to write
 * length:  The number of bytes in buffer_
 *
 * Throws OFOutOfRangeException if length is larger than SIZE_MAX / 2,
 * OFInvalidEncodingException if the buffer cannot be decoded as UTF-8 or
 * decodes to a value above 0x10FFFF, and OFWriteFailedException if
 * WriteConsoleW() fails or writes fewer code units than requested.
 */
- (void)lowlevelWriteBuffer: (const void*)buffer_
		     length: (size_t)length
{
	const char *buffer = buffer_;
	of_char16_t *tmp;

	if (length > SIZE_MAX / 2)
		@throw [OFOutOfRangeException exception];

	/* Scratch buffer for the UTF-16 code units, sized for length * 2. */
	tmp = [self allocMemoryWithSize: sizeof(of_char16_t)
				  count: length * 2];
	@try {
		size_t i = 0, j = 0;
		DWORD written;

		while (i < length) {
			of_unichar_t c;
			/*
			 * Signed on purpose: with an unsigned type the <= 0
			 * check below could only ever catch 0, and a negative
			 * result from the decoder would wrap around to a huge
			 * length that is then added to i.
			 */
			ssize_t UTF8Len;

			UTF8Len = of_string_utf8_decode(buffer + i, length - i,
			    &c);

			if (UTF8Len <= 0 || c > 0x10FFFF)
				@throw [OFInvalidEncodingException exception];

			if (c > 0xFFFF) {
				/* Outside the BMP: encode as surrogate pair. */
				c -= 0x10000;
				tmp[j++] = 0xD800 | (c >> 10);
				tmp[j++] = 0xDC00 | (c & 0x3FF);
			} else
				tmp[j++] = c;

			i += UTF8Len;
		}

		if (!WriteConsoleW(_handle, tmp, j, &written, NULL) ||
		    written != j)
			@throw [OFWriteFailedException
			    exceptionWithObject: self
				requestedLength: j];
	} @finally {
		[self freeMemory: tmp];
	}
}
@end







>




>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>



<




|




>
>
>
>
>
>
>
>
>
>
|
<
>
>
>
>















|





211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278

279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298

299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
}

/*
 * Converts the UTF-8 bytes in buffer_ to UTF-16 and writes the result to the
 * console using WriteConsoleW().
 *
 * A UTF-8 sequence that is cut off at the end of the buffer is not written.
 * Its bytes are stored in _incompleteUTF8Surrogate and completed with the
 * first bytes of the next call. Input that cannot be decoded is written as
 * U+FFFD (REPLACEMENT CHARACTER) instead of raising an exception.
 *
 * buffer_: The UTF-8 encoded bytes to write
 * length:  The number of bytes in buffer_
 *
 * Throws OFOutOfRangeException if length is larger than SIZE_MAX / 2 and
 * OFWriteFailedException if WriteConsoleW() fails or writes fewer code units
 * than requested.
 */
- (void)lowlevelWriteBuffer: (const void*)buffer_
		     length: (size_t)length
{
	const char *buffer = buffer_;
	of_char16_t *tmp;
	size_t i = 0, j = 0;

	if (length > SIZE_MAX / 2)
		@throw [OFOutOfRangeException exception];

	/* First finish the sequence left over from the previous write. */
	if (_incompleteUTF8SurrogateLen > 0) {
		of_unichar_t c;
		of_char16_t UTF16[2];
		ssize_t UTF8Len;
		size_t toCopy, UTF16Len;
		DWORD written;

		/*
		 * The bytes were only stored because decoding them yielded a
		 * negative value. The negated value is used as the total
		 * number of bytes the sequence needs.
		 */
		UTF8Len = -of_string_utf8_decode(
		    _incompleteUTF8Surrogate, _incompleteUTF8SurrogateLen, &c);

		OF_ENSURE(UTF8Len > 0);

		/* Take as many of the missing bytes as this buffer offers. */
		toCopy = UTF8Len - _incompleteUTF8SurrogateLen;
		if (toCopy > length)
			toCopy = length;

		memcpy(_incompleteUTF8Surrogate + _incompleteUTF8SurrogateLen,
		    buffer, toCopy);
		_incompleteUTF8SurrogateLen += toCopy;

		/* Still incomplete: wait for the next write. */
		if (_incompleteUTF8SurrogateLen < (size_t)UTF8Len)
			return;

		UTF8Len = of_string_utf8_decode(
		    _incompleteUTF8Surrogate, _incompleteUTF8SurrogateLen, &c);

		/*
		 * The stored bytes are consumed from here on. Resetting the
		 * state before writing makes sure that a failed write does not
		 * leave a complete sequence behind, which would make the
		 * OF_ENSURE above fail on the next call.
		 */
		_incompleteUTF8SurrogateLen = 0;

		if (UTF8Len <= 0 || c > 0x10FFFF) {
			/* Not decodable: substitute REPLACEMENT CHARACTER. */
			UTF16[0] = 0xFFFD;
			UTF16Len = 1;
		} else if (c > 0xFFFF) {
			/* Outside the BMP: encode as surrogate pair. */
			c -= 0x10000;
			UTF16[0] = 0xD800 | (c >> 10);
			UTF16[1] = 0xDC00 | (c & 0x3FF);
			UTF16Len = 2;
		} else {
			UTF16[0] = c;
			UTF16Len = 1;
		}

		if (!WriteConsoleW(_handle, UTF16, UTF16Len, &written, NULL) ||
		    written != UTF16Len)
			@throw [OFWriteFailedException
			    exceptionWithObject: self
				requestedLength: UTF16Len * 2];

		/* Skip the bytes of buffer that completed the sequence. */
		i += toCopy;
	}

	/* Scratch buffer for the UTF-16 code units, sized for length * 2. */
	tmp = [self allocMemoryWithSize: sizeof(of_char16_t)
				  count: length * 2];
	@try {
		DWORD written;

		while (i < length) {
			of_unichar_t c;
			ssize_t UTF8Len;

			UTF8Len = of_string_utf8_decode(buffer + i, length - i,
			    &c);

			/*
			 * Results from -4 to -1 are treated as a sequence that
			 * is cut off by the end of the buffer: remember its
			 * bytes for the next write and stop converting.
			 */
			if (UTF8Len < 0 && UTF8Len >= -4) {
				OF_ENSURE(length - i < 4);

				memcpy(_incompleteUTF8Surrogate, buffer + i,
				    length - i);
				_incompleteUTF8SurrogateLen = length - i;

				break;
			}

			if (UTF8Len <= 0 || c > 0x10FFFF) {
				/*
				 * Not decodable: substitute REPLACEMENT
				 * CHARACTER and retry at the next byte.
				 */
				tmp[j++] = 0xFFFD;
				i++;
				continue;
			}

			if (c > 0xFFFF) {
				/* Outside the BMP: encode as surrogate pair. */
				c -= 0x10000;
				tmp[j++] = 0xD800 | (c >> 10);
				tmp[j++] = 0xDC00 | (c & 0x3FF);
			} else
				tmp[j++] = c;

			i += UTF8Len;
		}

		if (!WriteConsoleW(_handle, tmp, j, &written, NULL) ||
		    written != j)
			@throw [OFWriteFailedException
			    exceptionWithObject: self
				requestedLength: j * 2];
	} @finally {
		[self freeMemory: tmp];
	}
}
@end