ObjFW  Diff

Differences From Artifact [117f6c572a]:

To Artifact [3aed091d75]:


22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
 * For example, on Windows XP, when using Windows XP's console, changing the
 * codepage to UTF-8 mostly breaks write() and completely breaks read():
 * write() suddenly returns the number of characters - instead of bytes -
 * written and read() just returns 0 as soon as a Unicode character is being
 * read.
 *
 * Therefore, instead of just using the UTF-8 codepage, this captures all reads
 * and writes to of_std{in,out,err} on the low level, interprets the buffer as
 * UTF-8 and converts to / from UTF-16 to use ReadConsoleW() / WriteConsoleW().
 * Doing so is safe, as the console only supports text anyway and thus it does
 * not matter if binary data gets garbled by the conversion (e.g. because
 * invalid UTF-8 gets converted to U+FFFD).
 *
 * In order to not do this when redirecting input / output to a file (as the
 * file would then be read / written in the wrong encoding and break reading /







|







22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
 * For example, on Windows XP, when using Windows XP's console, changing the
 * codepage to UTF-8 mostly breaks write() and completely breaks read():
 * write() suddenly returns the number of characters - instead of bytes -
 * written and read() just returns 0 as soon as a Unicode character is being
 * read.
 *
 * Therefore, instead of just using the UTF-8 codepage, this captures all reads
 * and writes to OFStd{In,Out,Err} on the low level, interprets the buffer as
 * UTF-8 and converts to / from UTF-16 to use ReadConsoleW() / WriteConsoleW().
 * Doing so is safe, as the console only supports text anyway and thus it does
 * not matter if binary data gets garbled by the conversion (e.g. because
 * invalid UTF-8 gets converted to U+FFFD).
 *
 * In order to not do this when redirecting input / output to a file (as the
 * file would then be read / written in the wrong encoding and break reading /
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
{
	int fd;

	if (self != [OFWin32ConsoleStdIOStream class])
		return;

	if ((fd = _fileno(stdin)) >= 0)
		of_stdin = [[OFWin32ConsoleStdIOStream alloc]
		    of_initWithFileDescriptor: fd];
	if ((fd = _fileno(stdout)) >= 0)
		of_stdout = [[OFWin32ConsoleStdIOStream alloc]
		    of_initWithFileDescriptor: fd];
	if ((fd = _fileno(stderr)) >= 0)
		of_stderr = [[OFWin32ConsoleStdIOStream alloc]
		    of_initWithFileDescriptor: fd];
}

- (instancetype)of_initWithFileDescriptor: (int)fd
{
	self = [super of_initWithFileDescriptor: fd];








|


|


|







84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
{
	int fd;

	if (self != [OFWin32ConsoleStdIOStream class])
		return;

	if ((fd = _fileno(stdin)) >= 0)
		OFStdIn = [[OFWin32ConsoleStdIOStream alloc]
		    of_initWithFileDescriptor: fd];
	if ((fd = _fileno(stdout)) >= 0)
		OFStdOut = [[OFWin32ConsoleStdIOStream alloc]
		    of_initWithFileDescriptor: fd];
	if ((fd = _fileno(stderr)) >= 0)
		OFStdErr = [[OFWin32ConsoleStdIOStream alloc]
		    of_initWithFileDescriptor: fd];
}

- (instancetype)of_initWithFileDescriptor: (int)fd
{
	self = [super of_initWithFileDescriptor: fd];

175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
		if (UTF16Len > 0 && _incompleteUTF16Surrogate != 0) {
			OFUnichar c =
			    (((_incompleteUTF16Surrogate & 0x3FF) << 10) |
			    (UTF16[0] & 0x3FF)) + 0x10000;
			char UTF8[4];
			size_t UTF8Len;

			if ((UTF8Len = of_string_utf8_encode(c, UTF8)) == 0)
				@throw [OFInvalidEncodingException exception];

			if (UTF8Len <= length) {
				memcpy(buffer, UTF8, UTF8Len);
				j += UTF8Len;
			} else {
				if (rest == nil)







|







175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
		if (UTF16Len > 0 && _incompleteUTF16Surrogate != 0) {
			OFUnichar c =
			    (((_incompleteUTF16Surrogate & 0x3FF) << 10) |
			    (UTF16[0] & 0x3FF)) + 0x10000;
			char UTF8[4];
			size_t UTF8Len;

			if ((UTF8Len = OFUTF8StringEncode(c, UTF8)) == 0)
				@throw [OFInvalidEncodingException exception];

			if (UTF8Len <= length) {
				memcpy(buffer, UTF8, UTF8Len);
				j += UTF8Len;
			} else {
				if (rest == nil)
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246

				c = (((c & 0x3FF) << 10) | (next & 0x3FF)) +
				    0x10000;

				i++;
			}

			if ((UTF8Len = of_string_utf8_encode(c, UTF8)) == 0)
				@throw [OFInvalidEncodingException exception];

			if (j + UTF8Len <= length) {
				memcpy(buffer + j, UTF8, UTF8Len);
				j += UTF8Len;
			} else {
				if (rest == nil)







|







232
233
234
235
236
237
238
239
240
241
242
243
244
245
246

				c = (((c & 0x3FF) << 10) | (next & 0x3FF)) +
				    0x10000;

				i++;
			}

			if ((UTF8Len = OFUTF8StringEncode(c, UTF8)) == 0)
				@throw [OFInvalidEncodingException exception];

			if (j + UTF8Len <= length) {
				memcpy(buffer + j, UTF8, UTF8Len);
				j += UTF8Len;
			} else {
				if (rest == nil)
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
	if (_incompleteUTF8SurrogateLen > 0) {
		OFUnichar c;
		OFChar16 UTF16[2];
		ssize_t UTF8Len;
		size_t toCopy;
		DWORD UTF16Len, bytesWritten;

		UTF8Len = -of_string_utf8_decode(
		    _incompleteUTF8Surrogate, _incompleteUTF8SurrogateLen, &c);

		OFEnsure(UTF8Len > 0);

		toCopy = UTF8Len - _incompleteUTF8SurrogateLen;
		if (toCopy > length)
			toCopy = length;

		memcpy(_incompleteUTF8Surrogate + _incompleteUTF8SurrogateLen,
		    buffer, toCopy);
		_incompleteUTF8SurrogateLen += toCopy;

		if (_incompleteUTF8SurrogateLen < (size_t)UTF8Len)
			return 0;

		UTF8Len = of_string_utf8_decode(
		    _incompleteUTF8Surrogate, _incompleteUTF8SurrogateLen, &c);

		if (UTF8Len <= 0 || c > 0x10FFFF) {
			assert(UTF8Len == 0 || UTF8Len < -4);

			UTF16[0] = 0xFFFD;
			UTF16Len = 1;







|















|







273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
	if (_incompleteUTF8SurrogateLen > 0) {
		OFUnichar c;
		OFChar16 UTF16[2];
		ssize_t UTF8Len;
		size_t toCopy;
		DWORD UTF16Len, bytesWritten;

		UTF8Len = -OFUTF8StringDecode(
		    _incompleteUTF8Surrogate, _incompleteUTF8SurrogateLen, &c);

		OFEnsure(UTF8Len > 0);

		toCopy = UTF8Len - _incompleteUTF8SurrogateLen;
		if (toCopy > length)
			toCopy = length;

		memcpy(_incompleteUTF8Surrogate + _incompleteUTF8SurrogateLen,
		    buffer, toCopy);
		_incompleteUTF8SurrogateLen += toCopy;

		if (_incompleteUTF8SurrogateLen < (size_t)UTF8Len)
			return 0;

		UTF8Len = OFUTF8StringDecode(
		    _incompleteUTF8Surrogate, _incompleteUTF8SurrogateLen, &c);

		if (UTF8Len <= 0 || c > 0x10FFFF) {
			assert(UTF8Len == 0 || UTF8Len < -4);

			UTF16[0] = 0xFFFD;
			UTF16Len = 1;
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
	@try {
		DWORD bytesWritten;

		while (i < length) {
			OFUnichar c;
			ssize_t UTF8Len;

			UTF8Len = of_string_utf8_decode(buffer + i, length - i,
			    &c);

			if (UTF8Len < 0 && UTF8Len >= -4) {
				OFEnsure(length - i < 4);

				memcpy(_incompleteUTF8Surrogate, buffer + i,
				    length - i);







|







361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
	@try {
		DWORD bytesWritten;

		while (i < length) {
			OFUnichar c;
			ssize_t UTF8Len;

			UTF8Len = OFUTF8StringDecode(buffer + i, length - i,
			    &c);

			if (UTF8Len < 0 && UTF8Len >= -4) {
				OFEnsure(length - i < 4);

				memcpy(_incompleteUTF8Surrogate, buffer + i,
				    length - i);