ObjFW  Diff

Differences From Artifact [ce12e0f51c]:

To Artifact [97c02a6c42]:

  • File src/OFString_UTF8.m — part of check-in [50916b8dbe] at 2013-01-11 12:41:54 on branch trunk — OFString: Improve API for characters / UTF-32.

    With this change, there is now a clear separation between characters
    and UTF-32 strings. Characters are just an array of characters in the
    native endianness, with no BOM prepended. UTF-32, on the other hand, may
    have a BOM, can be byte-swapped, and is optionally zero-terminated.

    This also fixes a few missing UTF-16 init methods in OFMutableString. (user: js, size: 30904) [annotate] [blame] [check-ins using]


400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
	}

	return self;
}

- initWithCharacters: (const of_unichar_t*)characters
	      length: (size_t)length
	   byteOrder: (of_byte_order_t)byteOrder
{
	self = [super init];

	@try {
		size_t i, j = 0;
		BOOL swap = NO;

		if (length > 0 && *characters == 0xFEFF) {
			characters++;
			length--;
		} else if (length > 0 && *characters == 0xFFFE0000) {
			swap = YES;
			characters++;
			length--;
		} else if (byteOrder != OF_BYTE_ORDER_NATIVE)
			swap = YES;

		s = &s_store;

		s->cString = [self allocMemoryWithSize: (length * 4) + 1];
		s->length = length;

		for (i = 0; i < length; i++) {
			char buffer[4];
			size_t characterLen = of_string_utf8_encode(
			    (swap ? OF_BSWAP32(characters[i]) : characters[i]),
			    buffer);

			switch (characterLen) {
			case 1:
				s->cString[j++] = buffer[0];
				break;
			case 2:
				s->isUTF8 = YES;

				memcpy(s->cString + j, buffer, 2);







<





<
<
<
<
<
<
<
<
<
<
<








<
<
<

|







400
401
402
403
404
405
406

407
408
409
410
411











412
413
414
415
416
417
418
419



420
421
422
423
424
425
426
427
428
	}

	return self;
}

- initWithCharacters: (const of_unichar_t*)characters
	      length: (size_t)length

{
	self = [super init];

	@try {
		size_t i, j = 0;












		s = &s_store;

		s->cString = [self allocMemoryWithSize: (length * 4) + 1];
		s->length = length;

		for (i = 0; i < length; i++) {
			char buffer[4];




			switch (of_string_utf8_encode(characters[i], buffer)) {
			case 1:
				s->cString[j++] = buffer[0];
				break;
			case 2:
				s->isUTF8 = YES;

				memcpy(s->cString + j, buffer, 2);
578
579
580
581
582
583
584















































































585
586
587
588
589
590
591
		}

		s->cString[j] = '\0';

		@try {
			s->cString = [self resizeMemory: s->cString
						   size: s->cStringLength + 1];















































































		} @catch (OFOutOfMemoryException *e) {
			/* We don't care, as we only tried to make it smaller */
		}
	} @catch (id e) {
		[self release];
		@throw e;
	}







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
		}

		s->cString[j] = '\0';

		@try {
			s->cString = [self resizeMemory: s->cString
						   size: s->cStringLength + 1];
		} @catch (OFOutOfMemoryException *e) {
			/* We don't care, as we only tried to make it smaller */
		}
	} @catch (id e) {
		[self release];
		@throw e;
	}

	return self;
}

- initWithUTF32String: (const of_unichar_t*)characters
	       length: (size_t)length
	    byteOrder: (of_byte_order_t)byteOrder
{
	self = [super init];

	@try {
		size_t i, j = 0;
		BOOL swap = NO;

		if (length > 0 && *characters == 0xFEFF) {
			characters++;
			length--;
		} else if (length > 0 && *characters == 0xFFFE0000) {
			swap = YES;
			characters++;
			length--;
		} else if (byteOrder != OF_BYTE_ORDER_NATIVE)
			swap = YES;

		s = &s_store;

		s->cString = [self allocMemoryWithSize: (length * 4) + 1];
		s->length = length;

		for (i = 0; i < length; i++) {
			char buffer[4];
			size_t characterLen = of_string_utf8_encode(
			    (swap ? OF_BSWAP32(characters[i]) : characters[i]),
			    buffer);

			switch (characterLen) {
			case 1:
				s->cString[j++] = buffer[0];
				break;
			case 2:
				s->isUTF8 = YES;

				memcpy(s->cString + j, buffer, 2);
				j += 2;

				break;
			case 3:
				s->isUTF8 = YES;

				memcpy(s->cString + j, buffer, 3);
				j += 3;

				break;
			case 4:
				s->isUTF8 = YES;

				memcpy(s->cString + j, buffer, 4);
				j += 4;

				break;
			default:
				@throw [OFInvalidEncodingException
				    exceptionWithClass: [self class]];
			}
		}

		s->cString[j] = '\0';
		s->cStringLength = j;

		@try {
			s->cString = [self resizeMemory: s->cString
						   size: j + 1];
		} @catch (OFOutOfMemoryException *e) {
			/* We don't care, as we only tried to make it smaller */
		}
	} @catch (id e) {
		[self release];
		@throw e;
	}