ICU 73.2
73.2
common
unicode
utf16.h
Go to the documentation of this file.
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
*
6
* Copyright (C) 1999-2012, International Business Machines
7
* Corporation and others. All Rights Reserved.
8
*
9
*******************************************************************************
10
* file name: utf16.h
11
* encoding: UTF-8
12
* tab size: 8 (not used)
13
* indentation:4
14
*
15
* created on: 1999sep09
16
* created by: Markus W. Scherer
17
*/
18
34
#ifndef __UTF16_H__
35
#define __UTF16_H__
36
37
#include <stdbool.h>
38
#include "
unicode/umachine.h
"
39
#ifndef __UTF_H__
40
# include "
unicode/utf.h
"
41
#endif
42
43
/* single-code point definitions -------------------------------------------- */
44
51
/**
 * Does this code unit alone encode a code point?
 * Evaluates to the logical negation of U_IS_SURROGATE(c).
 * @param c code unit to test
 * @return non-zero when U_IS_SURROGATE(c) is false, 0 otherwise
 */
#define U16_IS_SINGLE(c) (!U_IS_SURROGATE(c))
52
59
/**
 * Is this code unit a lead surrogate (0xd800..0xdbff)?
 * The low 10 bits are masked off and the remaining bits must equal 0xd800.
 * @param c code unit to test
 * @return 1 if c is in 0xd800..0xdbff, 0 otherwise
 */
#define U16_IS_LEAD(c) (0xd800==((c)&0xfffffc00))
60
67
/**
 * Is this code unit a trail surrogate (0xdc00..0xdfff)?
 * The low 10 bits are masked off and the remaining bits must equal 0xdc00.
 * @param c code unit to test
 * @return 1 if c is in 0xdc00..0xdfff, 0 otherwise
 */
#define U16_IS_TRAIL(c) (0xdc00==((c)&0xfffffc00))
68
75
/**
 * Is this code unit a surrogate?
 * Pure alias: expands to U_IS_SURROGATE(c), which is defined outside this header.
 * @param c code unit to test
 * @return whatever U_IS_SURROGATE(c) evaluates to
 */
#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
76
84
/**
 * Given that c is already known to be a surrogate code unit, is it a lead surrogate?
 * Only bit 0x400 is inspected, so the result is meaningless for non-surrogates.
 * @param c surrogate code unit
 * @return 1 if bit 0x400 of c is clear, 0 otherwise
 */
#define U16_IS_SURROGATE_LEAD(c) (!((c)&0x400))
85
93
/**
 * Given that c is already known to be a surrogate code unit, is it a trail surrogate?
 * Only bit 0x400 is inspected, so the result is meaningless for non-surrogates.
 * @param c surrogate code unit
 * @return 1 if bit 0x400 of c is set, 0 otherwise
 */
#define U16_IS_SURROGATE_TRAIL(c) (0!=((c)&0x400))
94
99
/**
 * Constant subtracted by U16_GET_SUPPLEMENTARY() after it has computed
 * (lead<<10)+trail. It is chosen so that the pair 0xd800/0xdc00 maps to 0x10000.
 * Value: 0x35fdc00.
 */
#define U16_SURROGATE_OFFSET ((0xd800<<10UL)-0x10000+0xdc00)
100
112
/**
 * Combine a lead and a trail surrogate code unit into one supplementary code point.
 * Both arguments are cast to UChar32; the result is
 * (lead<<10) + trail - U16_SURROGATE_OFFSET.
 * The arguments are not checked: passing anything other than a lead surrogate
 * followed by a trail surrogate yields an arithmetic result that is not a
 * meaningful code point.
 *
 * @param lead  lead surrogate code unit
 * @param trail trail surrogate code unit
 * @return the combined value as a UChar32
 */
#define U16_GET_SUPPLEMENTARY(lead, trail) \
    (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET)
114
115
123
/**
 * Compute the lead surrogate code unit for a supplementary code point.
 * The value is (supplementary>>10)+0xd7c0, cast to UChar. The argument is not
 * range-checked.
 * @param supplementary code point expected to be above 0xffff
 * @return the lead surrogate as a UChar
 */
#define U16_LEAD(supplementary) (UChar)(0xd7c0+((supplementary)>>10))
124
132
/**
 * Compute the trail surrogate code unit for a supplementary code point.
 * The value is the low 10 bits of the argument OR-ed with 0xdc00, cast to UChar.
 * The argument is not range-checked.
 * @param supplementary code point expected to be above 0xffff
 * @return the trail surrogate as a UChar
 */
#define U16_TRAIL(supplementary) (UChar)(0xdc00|((supplementary)&0x3ff))
133
141
/**
 * Number of 16-bit code units needed to encode code point c.
 * The argument is compared as uint32_t, so negative inputs fall into the
 * "greater than 0xffff" case and yield 2.
 * @param c code point
 * @return 1 if (uint32_t)c <= 0xffff, otherwise 2
 */
#define U16_LENGTH(c) ((uint32_t)(c)>0xffff ? 2 : 1)
142
148
/**
 * Upper bound on the number of code units per code point in this encoding form;
 * matches the largest value U16_LENGTH() can produce (2).
 */
#define U16_MAX_LENGTH 2
149
167
/**
 * Get the code point that contains the code unit at random-access offset i.
 * "Unsafe" variant: no bounds are checked and surrogates are assumed to be
 * correctly paired.
 *
 * c is first set to s[i]. If that unit is a surrogate, it is combined with
 * its neighbour: with s[i+1] when it is a lead (bit 0x400 clear), or with
 * s[i-1] when it is a trail. The offset i is not modified.
 *
 * Arguments are evaluated more than once; do not pass expressions with
 * side effects.
 *
 * @param s array of code units
 * @param i offset of the code unit to inspect (read only)
 * @param c output lvalue receiving the code point
 */
#define U16_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    if(U16_IS_SURROGATE(c)) { \
        if(U16_IS_SURROGATE_LEAD(c)) { \
            (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \
        } else { \
            (c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
177
201
/**
 * Get the code point that contains the code unit at random-access offset i,
 * checking neighbours against the given bounds.
 *
 * c is first set to s[i]. If that unit is a surrogate:
 *  - lead: s[i+1] is read only if i+1 != length; when it is a trail surrogate
 *    the pair is combined into a supplementary code point.
 *  - trail: s[i-1] is read only if i > start; when it is a lead surrogate
 *    the pair is combined into a supplementary code point.
 * An unpaired surrogate is left in c unchanged. The offset i is not modified.
 *
 * Arguments are evaluated more than once; do not pass expressions with
 * side effects.
 *
 * @param s      array of code units
 * @param start  lowest offset that may be read
 * @param i      offset of the code unit to inspect (read only)
 * @param length limit offset; s[length] is never read
 * @param c      output lvalue receiving the code point or unpaired surrogate
 */
#define U16_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(U16_IS_SURROGATE_LEAD(c)) { \
            if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
                (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
            } \
        } else { \
            if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
                (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END
216
240
/**
 * Get the code point that contains the code unit at random-access offset i,
 * substituting 0xfffd for unpaired surrogates.
 *
 * Same neighbour and bounds logic as U16_GET(): a lead surrogate looks at
 * s[i+1] only if i+1 != length, a trail surrogate looks at s[i-1] only if
 * i > start. When no matching partner is found, c is set to 0xfffd instead
 * of the surrogate value. The offset i is not modified.
 *
 * Arguments are evaluated more than once; do not pass expressions with
 * side effects.
 *
 * @param s      array of code units
 * @param start  lowest offset that may be read
 * @param i      offset of the code unit to inspect (read only)
 * @param length limit offset; s[length] is never read
 * @param c      output lvalue receiving the code point or 0xfffd
 */
#define U16_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(U16_IS_SURROGATE_LEAD(c)) { \
            if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
                (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
            } else { \
                (c)=0xfffd; \
            } \
        } else { \
            if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
                (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
            } else { \
                (c)=0xfffd; \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END
259
260
/* definitions with forward iteration --------------------------------------- */
261
281
/**
 * Read the code point starting at offset i and advance i past it.
 * "Unsafe" variant: no bounds are checked and a lead surrogate is assumed
 * to be followed by a trail surrogate.
 *
 * c is set to s[i] and i is post-incremented. If that unit is a lead
 * surrogate, the next unit is consumed as well (i is incremented again)
 * and both are combined into a supplementary code point.
 *
 * @param s array of code units
 * @param i offset lvalue, advanced by 1 or 2
 * @param c output lvalue receiving the code point
 */
#define U16_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)++]; \
    if(U16_IS_LEAD(c)) { \
        (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \
    } \
} UPRV_BLOCK_MACRO_END
287
309
/**
 * Read the code point starting at offset i and advance i past it, checking
 * the upper bound before reading a trail surrogate.
 *
 * c is set to s[i] and i is post-incremented. If that unit is a lead
 * surrogate, the following unit is read only when i != length; if it is a
 * trail surrogate it is consumed (i incremented again) and the pair is
 * combined into a supplementary code point. Otherwise c keeps the value of
 * the single code unit, including an unpaired surrogate.
 *
 * The first read of s[i] is not bounds-checked by this macro.
 *
 * @param s      array of code units
 * @param i      offset lvalue, advanced by 1 or 2
 * @param length limit offset; s[length] is never read as a trail unit
 * @param c      output lvalue receiving the code point or unpaired surrogate
 */
#define U16_NEXT(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)++]; \
    if(U16_IS_LEAD(c)) { \
        uint16_t __c2; \
        if((i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
            ++(i); \
            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
319
341
/**
 * Read the code point starting at offset i and advance i past it,
 * substituting 0xfffd for unpaired surrogates.
 *
 * c is set to s[i] and i is post-incremented. If that unit is a surrogate:
 *  - when it is a lead, i != length, and s[i] is a trail surrogate, the trail
 *    is consumed (i incremented again) and the pair is combined;
 *  - in every other case (lone lead, or any trail) c is set to 0xfffd and
 *    i stays advanced by one.
 *
 * The first read of s[i] is not bounds-checked by this macro.
 *
 * @param s      array of code units
 * @param i      offset lvalue, advanced by 1 or 2
 * @param length limit offset; s[length] is never read as a trail unit
 * @param c      output lvalue receiving the code point or 0xfffd
 */
#define U16_NEXT_OR_FFFD(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)++]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(U16_IS_SURROGATE_LEAD(c) && (i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
            ++(i); \
            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
        } else { \
            (c)=0xfffd; \
        } \
    } \
} UPRV_BLOCK_MACRO_END
353
367
/**
 * Write code point c at offset i and advance i past the written units.
 * "Unsafe" variant: neither the buffer capacity nor the validity of c is
 * checked.
 *
 * If (uint32_t)c <= 0xffff one unit is written; otherwise two units are
 * written: (c>>10)+0xd7c0 followed by (c&0x3ff)|0xdc00.
 *
 * c is evaluated more than once; do not pass an expression with side effects.
 *
 * @param s array of code units to write into
 * @param i offset lvalue, advanced by 1 or 2
 * @param c code point to append
 */
#define U16_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else { \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } \
} UPRV_BLOCK_MACRO_END
375
393
/**
 * Write code point c at offset i and advance i past the written units,
 * reporting failure through isError.
 *
 *  - (uint32_t)c <= 0xffff: one unit is written. This branch does not
 *    consult capacity, so the caller must already know that i < capacity.
 *  - c <= 0x10ffff and i+1 < capacity: a surrogate pair is written.
 *  - otherwise (c > 0x10ffff, or no room for two units): nothing is written,
 *    i is unchanged, and isError is set to true.
 *
 * isError is only ever set to true here, never reset to false; initialise it
 * before the call.
 *
 * c and i are evaluated more than once; do not pass expressions with
 * side effects.
 *
 * @param s        array of code units to write into
 * @param i        offset lvalue, advanced by 0, 1 or 2
 * @param capacity size of the buffer in code units
 * @param c        code point to append
 * @param isError  output lvalue set to true on failure
 */
#define U16_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } else /* c>0x10ffff or not enough space */ { \
        (isError)=true; \
    } \
} UPRV_BLOCK_MACRO_END
403
414
/**
 * Advance offset i past one code point without reading its value.
 * "Unsafe" variant: no bounds are checked and a lead surrogate is assumed
 * to be followed by a trail surrogate.
 *
 * i is always incremented once; it is incremented a second time when the
 * unit that was skipped is a lead surrogate.
 *
 * @param s array of code units
 * @param i offset lvalue, advanced by 1 or 2
 */
#define U16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_LEAD((s)[(i)++])) { \
        ++(i); \
    } \
} UPRV_BLOCK_MACRO_END
419
433
/**
 * Advance offset i past one code point without reading its value, checking
 * the upper bound before looking at a trail surrogate.
 *
 * i is always incremented once. It is incremented a second time only when
 * the skipped unit is a lead surrogate, i != length afterwards, and s[i] is
 * a trail surrogate. An unpaired surrogate therefore counts as one step.
 *
 * The first read of s[i] is not bounds-checked by this macro.
 *
 * @param s      array of code units
 * @param i      offset lvalue, advanced by 1 or 2
 * @param length limit offset; s[length] is never read as a trail unit
 */
#define U16_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_LEAD((s)[(i)++]) && (i)!=(length) && U16_IS_TRAIL((s)[i])) { \
        ++(i); \
    } \
} UPRV_BLOCK_MACRO_END
438
451
/**
 * Advance offset i past n code points by applying U16_FWD_1_UNSAFE() n times.
 * "Unsafe" variant: no bounds are checked.
 *
 * n is copied once into a local int32_t counter; a value <= 0 leaves i
 * unchanged.
 *
 * @param s array of code units
 * @param i offset lvalue to advance
 * @param n number of code points to skip
 */
#define U16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N=(n); \
    while(__N>0) { \
        U16_FWD_1_UNSAFE(s, i); \
        --__N; \
    } \
} UPRV_BLOCK_MACRO_END
458
474
/**
 * Advance offset i past up to n code points by applying U16_FWD_1(),
 * stopping early at the end of the text.
 *
 * The loop continues while the counter is positive and either i < length,
 * or length is negative and s[i] is not 0. A negative length therefore
 * makes the macro stop at the first zero code unit.
 *
 * n is copied once into a local int32_t counter; a value <= 0 leaves i
 * unchanged.
 *
 * @param s      array of code units
 * @param i      offset lvalue to advance
 * @param length limit offset, or a negative value to stop at a zero code unit
 * @param n      maximum number of code points to skip
 */
#define U16_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N=(n); \
    while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \
        U16_FWD_1(s, i, length); \
        --__N; \
    } \
} UPRV_BLOCK_MACRO_END
481
495
/**
 * Move random-access offset i back to the start of the code point that
 * contains s[i]. "Unsafe" variant: no bounds are checked and a trail
 * surrogate is assumed to be preceded by a lead surrogate.
 *
 * i is decremented once when s[i] is a trail surrogate; otherwise it is
 * left unchanged.
 *
 * @param s array of code units
 * @param i offset lvalue, possibly decremented by 1
 */
#define U16_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[i])) { \
        --(i); \
    } \
} UPRV_BLOCK_MACRO_END
500
515
/**
 * Move random-access offset i back to the start of the code point that
 * contains s[i], checking the lower bound.
 *
 * i is decremented once only when s[i] is a trail surrogate, i > start, and
 * s[i-1] is a lead surrogate. An unpaired trail surrogate leaves i unchanged.
 *
 * @param s     array of code units
 * @param start lowest offset that may be read
 * @param i     offset lvalue, possibly decremented by 1
 */
#define U16_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
        --(i); \
    } \
} UPRV_BLOCK_MACRO_END
520
521
/* definitions with backward iteration -------------------------------------- */
522
543
/**
 * Move offset i back over one code point and read that code point.
 * "Unsafe" variant: no bounds are checked and a trail surrogate is assumed
 * to be preceded by a lead surrogate.
 *
 * i is pre-decremented and c is set to s[i]. If that unit is a trail
 * surrogate, i is decremented again and the unit found there is combined
 * with c into a supplementary code point.
 *
 * @param s array of code units
 * @param i offset lvalue, moved back by 1 or 2
 * @param c output lvalue receiving the code point
 */
#define U16_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; \
    if(U16_IS_TRAIL(c)) { \
        (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \
    } \
} UPRV_BLOCK_MACRO_END
549
570
/**
 * Move offset i back over one code point and read that code point, checking
 * the lower bound before reading a lead surrogate.
 *
 * i is pre-decremented and c is set to s[i]. If that unit is a trail
 * surrogate, s[i-1] is read only when i > start; if it is a lead surrogate,
 * i is decremented again and the pair is combined into a supplementary code
 * point. Otherwise c keeps the single code unit, including an unpaired
 * surrogate.
 *
 * The first read of s[i-1] is not bounds-checked by this macro.
 *
 * @param s     array of code units
 * @param start lowest offset that may be read as a lead unit
 * @param i     offset lvalue, moved back by 1 or 2
 * @param c     output lvalue receiving the code point or unpaired surrogate
 */
#define U16_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; \
    if(U16_IS_TRAIL(c)) { \
        uint16_t __c2; \
        if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
            --(i); \
            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
580
601
/**
 * Move offset i back over one code point and read that code point,
 * substituting 0xfffd for unpaired surrogates.
 *
 * i is pre-decremented and c is set to s[i]. If that unit is a surrogate:
 *  - when it is a trail, i > start, and s[i-1] is a lead surrogate, i is
 *    decremented again and the pair is combined;
 *  - in every other case (lone trail, or any lead) c is set to 0xfffd and
 *    i stays moved back by one.
 *
 * The first read of s[i-1] is not bounds-checked by this macro.
 *
 * @param s     array of code units
 * @param start lowest offset that may be read as a lead unit
 * @param i     offset lvalue, moved back by 1 or 2
 * @param c     output lvalue receiving the code point or 0xfffd
 */
#define U16_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(U16_IS_SURROGATE_TRAIL(c) && (i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
            --(i); \
            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
        } else { \
            (c)=0xfffd; \
        } \
    } \
} UPRV_BLOCK_MACRO_END
613
625
/**
 * Move offset i back over one code point without reading its value.
 * "Unsafe" variant: no bounds are checked and a trail surrogate is assumed
 * to be preceded by a lead surrogate.
 *
 * i is always decremented once; it is decremented a second time when the
 * unit now at s[i] is a trail surrogate.
 *
 * @param s array of code units
 * @param i offset lvalue, moved back by 1 or 2
 */
#define U16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[--(i)])) { \
        --(i); \
    } \
} UPRV_BLOCK_MACRO_END
630
643
/**
 * Move offset i back over one code point without reading its value, checking
 * the lower bound before looking at a lead surrogate.
 *
 * i is always decremented once. It is decremented a second time only when
 * the unit now at s[i] is a trail surrogate, i > start, and s[i-1] is a lead
 * surrogate. An unpaired surrogate therefore counts as one step.
 *
 * The first read of s[i-1] is not bounds-checked by this macro.
 *
 * @param s     array of code units
 * @param start lowest offset that may be read as a lead unit
 * @param i     offset lvalue, moved back by 1 or 2
 */
#define U16_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
        --(i); \
    } \
} UPRV_BLOCK_MACRO_END
648
662
/**
 * Move offset i back over n code points by applying U16_BACK_1_UNSAFE()
 * n times. "Unsafe" variant: no bounds are checked.
 *
 * n is copied once into a local int32_t counter; a value <= 0 leaves i
 * unchanged.
 *
 * @param s array of code units
 * @param i offset lvalue to move back
 * @param n number of code points to skip backwards
 */
#define U16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N=(n); \
    while(__N>0) { \
        U16_BACK_1_UNSAFE(s, i); \
        --__N; \
    } \
} UPRV_BLOCK_MACRO_END
669
684
/**
 * Move offset i back over up to n code points by applying U16_BACK_1(),
 * stopping early when i reaches start.
 *
 * The loop continues while the counter is positive and i > start.
 * n is copied once into a local int32_t counter; a value <= 0 leaves i
 * unchanged.
 *
 * @param s     array of code units
 * @param start lowest offset; i is never moved below it
 * @param i     offset lvalue to move back
 * @param n     maximum number of code points to skip backwards
 */
#define U16_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N=(n); \
    while(__N>0 && (i)>(start)) { \
        U16_BACK_1(s, start, i); \
        --__N; \
    } \
} UPRV_BLOCK_MACRO_END
691
705
/**
 * Move random-access limit offset i forward so that it does not fall between
 * the two units of a surrogate pair. "Unsafe" variant: no bounds are checked
 * and a lead surrogate is assumed to be followed by a trail surrogate.
 *
 * i is incremented once when s[i-1] is a lead surrogate; otherwise it is
 * left unchanged.
 *
 * @param s array of code units
 * @param i offset lvalue, possibly incremented by 1
 */
#define U16_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_LEAD((s)[(i)-1])) { \
        ++(i); \
    } \
} UPRV_BLOCK_MACRO_END
710
728
/**
 * Move random-access limit offset i forward so that it does not fall between
 * the two units of a surrogate pair, checking both bounds.
 *
 * i is incremented once only when all of the following hold:
 *  - start < i, so s[i-1] may be read;
 *  - i < length, or length is negative, so s[i] may be read;
 *  - s[i-1] is a lead surrogate and s[i] is a trail surrogate.
 * Otherwise i is left unchanged.
 *
 * @param s      array of code units
 * @param start  lowest offset that may be read
 * @param i      offset lvalue, possibly incremented by 1
 * @param length limit offset, or a negative value to skip the upper-bound test
 */
#define U16_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    if((start)<(i) && ((i)<(length) || (length)<0) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \
        ++(i); \
    } \
} UPRV_BLOCK_MACRO_END
733
734
#endif
utf.h
C API: Code point macros.
umachine.h
Basic types and constants for UTF.
Generated by
1.8.17