ICU 73.2  73.2
uset.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2002-2014, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: uset.h
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2002mar07
16 * created by: Markus W. Scherer
17 *
18 * C version of UnicodeSet.
19 */
20 
21 
29 #ifndef __USET_H__
30 #define __USET_H__
31 
32 #include "unicode/utypes.h"
33 #include "unicode/uchar.h"
34 
35 #if U_SHOW_CPLUSPLUS_API
36 #include "unicode/localpointer.h"
37 #endif // U_SHOW_CPLUSPLUS_API
38 
39 #ifndef USET_DEFINED
40 
41 #ifndef U_IN_DOXYGEN
42 #define USET_DEFINED
43 #endif
44 
50 typedef struct USet USet;
51 #endif
52 
64 enum {
70 
98 
111 
112 #ifndef U_HIDE_DRAFT_API
113 
126 #endif // U_HIDE_DRAFT_API
127 };
128 
184 typedef enum USetSpanCondition {
233 #ifndef U_HIDE_DEPRECATED_API
234 
239 #endif // U_HIDE_DEPRECATED_API
241 
242 enum {
250 };
251 
257 typedef struct USerializedSet {
262  const uint16_t *array;
267  int32_t bmpLength;
272  int32_t length;
277  uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY];
278 } USerializedSet;
279 
280 /*********************************************************************
281  * USet API
282  *********************************************************************/
283 
291 U_CAPI USet* U_EXPORT2
292 uset_openEmpty(void);
293 
304 U_CAPI USet* U_EXPORT2
305 uset_open(UChar32 start, UChar32 end);
306 
316 U_CAPI USet* U_EXPORT2
317 uset_openPattern(const UChar* pattern, int32_t patternLength,
318  UErrorCode* ec);
319 
333 U_CAPI USet* U_EXPORT2
334 uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
335  uint32_t options,
336  UErrorCode* ec);
337 
344 U_CAPI void U_EXPORT2
345 uset_close(USet* set);
346 
347 #if U_SHOW_CPLUSPLUS_API
348 
349 U_NAMESPACE_BEGIN
350 
360 U_DEFINE_LOCAL_OPEN_POINTER(LocalUSetPointer, USet, uset_close);
361 
362 U_NAMESPACE_END
363 
364 #endif
365 
375 U_CAPI USet * U_EXPORT2
376 uset_clone(const USet *set);
377 
387 U_CAPI UBool U_EXPORT2
388 uset_isFrozen(const USet *set);
389 
404 U_CAPI void U_EXPORT2
405 uset_freeze(USet *set);
406 
417 U_CAPI USet * U_EXPORT2
418 uset_cloneAsThawed(const USet *set);
419 
429 U_CAPI void U_EXPORT2
430 uset_set(USet* set,
431  UChar32 start, UChar32 end);
432 
457 U_CAPI int32_t U_EXPORT2
458 uset_applyPattern(USet *set,
459  const UChar *pattern, int32_t patternLength,
460  uint32_t options,
461  UErrorCode *status);
462 
485 U_CAPI void U_EXPORT2
486 uset_applyIntPropertyValue(USet* set,
487  UProperty prop, int32_t value, UErrorCode* ec);
488 
524 U_CAPI void U_EXPORT2
525 uset_applyPropertyAlias(USet* set,
526  const UChar *prop, int32_t propLength,
527  const UChar *value, int32_t valueLength,
528  UErrorCode* ec);
529 
539 U_CAPI UBool U_EXPORT2
540 uset_resemblesPattern(const UChar *pattern, int32_t patternLength,
541  int32_t pos);
542 
558 U_CAPI int32_t U_EXPORT2
559 uset_toPattern(const USet* set,
560  UChar* result, int32_t resultCapacity,
561  UBool escapeUnprintable,
562  UErrorCode* ec);
563 
572 U_CAPI void U_EXPORT2
573 uset_add(USet* set, UChar32 c);
574 
587 U_CAPI void U_EXPORT2
588 uset_addAll(USet* set, const USet *additionalSet);
589 
599 U_CAPI void U_EXPORT2
600 uset_addRange(USet* set, UChar32 start, UChar32 end);
601 
611 U_CAPI void U_EXPORT2
612 uset_addString(USet* set, const UChar* str, int32_t strLen);
613 
623 U_CAPI void U_EXPORT2
624 uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen);
625 
634 U_CAPI void U_EXPORT2
635 uset_remove(USet* set, UChar32 c);
636 
646 U_CAPI void U_EXPORT2
647 uset_removeRange(USet* set, UChar32 start, UChar32 end);
648 
658 U_CAPI void U_EXPORT2
659 uset_removeString(USet* set, const UChar* str, int32_t strLen);
660 
670 U_CAPI void U_EXPORT2
671 uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length);
672 
684 U_CAPI void U_EXPORT2
685 uset_removeAll(USet* set, const USet* removeSet);
686 
699 U_CAPI void U_EXPORT2
700 uset_retain(USet* set, UChar32 start, UChar32 end);
701 
713 U_CAPI void U_EXPORT2
714 uset_retainString(USet *set, const UChar *str, int32_t length);
715 
725 U_CAPI void U_EXPORT2
726 uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length);
727 
740 U_CAPI void U_EXPORT2
741 uset_retainAll(USet* set, const USet* retain);
742 
751 U_CAPI void U_EXPORT2
752 uset_compact(USet* set);
753 
767 U_CAPI void U_EXPORT2
768 uset_complement(USet* set);
769 
783 U_CAPI void U_EXPORT2
784 uset_complementRange(USet *set, UChar32 start, UChar32 end);
785 
796 U_CAPI void U_EXPORT2
797 uset_complementString(USet *set, const UChar *str, int32_t length);
798 
808 U_CAPI void U_EXPORT2
809 uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length);
810 
822 U_CAPI void U_EXPORT2
823 uset_complementAll(USet* set, const USet* complement);
824 
832 U_CAPI void U_EXPORT2
833 uset_clear(USet* set);
834 
863 U_CAPI void U_EXPORT2
864 uset_closeOver(USet* set, int32_t attributes);
865 
872 U_CAPI void U_EXPORT2
873 uset_removeAllStrings(USet* set);
874 
882 U_CAPI UBool U_EXPORT2
883 uset_isEmpty(const USet* set);
884 
890 U_CAPI UBool U_EXPORT2
891 uset_hasStrings(const USet *set);
892 
901 U_CAPI UBool U_EXPORT2
902 uset_contains(const USet* set, UChar32 c);
903 
913 U_CAPI UBool U_EXPORT2
914 uset_containsRange(const USet* set, UChar32 start, UChar32 end);
915 
924 U_CAPI UBool U_EXPORT2
925 uset_containsString(const USet* set, const UChar* str, int32_t strLen);
926 
937 U_CAPI int32_t U_EXPORT2
938 uset_indexOf(const USet* set, UChar32 c);
939 
955 U_CAPI UChar32 U_EXPORT2
956 uset_charAt(const USet* set, int32_t charIndex);
957 
971 U_CAPI int32_t U_EXPORT2
972 uset_size(const USet* set);
973 
982 U_CAPI int32_t U_EXPORT2
983 uset_getRangeCount(const USet *set);
984 
993 U_CAPI int32_t U_EXPORT2
994 uset_getItemCount(const USet* set);
995 
1024 U_CAPI int32_t U_EXPORT2
1025 uset_getItem(const USet* set, int32_t itemIndex,
1026  UChar32* start, UChar32* end,
1027  UChar* str, int32_t strCapacity,
1028  UErrorCode* ec);
1029 
1038 U_CAPI UBool U_EXPORT2
1039 uset_containsAll(const USet* set1, const USet* set2);
1040 
1051 U_CAPI UBool U_EXPORT2
1052 uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen);
1053 
1062 U_CAPI UBool U_EXPORT2
1063 uset_containsNone(const USet* set1, const USet* set2);
1064 
1073 U_CAPI UBool U_EXPORT2
1074 uset_containsSome(const USet* set1, const USet* set2);
1075 
1095 U_CAPI int32_t U_EXPORT2
1096 uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
1097 
1116 U_CAPI int32_t U_EXPORT2
1117 uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
1118 
1138 U_CAPI int32_t U_EXPORT2
1139 uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
1140 
1159 U_CAPI int32_t U_EXPORT2
1160 uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
1161 
1170 U_CAPI UBool U_EXPORT2
1171 uset_equals(const USet* set1, const USet* set2);
1172 
1173 /*********************************************************************
1174  * Serialized set API
1175  *********************************************************************/
1176 
1226 U_CAPI int32_t U_EXPORT2
1227 uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode);
1228 
1237 U_CAPI UBool U_EXPORT2
1238 uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength);
1239 
1247 U_CAPI void U_EXPORT2
1248 uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c);
1249 
1258 U_CAPI UBool U_EXPORT2
1259 uset_serializedContains(const USerializedSet* set, UChar32 c);
1260 
1270 U_CAPI int32_t U_EXPORT2
1271 uset_getSerializedRangeCount(const USerializedSet* set);
1272 
1286 U_CAPI UBool U_EXPORT2
1287 uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
1288  UChar32* pStart, UChar32* pEnd);
1289 
1290 #endif
uset_compact
U_CAPI void uset_compact(USet *set)
Reallocate this object's internal structures to take up the least possible space, without changing thi...
uset_applyIntPropertyValue
U_CAPI void uset_applyIntPropertyValue(USet *set, UProperty prop, int32_t value, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given binary or enum...
uset_add
U_CAPI void uset_add(USet *set, UChar32 c)
Adds the given character to the given USet.
uset_getItemCount
U_CAPI int32_t uset_getItemCount(const USet *set)
Returns the number of items in this set.
uset_span
U_CAPI int32_t uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
uset_retainString
U_CAPI void uset_retainString(USet *set, const UChar *str, int32_t length)
Retains only the specified string from this set if it is present.
uset_serializedContains
U_CAPI UBool uset_serializedContains(const USerializedSet *set, UChar32 c)
Returns true if the given USerializedSet contains the given character.
uset_removeString
U_CAPI void uset_removeString(USet *set, const UChar *str, int32_t strLen)
Removes the given string from the given USet.
uset_charAt
U_CAPI UChar32 uset_charAt(const USet *set, int32_t charIndex)
Returns the character at the given index within this set, where the set is ordered by ascending code ...
uset_addAll
U_CAPI void uset_addAll(USet *set, const USet *additionalSet)
Adds all of the elements in the specified set to this set if they're not already present.
USerializedSet::staticArray
uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY]
A small buffer for the array to reduce memory allocations.
Definition: uset.h:277
utypes.h
Basic definitions for ICU, for both C and C++ APIs.
USerializedSet::array
const uint16_t * array
The serialized Unicode Set.
Definition: uset.h:262
uset_removeRange
U_CAPI void uset_removeRange(USet *set, UChar32 start, UChar32 end)
Removes the given range of characters from the given USet.
USET_SPAN_NOT_CONTAINED
@ USET_SPAN_NOT_CONTAINED
Continues a span() while there is no set element at the current position.
Definition: uset.h:197
USET_IGNORE_SPACE
@ USET_IGNORE_SPACE
Ignore white space within patterns unless quoted or escaped.
Definition: uset.h:69
UBool
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition: umachine.h:247
uset_containsSome
U_CAPI UBool uset_containsSome(const USet *set1, const USet *set2)
Returns true if set1 contains some of the characters and strings of set2.
uset_getSerializedRangeCount
U_CAPI int32_t uset_getSerializedRangeCount(const USerializedSet *set)
Returns the number of disjoint ranges of characters contained in the given serialized set.
uset_getSerializedSet
U_CAPI UBool uset_getSerializedSet(USerializedSet *fillSet, const uint16_t *src, int32_t srcLength)
Given a serialized array, fill in the given serialized set object.
uset_set
U_CAPI void uset_set(USet *set, UChar32 start, UChar32 end)
Causes the USet object to represent the range start - end.
USerializedSet
struct USerializedSet USerializedSet
A serialized form of a Unicode set.
uset_setSerializedToOne
U_CAPI void uset_setSerializedToOne(USerializedSet *fillSet, UChar32 c)
Set the USerializedSet to contain the given character (and nothing else).
uset_getSerializedRange
U_CAPI UBool uset_getSerializedRange(const USerializedSet *set, int32_t rangeIndex, UChar32 *pStart, UChar32 *pEnd)
Returns a range of characters contained in the given serialized set.
uset_hasStrings
U_CAPI UBool uset_hasStrings(const USet *set)
UProperty
UProperty
Selection constants for Unicode properties.
Definition: uchar.h:195
uset_remove
U_CAPI void uset_remove(USet *set, UChar32 c)
Removes the given character from the given USet.
uset_serialize
U_CAPI int32_t uset_serialize(const USet *set, uint16_t *dest, int32_t destCapacity, UErrorCode *pErrorCode)
Serializes this set into an array of 16-bit integers.
USetSpanCondition
USetSpanCondition
Argument values for whether span() and similar functions continue while the current character is cont...
Definition: uset.h:184
uset_spanUTF8
U_CAPI int32_t uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
uset_close
U_CAPI void uset_close(USet *set)
Disposes of the storage used by a USet object.
uset_clear
U_CAPI void uset_clear(USet *set)
Removes all of the elements from this set.
uset_clone
U_CAPI USet * uset_clone(const USet *set)
Returns a copy of this object.
uset_getItem
U_CAPI int32_t uset_getItem(const USet *set, int32_t itemIndex, UChar32 *start, UChar32 *end, UChar *str, int32_t strCapacity, UErrorCode *ec)
Returns an item of this set.
USET_SPAN_CONTAINED
@ USET_SPAN_CONTAINED
Spans the longest substring that is a concatenation of set elements (characters or strings).
Definition: uset.h:212
uset_spanBack
U_CAPI int32_t uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
uset_isFrozen
U_CAPI UBool uset_isFrozen(const USet *set)
Determines whether the set has been frozen (made immutable) or not.
UChar32
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:435
uset_getRangeCount
U_CAPI int32_t uset_getRangeCount(const USet *set)
UErrorCode
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:415
uset_isEmpty
U_CAPI UBool uset_isEmpty(const USet *set)
Returns true if the given USet contains no characters and no strings.
USerializedSet::length
int32_t length
The total length of the array.
Definition: uset.h:272
uset_removeAllCodePoints
U_CAPI void uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length)
Removes EACH of the characters in this string.
uset_retainAll
U_CAPI void uset_retainAll(USet *set, const USet *retain)
Retains only the elements in this set that are contained in the specified set.
uset_containsAllCodePoints
U_CAPI UBool uset_containsAllCodePoints(const USet *set, const UChar *str, int32_t strLen)
Returns true if this set contains all the characters of the given string.
uset_resemblesPattern
U_CAPI UBool uset_resemblesPattern(const UChar *pattern, int32_t patternLength, int32_t pos)
Return true if the given position, in the given pattern, appears to be the start of a UnicodeSet patt...
uset_containsNone
U_CAPI UBool uset_containsNone(const USet *set1, const USet *set2)
Returns true if set1 contains none of the characters and strings of set2.
uset_addAllCodePoints
U_CAPI void uset_addAllCodePoints(USet *set, const UChar *str, int32_t strLen)
Adds each of the characters in this string to the set.
uset_complementAllCodePoints
U_CAPI void uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length)
Complements EACH of the characters in this string.
uset_removeAllStrings
U_CAPI void uset_removeAllStrings(USet *set)
Remove all strings from this set.
uset_complement
U_CAPI void uset_complement(USet *set)
This is equivalent to uset_complementRange(set, 0, 0x10FFFF).
uset_retain
U_CAPI void uset_retain(USet *set, UChar32 start, UChar32 end)
Retain only the elements in this set that are contained in the specified range.
U_DEFINE_LOCAL_OPEN_POINTER
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
Definition: localpointer.h:550
uset_removeAll
U_CAPI void uset_removeAll(USet *set, const USet *removeSet)
Removes from this set all of its elements that are contained in the specified set.
uset_complementRange
U_CAPI void uset_complementRange(USet *set, UChar32 start, UChar32 end)
Complements the specified range in this set.
LocalUSetPointer
uset_openEmpty
U_CAPI USet * uset_openEmpty(void)
Create an empty USet object.
uset_indexOf
U_CAPI int32_t uset_indexOf(const USet *set, UChar32 c)
Returns the index of the given character within this set, where the set is ordered by ascending code ...
uset_closeOver
U_CAPI void uset_closeOver(USet *set, int32_t attributes)
Close this set over the given attribute.
uset_containsAll
U_CAPI UBool uset_containsAll(const USet *set1, const USet *set2)
Returns true if set1 contains all the characters and strings of set2.
uset_open
U_CAPI USet * uset_open(UChar32 start, UChar32 end)
Creates a USet object that contains the range of characters start..end, inclusive.
localpointer.h
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
uset_containsString
U_CAPI UBool uset_containsString(const USet *set, const UChar *str, int32_t strLen)
Returns true if the given USet contains the given string.
USET_SPAN_SIMPLE
@ USET_SPAN_SIMPLE
Continues a span() while there is a set element at the current position.
Definition: uset.h:232
uset_applyPattern
U_CAPI int32_t uset_applyPattern(USet *set, const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *status)
Modifies the set to represent the set specified by the given pattern.
uset_equals
U_CAPI UBool uset_equals(const USet *set1, const USet *set2)
Returns true if set1 contains all of the characters and strings of set2, and vice versa.
uset_complementString
U_CAPI void uset_complementString(USet *set, const UChar *str, int32_t length)
Complements the specified string in this set.
USerializedSet
A serialized form of a Unicode set.
Definition: uset.h:257
USET_SIMPLE_CASE_INSENSITIVE
@ USET_SIMPLE_CASE_INSENSITIVE
Enable case insensitive matching.
Definition: uset.h:125
uset_containsRange
U_CAPI UBool uset_containsRange(const USet *set, UChar32 start, UChar32 end)
Returns true if the given USet contains all characters c where start <= c && c <= end.
uset_contains
U_CAPI UBool uset_contains(const USet *set, UChar32 c)
Returns true if the given USet contains the given character.
uset_cloneAsThawed
U_CAPI USet * uset_cloneAsThawed(const USet *set)
Clone the set and make the clone mutable.
uchar.h
C API: Unicode Properties.
uset_addString
U_CAPI void uset_addString(USet *set, const UChar *str, int32_t strLen)
Adds the given string to the given USet.
uset_openPattern
U_CAPI USet * uset_openPattern(const UChar *pattern, int32_t patternLength, UErrorCode *ec)
Creates a set from the given pattern.
UChar
char16_t UChar
Definition: umachine.h:386
uset_retainAllCodePoints
U_CAPI void uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length)
Retains EACH of the characters in this string.
uset_openPatternOptions
U_CAPI USet * uset_openPatternOptions(const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *ec)
Creates a set from the given pattern.
U_CAPI
#define U_CAPI
This is used to declare a function as a public ICU C API.
Definition: umachine.h:110
uset_complementAll
U_CAPI void uset_complementAll(USet *set, const USet *complement)
Complements in this set all elements contained in the specified set.
uset_size
U_CAPI int32_t uset_size(const USet *set)
Returns the number of characters and strings contained in this set.
USerializedSet::bmpLength
int32_t bmpLength
The length of the array that contains BMP characters.
Definition: uset.h:267
uset_freeze
U_CAPI void uset_freeze(USet *set)
Freeze the set (make it immutable).
USET_SPAN_CONDITION_COUNT
@ USET_SPAN_CONDITION_COUNT
One more than the last span condition.
Definition: uset.h:238
USet
struct USet USet
USet is the C API type corresponding to C++ class UnicodeSet.
Definition: uset.h:50
uset_applyPropertyAlias
U_CAPI void uset_applyPropertyAlias(USet *set, const UChar *prop, int32_t propLength, const UChar *value, int32_t valueLength, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given property.
USET_SERIALIZED_STATIC_ARRAY_CAPACITY
@ USET_SERIALIZED_STATIC_ARRAY_CAPACITY
Capacity of USerializedSet::staticArray.
Definition: uset.h:249
USET_CASE_INSENSITIVE
@ USET_CASE_INSENSITIVE
Enable case insensitive matching.
Definition: uset.h:97
uset_spanBackUTF8
U_CAPI int32_t uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
uset_addRange
U_CAPI void uset_addRange(USet *set, UChar32 start, UChar32 end)
Adds the given range of characters to the given USet.
USET_ADD_CASE_MAPPINGS
@ USET_ADD_CASE_MAPPINGS
Adds all case mappings for each element in the set.
Definition: uset.h:110
uset_toPattern
U_CAPI int32_t uset_toPattern(const USet *set, UChar *result, int32_t resultCapacity, UBool escapeUnprintable, UErrorCode *ec)
Returns a string representation of this set.