#Unicode Invariant Results

# Invariance Tests

#

# This file provides a set of machine-readable invariance tests for Unicode Properties.

#


# Format


# Let <$variable> = <unicodeSet>

# Assign a variable to a value. The variable must start with $.

#

# <unicodeSet> is a boolean combination of properties and character ranges, as defined in LDML,

# with the following extensions.

#

# Example:

# [\p{General_Category=Unassigned}-[a-zA-Z]]

#

# Property Name:

# <propertyName> can be the short or long form as in the PropertyAliases.txt

# <propertyName> can be prefixed with "U<version>:"

# Example: \p{U5.1.0:Whitespace}

#

# A version of -1 indicates the previous released version.

# For example, if the version is 4.0.1, then the U-1 version is 4.0.0

# Example: \p{U-1:Whitespace}

#

# Property Value:

# If the propertyValue is missing, it is defaulted to true

# If the value is of the form /.../, then the ... is interpreted as a regular expression

# The value (if enumerated) can be the short or long form as in PropertyValueAliases.txt


# Show <unicodeSet>

# List any set on the console, for viewing and debugging.


# Test <unicodeSet> <relation> <unicodeSet>

#

# Tests that the relation is true for the two sets. The "Test" keyword is optional.

#

# relation := '=' // has identical contents to

# := '⊃' // is proper superset of

# := '⊇' // is superset of

# := '⊂' // is proper subset of

# := '⊆' // is subset of

# := '∥' // has no intersection

# := '≉' // none of the above (they overlap, and neither contains the other)

#

# When this file is parsed, a parse error message may contain <@>

# to indicate the location of an error in the input line.

#

# If there is an error in the test, a comparison listing of the two sides of the relation is generated.


# In <unicodeSet> <props> (=|≠) <props>

#

# For each character in <unicodeSet>, verify that the result of applying the left <props>

# is (=|≠) the result of applying the right <props>.

# <props> is of the form (<unicodeSet> | <prop>) ("*" (<unicodeSet> | <prop>))?

# It is the functional composition of the properties applied to strings, whereby

# <unicodeSet> is used to filter the result.

# <prop> for a string property is applied to each character, and the result concatenated

# That is, cf("A1") is cf("A")+cf("1") = "a1"

# <prop> for an enumerated property is applied to each character, and the resulting values are concatenated.

# That is, gc("A1") is gc("A")+gc("1") = "Uppercase_LetterDecimal_Number"

#

# Example: for <props> of bc * \P{bc=NSM} * cf * dm, the successive results when applied to Å (angstrom sign) are:

# bc * \P{bc=NSM} * cf * dm ("Å")

# bc * \P{bc=NSM} * cf ("A" + ring above)

# bc * \P{bc=NSM} ("a" + ring above)

# bc ("a")

# "Left"

#

# Example: In \p{dt=canonical} bc * \P{bc=NSM} * dm = bc * \P{bc=NSM}

# This examines only those characters that have canonical decompositions. For each such character X,

# it gets the decomposition mapping of X, then filters out all NSM characters, then gets the Bidi_Class.

# It then tests that against the result of filtering out NSM characters from X, then getting the Bidi_Class.

#


# General Constants



Let $foo = \p{ccc=9}

094D #Mn (्) DEVANAGARI SIGN VIRAMA
09CD #Mn (্) BENGALI SIGN VIRAMA
0A4D #Mn (੍) GURMUKHI SIGN VIRAMA
0ACD #Mn (્) GUJARATI SIGN VIRAMA
0B4D #Mn (୍) ORIYA SIGN VIRAMA
0BCD #Mn (்) TAMIL SIGN VIRAMA
0C4D #Mn (్) TELUGU SIGN VIRAMA
0CCD #Mn (್) KANNADA SIGN VIRAMA
0D4D #Mn (്) MALAYALAM SIGN VIRAMA
0DCA #Mn (්) SINHALA SIGN AL-LAKUNA
0E3A #Mn (ฺ) THAI CHARACTER PHINTHU
0F84 #Mn (྄) TIBETAN MARK HALANTA
1039..103A #Mn [2] (္..်) MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT
1714 #Mn (᜔) TAGALOG SIGN VIRAMA
1734 #Mn (᜴) HANUNOO SIGN PAMUDPOD
17D2 #Mn (្) KHMER SIGN COENG
1A60 #Mn (᩠) TAI THAM SIGN SAKOT
1B44 #Mc (᭄) BALINESE ADEG ADEG
1BAA #Mc (᮪) SUNDANESE SIGN PAMAAEH
1BF2..1BF3 #Mc [2] (᯲..᯳) BATAK PANGOLAT..BATAK PANONGONAN

## Total: 31 (omitting 9 from listing)


Let $fii = \p{toNFD=/$foo/}

0DDA #Mc (ේ) SINHALA VOWEL SIGN DIGA KOMBUVA
0DDD #Mc (ෝ) SINHALA VOWEL SIGN KOMBUVA HAA DIGA AELA-PILLA

## Total: 2



Let $gcAllPunctuation = \p{gc=/_Punctuation/}

0021..0023 #Po [3] (!..#) EXCLAMATION MARK..NUMBER SIGN
0025..0027 #Po [3] (%..') PERCENT SIGN..APOSTROPHE
0028 #Ps (() LEFT PARENTHESIS
0029 #Pe ()) RIGHT PARENTHESIS
002A #Po (*) ASTERISK
002C #Po (,) COMMA
002D #Pd (-) HYPHEN-MINUS
002E..002F #Po [2] (.../) FULL STOP..SOLIDUS
003A..003B #Po [2] (:..;) COLON..SEMICOLON
003F..0040 #Po [2] (?..@) QUESTION MARK..COMMERCIAL AT
005B #Ps ([) LEFT SQUARE BRACKET
005C #Po (\) REVERSE SOLIDUS
005D #Pe (]) RIGHT SQUARE BRACKET
005F #Pc (_) LOW LINE
007B #Ps ({) LEFT CURLY BRACKET
007D #Pe (}) RIGHT CURLY BRACKET
00A1 #Po (¡) INVERTED EXCLAMATION MARK
00AB #Pi («) LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
00B7 #Po (·) MIDDLE DOT
00BB #Pf (») RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
00BF #Po (¿) INVERTED QUESTION MARK
037E #Po (;) GREEK QUESTION MARK
0387 #Po (·) GREEK ANO TELEIA
055A..055F #Po [6] (՚..՟) ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION MARK
0589 #Po (։) ARMENIAN FULL STOP
058A #Pd (֊) ARMENIAN HYPHEN
05BE #Pd (־) HEBREW PUNCTUATION MAQAF
05C0 #Po (׀) HEBREW PUNCTUATION PASEQ

## Total: 600 (omitting 560 from listing)


# Cross-check: the regex-derived $gcAllPunctuation must equal the explicit union of the seven punctuation categories.

$gcAllPunctuation = [\p{gc=Close_Punctuation}\p{gc=Connector_Punctuation}\p{gc=Dash_Punctuation}\p{gc=Final_Punctuation}\p{gc=Initial_Punctuation}\p{gc=Open_Punctuation}\p{gc=Other_Punctuation}]


Let $gcAllSymbols = \p{gc=/_Symbol/}

0024 #Sc ($) DOLLAR SIGN
002B #Sm (+) PLUS SIGN
003C..003E #Sm [3] (<..>) LESS-THAN SIGN..GREATER-THAN SIGN
005E #Sk (^) CIRCUMFLEX ACCENT
0060 #Sk (`) GRAVE ACCENT
007C #Sm (|) VERTICAL LINE
007E #Sm (~) TILDE
00A2..00A5 #Sc [4] (¢..¥) CENT SIGN..YEN SIGN
00A6..00A7 #So [2] (¦..§) BROKEN BAR..SECTION SIGN
00A8 #Sk (¨) DIAERESIS
00A9 #So (©) COPYRIGHT SIGN
00AC #Sm (¬) NOT SIGN
00AE #So (®) REGISTERED SIGN
00AF #Sk (¯) MACRON
00B0 #So (°) DEGREE SIGN
00B1 #Sm (±) PLUS-MINUS SIGN
00B4 #Sk (´) ACUTE ACCENT
00B6 #So (¶) PILCROW SIGN
00B8 #Sk (¸) CEDILLA
00D7 #Sm (×) MULTIPLICATION SIGN
00F7 #Sm (÷) DIVISION SIGN
02C2..02C5 #Sk [4] (˂..˅) MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD
02D2..02DF #Sk [14] (˒..˟) MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER CROSS ACCENT
02E5..02EB #Sk [7] (˥..˫) MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK
02ED #Sk (˭) MODIFIER LETTER UNASPIRATED
02EF..02FF #Sk [17] (˯..˿) MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW

## Total: 5,504 (omitting 5,434 from listing)


$gcAllSymbols = [\p{gc=Math_Symbol}\p{gc=Currency_Symbol}\p{gc=Modifier_Symbol}\p{gc=Other_Symbol}]


Let $gcAllMarks = \p{gc=/_Mark/}

0300..036F #Mn [112] (̀..ͯ) COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X
0483..0487 #Mn [5] (҃..҇) COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE
0488..0489 #Me [2] (҈..҉) COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN
0591..05BD #Mn [45] (֑..ֽ) HEBREW ACCENT ETNAHTA..HEBREW POINT METEG
05BF #Mn (ֿ) HEBREW POINT RAFE
05C1..05C2 #Mn [2] (ׁ..ׂ) HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT
05C4..05C5 #Mn [2] (ׄ..ׅ) HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT
05C7 #Mn (ׇ) HEBREW POINT QAMATS QATAN
0610..061A #Mn [11] (ؐ..ؚ) ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
064B..065F #Mn [21] (ً..ٟ) ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW
0670 #Mn (ٰ) ARABIC LETTER SUPERSCRIPT ALEF
06D6..06DC #Mn [7] (ۖ..ۜ) ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN
06DE #Me (۞) ARABIC START OF RUB EL HIZB
06DF..06E4 #Mn [6] (۟..ۤ) ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA
06E7..06E8 #Mn [2] (ۧ..ۨ) ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON
06EA..06ED #Mn [4] (۪..ۭ) ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM
0711 #Mn (ܑ) SYRIAC LETTER SUPERSCRIPT ALAPH
0730..074A #Mn [27] (ܰ..݊) SYRIAC PTHAHA ABOVE..SYRIAC BARREKH
07A6..07B0 #Mn [11] (ަ..ް) THAANA ABAFILI..THAANA SUKUN
07EB..07F3 #Mn [9] (߫..߳) NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
0816..0819 #Mn [4] (ࠖ..࠙) SAMARITAN MARK IN..SAMARITAN MARK DAGESH
081B..0823 #Mn [9] (ࠛ..ࠣ) SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A

## Total: 1,499 (omitting 1,215 from listing)


$gcAllMarks = [\p{gc=Nonspacing_Mark}\p{gc=Enclosing_Mark}\p{gc=Spacing_Mark}]


Let $gcAllLetters = \p{gc=/_Letter/}

0041..005A #L& [26] (A..Z) LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z
0061..007A #L& [26] (a..z) LATIN SMALL LETTER A..LATIN SMALL LETTER Z
00AA #L& (ª) FEMININE ORDINAL INDICATOR
00B5 #L& (µ) MICRO SIGN
00BA #L& (º) MASCULINE ORDINAL INDICATOR
00C0..00D6 #L& [23] (À..Ö) LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS
00D8..00F6 #L& [31] (Ø..ö) LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS
00F8..01BA #L& [195] (ø..ƺ) LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL
01BB #Lo (ƻ) LATIN LETTER TWO WITH STROKE
01BC..01BF #L& [4] (Ƽ..ƿ) LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN
01C0..01C3 #Lo [4] (ǀ..ǃ) LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK
01C4..0293 #L& [208] (DŽ..ʓ) LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL
0294 #Lo (ʔ) LATIN LETTER GLOTTAL STOP
0295..02AF #L& [27] (ʕ..ʯ) LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL
02B0..02C1 #Lm [18] (ʰ..ˁ) MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP
02C6..02D1 #Lm [12] (ˆ..ˑ) MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON
02E0..02E4 #Lm [5] (ˠ..ˤ) MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
02EC #Lm (ˬ) MODIFIER LETTER VOICING
02EE #Lm (ˮ) MODIFIER LETTER DOUBLE APOSTROPHE
0370..0373 #L& [4] (Ͱ..ͳ) GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI
0374 #Lm (ʹ) GREEK NUMERAL SIGN
0376..0377 #L& [2] (Ͷ..ͷ) GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA
037A #Lm (ͺ) GREEK YPOGEGRAMMENI
037B..037D #L& [3] (ͻ..ͽ) GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL
0386 #L& (Ά) GREEK CAPITAL LETTER ALPHA WITH TONOS
0388..038A #L& [3] (Έ..Ί) GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS
038C #L& (Ό) GREEK CAPITAL LETTER OMICRON WITH TONOS
038E..03A1 #L& [20] (Ύ..Ρ) GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO
03A3..03F5 #L& [83] (Σ..ϵ) GREEK CAPITAL LETTER SIGMA..GREEK LUNATE EPSILON SYMBOL

## Total: 100,520 (omitting 99,815 from listing)


$gcAllLetters = [\p{gc=Ll}\p{gc=Lu}\p{gc=Lo}\p{gc=Lt}\p{gc=Lm}]



# Main Stability Policies

# http://www.unicode.org/policies/property_value_stability_table.html



# TODO: Formal Name Alias Stability, Named Character Sequence Stability, Name Uniqueness,

# TODO: Identity Stability, Property Stability, Alias Stability, Property Alias Uniqueness


# Encoding Stability: Once a character is encoded, it will not be moved or removed.

\p{GC=unassigned} ⊆ \p{U-1:GC=unassigned}


# Name Stability: The Unicode Name property value for any non-reserved code point will not be changed. In particular, once a character is encoded, its name will not be changed.

In \P{U-1:GC=Cn} name=U-1:name


# Formal Name Alias Stability

# TODO


# Named Character Sequence Stability

# TODO


# Name Uniqueness

# TODO


# Strong Normalization Stability (decomposition mapping, Canonical Combining Class don't change)

# In Property Section


# Identity Stability

# Can't be tested


# Property Stability: Normative and informative properties, once defined in the Unicode Character Database, will never be removed.

# TODO


# Alias Stability: Property aliases and property value aliases, once defined in the Unicode Character Database, will never be removed.

# TODO


# Property Alias Uniqueness: All property aliases constitute a single namespace. Property aliases are guaranteed to be unique within this namespace. For each property, all of its property value aliases constitute a separate namespace, one per property. Within each of these property value alias namespaces, property value aliases are guaranteed to be unique.

# TODO


# Identifier Stability: All strings that are valid default Unicode identifiers will continue to be valid default Unicode identifiers in all subsequent versions of Unicode. Furthermore, default identifiers never contain characters with the Pattern_Syntax or Pattern_White_Space properties.

# Covered in Property Stability Section


# Case Folding Stability: Caseless matching of Unicode strings used for identifiers is stable.

# TODO


# Case Pair Stability: If two characters form a case pair in a version of Unicode, they will remain a case pair in each subsequent version of Unicode. If two characters do not form a case pair in a version of Unicode, they will never become a case pair in any subsequent version of Unicode.

# TODO


# Red Flag: cased and case_ignorable should be disjoint


Let $caseOverlap = [\u02B0-\u02B8\u02C0\u02C1\u02E0-\u02E4\u0345\u037A\u1D2C-\u1D61\u1D78\u1D9B-\u1DBF\u2090-\u2094\u2C7D\uA770]

02B0..02B8 #Lm [9] (ʰ..ʸ) MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y
02C0..02C1 #Lm [2] (ˀ..ˁ) MODIFIER LETTER GLOTTAL STOP..MODIFIER LETTER REVERSED GLOTTAL STOP
02E0..02E4 #Lm [5] (ˠ..ˤ) MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
0345 #Mn (ͅ) COMBINING GREEK YPOGEGRAMMENI
037A #Lm (ͺ) GREEK YPOGEGRAMMENI
1D2C..1D61 #Lm [54] (ᴬ..ᵡ) MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI
1D78 #Lm (ᵸ) MODIFIER LETTER CYRILLIC EN
1D9B..1DBF #Lm [37] (ᶛ..ᶿ) MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA
2090..2094 #Lm [5] (ₐ..ₔ) LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER SCHWA
2C7D #Lm (ⱽ) MODIFIER LETTER CAPITAL V
A770 #Lm (ꝰ) MODIFIER LETTER US

## Total: 117


\p{cased} ∥ [\p{caseignorable} - $caseOverlap]



# Property Stability Policies

# http://www.unicode.org/policies/property_value_stability_table.html




# BIDI



# Stability: The Bidi_Class property values will not be further subdivided.

\p{bc=/^(AL|AN|B|BN|CS|EN|ES|ET|L|LRE|LRO|NSM|ON|PDF|R|RLE|RLO|S|WS)$/} = [\u0000-\U0010FFFF]


# Stability: The property values for the bidirectional properties Bidi_Class and Bidi_Mirrored preserve canonical equivalence.

# This test utilizes the fact that bc=NSM characters inherit their behavior in the algorithm, so they are simply filtered out

In \p{dt=canonical} bc * \P{bc=NSM} * dm = bc * \P{bc=NSM}


# Stability: The property values for the bidirectional properties Bidi_Class and Bidi_Mirrored preserve canonical equivalence.

# This test utilizes the fact that bc=NSM characters inherit their behavior in the algorithm, so they are simply filtered out

# There are 5 special cases:

Let $BMExclusions =[≠ ∤ ∦ ≢ \u2ADC]

2224 #Sm (∤) DOES NOT DIVIDE
2226 #Sm (∦) NOT PARALLEL TO
2260 #Sm (≠) NOT EQUAL TO
2262 #Sm (≢) NOT IDENTICAL TO
2ADC #Sm (⫝̸) FORKING

## Total: 5


In [\p{dt=canonical}-$BMExclusions] Bidi_M * \P{bc=NSM} * dm = Bidi_M * \P{bc=NSM}


# Additional BIDI invariant constants

Let $R_blocks = [\u0590-\u05FF \u07C0-\u08FF \uFB1D-\uFB4F \U00010800-\U00010FFF \U0001E800-\U0001EFFF]

0590 #Cn (�)
0591..05BD #Mn [45] (֑..ֽ) HEBREW ACCENT ETNAHTA..HEBREW POINT METEG
05BE #Pd (־) HEBREW PUNCTUATION MAQAF
05BF #Mn (ֿ) HEBREW POINT RAFE
05C0 #Po (׀) HEBREW PUNCTUATION PASEQ
05C1..05C2 #Mn [2] (ׁ..ׂ) HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT
05C3 #Po (׃) HEBREW PUNCTUATION SOF PASUQ
05C4..05C5 #Mn [2] (ׄ..ׅ) HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT
05C6 #Po (׆) HEBREW PUNCTUATION NUN HAFUKHA
05C7 #Mn (ׇ) HEBREW POINT QAMATS QATAN
05C8..05CF #Cn [8] (�..�) ..
05D0..05EA #Lo [27] (א..ת) HEBREW LETTER ALEF..HEBREW LETTER TAV
05EB..05EF #Cn [5] (�..�) ..
05F0..05F2 #Lo [3] (װ..ײ) HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW LIGATURE YIDDISH DOUBLE YOD
05F3..05F4 #Po [2] (׳..״) HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM
05F5..05FF #Cn [11] (�..�) ..
07C0..07C9 #Nd [10] (߀..߉) NKO DIGIT ZERO..NKO DIGIT NINE
07CA..07EA #Lo [33] (ߊ..ߪ) NKO LETTER A..NKO LETTER JONA RA
07EB..07F3 #Mn [9] (߫..߳) NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
07F4..07F5 #Lm [2] (ߴ..ߵ) NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE
07F6 #So (߶) NKO SYMBOL OO DENNEN
07F7..07F9 #Po [3] (߷..߹) NKO SYMBOL GBAKURUNEN..NKO EXCLAMATION MARK
07FA #Lm (ߺ) NKO LAJANYALAN
07FB..07FF #Cn [5] (�..�) ..
0800..0815 #Lo [22] (ࠀ..ࠕ) SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF
0816..0819 #Mn [4] (ࠖ..࠙) SAMARITAN MARK IN..SAMARITAN MARK DAGESH
081A #Lm (ࠚ) SAMARITAN MODIFIER LETTER EPENTHETIC YUT
081B..0823 #Mn [9] (ࠛ..ࠣ) SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A
0824 #Lm (ࠤ) SAMARITAN MODIFIER LETTER SHORT A
0825..0827 #Mn [3] (ࠥ..ࠧ) SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
0828 #Lm (ࠨ) SAMARITAN MODIFIER LETTER I
0829..082D #Mn [5] (ࠩ..࠭) SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
082E..082F #Cn [2] (�..�) ..
0830..083E #Po [15] (࠰..࠾) SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU
083F #Cn (�)
0840..0858 #Lo [25] (ࡀ..ࡘ) MANDAIC LETTER HALQA..MANDAIC LETTER AIN
0859..085B #Mn [3] (࡙..࡛) MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
085C..085D #Cn [2] (�..�) ..
085E #Po (࡞) MANDAIC PUNCTUATION
085F..08FF #Cn [161] (�..�) ..
FB1D #Lo (יִ) HEBREW LETTER YOD WITH HIRIQ
FB1E #Mn (ﬞ) HEBREW POINT JUDEO-SPANISH VARIKA
FB1F..FB28 #Lo [10] (ײַ..ﬨ) HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV
FB29 #Sm (﬩) HEBREW LETTER ALTERNATIVE PLUS SIGN
FB2A..FB36 #Lo [13] (שׁ..זּ) HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH
FB37 #Cn (�)
FB38..FB3C #Lo [5] (טּ..לּ) HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH
FB3D #Cn (�)
FB3E #Lo (מּ) HEBREW LETTER MEM WITH DAGESH
FB3F #Cn (�)
FB40..FB41 #Lo [2] (נּ..סּ) HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH
FB42 #Cn (�)
FB43..FB44 #Lo [2] (ףּ..פּ) HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH
FB45 #Cn (�)
FB46..FB4F #Lo [10] (צּ..ﭏ) HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE ALEF LAMED
10800..10805 #Lo [6] (𐠀..𐠅) CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA
10806..10807 #Cn [2] (�..�) ..
10808 #Lo (𐠈) CYPRIOT SYLLABLE JO
10809 #Cn (�)
1080A..10835 #Lo [44] (𐠊..𐠵) CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO
10836 #Cn (�)
10837..10838 #Lo [2] (𐠷..𐠸) CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE
10839..1083B #Cn [3] (�..�) ..
1083C #Lo (𐠼) CYPRIOT SYLLABLE ZA
1083D..1083E #Cn [2] (�..�) ..
1083F..10855 #Lo [23] (𐠿..𐡕) CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW
10856 #Cn (�)
10857 #Po (𐡗) IMPERIAL ARAMAIC SECTION SIGN
10858..1085F #No [8] (𐡘..𐡟) IMPERIAL ARAMAIC NUMBER ONE..IMPERIAL ARAMAIC NUMBER TEN THOUSAND
10860..108FF #Cn [160] (�..�) ..
10900..10915 #Lo [22] (𐤀..𐤕) PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU
10916..1091B #No [6] (𐤖..𐤛) PHOENICIAN NUMBER ONE..PHOENICIAN NUMBER THREE
1091C..1091E #Cn [3] (�..�) ..
1091F #Po (𐤟) PHOENICIAN WORD SEPARATOR
10920..10939 #Lo [26] (𐤠..𐤹) LYDIAN LETTER A..LYDIAN LETTER C
1093A..1093E #Cn [5] (�..�) ..
1093F #Po (𐤿) LYDIAN TRIANGULAR MARK
10940..109FF #Cn [192] (�..�) ..
10A00 #Lo (𐨀) KHAROSHTHI LETTER A
10A01..10A03 #Mn [3] (𐨁..𐨃) KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R
10A04 #Cn (�)
10A05..10A06 #Mn [2] (𐨅..𐨆) KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O
10A07..10A0B #Cn [5] (�..�) ..
10A0C..10A0F #Mn [4] (𐨌..𐨏) KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA
10A10..10A13 #Lo [4] (𐨐..𐨓) KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA
10A14 #Cn (�)
10A15..10A17 #Lo [3] (𐨕..𐨗) KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA
10A18 #Cn (�)
10A19..10A33 #Lo [27] (𐨙..𐨳) KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER TTTHA
10A34..10A37 #Cn [4] (�..�) ..
10A38..10A3A #Mn [3] (𐨸..𐨺) KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW
10A3B..10A3E #Cn [4] (�..�) ..
10A3F #Mn (𐨿) KHAROSHTHI VIRAMA
10A40..10A47 #No [8] (𐩀..𐩇) KHAROSHTHI DIGIT ONE..KHAROSHTHI NUMBER ONE THOUSAND
10A48..10A4F #Cn [8] (�..�) ..
10A50..10A58 #Po [9] (𐩐..𐩘) KHAROSHTHI PUNCTUATION DOT..KHAROSHTHI PUNCTUATION LINES
10A59..10A5F #Cn [7] (�..�) ..
10A60..10A7C #Lo [29] (𐩠..𐩼) OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH
10A7D..10A7E #No [2] (𐩽..𐩾) OLD SOUTH ARABIAN NUMBER ONE..OLD SOUTH ARABIAN NUMBER FIFTY
10A7F #Po (𐩿) OLD SOUTH ARABIAN NUMERIC INDICATOR
10A80..10AFF #Cn [128] (�..�) ..
10B00..10B35 #Lo [54] (𐬀..𐬵) AVESTAN LETTER A..AVESTAN LETTER HE
10B36..10B38 #Cn [3] (�..�) ..
10B39..10B3F #Po [7] (𐬹..𐬿) AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION
10B40..10B55 #Lo [22] (𐭀..𐭕) INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW
10B56..10B57 #Cn [2] (�..�) ..
10B58..10B5F #No [8] (𐭘..𐭟) INSCRIPTIONAL PARTHIAN NUMBER ONE..INSCRIPTIONAL PARTHIAN NUMBER ONE THOUSAND
10B60..10B72 #Lo [19] (𐭠..𐭲) INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW
10B73..10B77 #Cn [5] (�..�) ..
10B78..10B7F #No [8] (𐭸..𐭿) INSCRIPTIONAL PAHLAVI NUMBER ONE..INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND
10B80..10BFF #Cn [128] (�..�) ..
10C00..10C48 #Lo [73] (𐰀..𐱈) OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH
10C49..10E5F #Cn [535] (�..�) ..
10E60..10E7E #No [31] (𐹠..𐹾) RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS
10E7F..10FFF #Cn [385] (�..�) ..
1E800..1EFFF #Cn [2048] (�..�) ..

## Total: 4,579


Let $AL_blocks = [\u0600-\u07BF \uFB50-\uFDCF \uFDF0-\uFDFF \uFE70-\uFEFF]

0600..0603 #Cf [4] (؀..؃) ARABIC NUMBER SIGN..ARABIC SIGN SAFHA
0604..0605 #Cn [2] (�..�) ..
0606..0608 #Sm [3] (؆..؈) ARABIC-INDIC CUBE ROOT..ARABIC RAY
0609..060A #Po [2] (؉..؊) ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN THOUSAND SIGN
060B #Sc (؋) AFGHANI SIGN
060C..060D #Po [2] (،..؍) ARABIC COMMA..ARABIC DATE SEPARATOR
060E..060F #So [2] (؎..؏) ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA
0610..061A #Mn [11] (ؐ..ؚ) ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
061B #Po (؛) ARABIC SEMICOLON
061C..061D #Cn [2] (�..�) ..
061E..061F #Po [2] (؞..؟) ARABIC TRIPLE DOT PUNCTUATION MARK..ARABIC QUESTION MARK
0620..063F #Lo [32] (ؠ..ؿ) ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
0640 #Lm (ـ) ARABIC TATWEEL
0641..064A #Lo [10] (ف..ي) ARABIC LETTER FEH..ARABIC LETTER YEH
064B..065F #Mn [21] (ً..ٟ) ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW
0660..0669 #Nd [10] (٠..٩) ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
066A..066D #Po [4] (٪..٭) ARABIC PERCENT SIGN..ARABIC FIVE POINTED STAR
066E..066F #Lo [2] (ٮ..ٯ) ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF
0670 #Mn (ٰ) ARABIC LETTER SUPERSCRIPT ALEF
0671..06D3 #Lo [99] (ٱ..ۓ) ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE
06D4 #Po (۔) ARABIC FULL STOP
06D5 #Lo (ە) ARABIC LETTER AE
06D6..06DC #Mn [7] (ۖ..ۜ) ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN
06DD #Cf (۝) ARABIC END OF AYAH
06DE #Me (۞) ARABIC START OF RUB EL HIZB
06DF..06E4 #Mn [6] (۟..ۤ) ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA
06E5..06E6 #Lm [2] (ۥ..ۦ) ARABIC SMALL WAW..ARABIC SMALL YEH
06E7..06E8 #Mn [2] (ۧ..ۨ) ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON
06E9 #So (۩) ARABIC PLACE OF SAJDAH
06EA..06ED #Mn [4] (۪..ۭ) ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM
06EE..06EF #Lo [2] (ۮ..ۯ) ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V
06F0..06F9 #Nd [10] (۰..۹) EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE
06FA..06FC #Lo [3] (ۺ..ۼ) ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW
06FD..06FE #So [2] (۽..۾) ARABIC SIGN SINDHI AMPERSAND..ARABIC SIGN SINDHI POSTPOSITION MEN
06FF #Lo (ۿ) ARABIC LETTER HEH WITH INVERTED V
0700..070D #Po [14] (܀..܍) SYRIAC END OF PARAGRAPH..SYRIAC HARKLEAN ASTERISCUS
070E #Cn (�)
070F #Cf (܏) SYRIAC ABBREVIATION MARK
0710 #Lo (ܐ) SYRIAC LETTER ALAPH
0711 #Mn (ܑ) SYRIAC LETTER SUPERSCRIPT ALAPH
0712..072F #Lo [30] (ܒ..ܯ) SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH
0730..074A #Mn [27] (ܰ..݊) SYRIAC PTHAHA ABOVE..SYRIAC BARREKH
074B..074C #Cn [2] (�..�) ..
074D..07A5 #Lo [89] (ݍ..ޥ) SYRIAC LETTER SOGDIAN ZHAIN..THAANA LETTER WAAVU
07A6..07B0 #Mn [11] (ަ..ް) THAANA ABAFILI..THAANA SUKUN
07B1 #Lo (ޱ) THAANA LETTER NAA
07B2..07BF #Cn [14] (�..�) ..
FB50..FBB1 #Lo [98] (ﭐ..ﮱ) ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM
FBB2..FBC1 #Sk [16] (﮲..﯁) ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL SMALL TAH BELOW
FBC2..FBD2 #Cn [17] (�..�) ..
FBD3..FD3D #Lo [363] (ﯓ..ﴽ) ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM
FD3E #Ps (﴾) ORNATE LEFT PARENTHESIS
FD3F #Pe (﴿) ORNATE RIGHT PARENTHESIS
FD40..FD4F #Cn [16] (�..�) ..
FD50..FD8F #Lo [64] (ﵐ..ﶏ) ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM
FD90..FD91 #Cn [2] (�..�) ..
FD92..FDC7 #Lo [54] (ﶒ..ﷇ) ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM
FDC8..FDCF #Cn [8] (�..�) ..
FDF0..FDFB #Lo [12] (ﷰ..ﷻ) ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU
FDFC #Sc (﷼) RIAL SIGN
FDFD #So (﷽) ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM
FDFE..FDFF #Cn [2] (�..�) ..
FE70..FE74 #Lo [5] (ﹰ..ﹴ) ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM
FE75 #Cn (�)
FE76..FEFC #Lo [135] (ﹶ..ﻼ) ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM
FEFD..FEFE #Cn [2] (�..�) ..
FEFF #Cf () ZERO WIDTH NO-BREAK SPACE

## Total: 1,248



# Unassigned characters in these blocks have R or AL respectively

\p{Bidi_Class=R} ⊇ [$R_blocks & \p{gc=Cn}]

\p{Bidi_Class=AL} ⊇ [$AL_blocks & \p{gc=Cn}]


# There are no strong characters of the other directionalities (out of L, AL, R) in these blocks,

# and anything R or AL is in these blocks (or is RLM)

$R_blocks ∥ [\p{Bidi_Class=L} \p{Bidi_Class=AL}]

$AL_blocks ∥ [\p{Bidi_Class=L} \p{Bidi_Class=R}]

[$R_blocks $AL_blocks \N{RIGHT-TO-LEFT MARK}] ⊇ [\p{Bidi_Class=AL} \p{Bidi_Class=R}] #200f



# U6.0: BN characters are default ignorable, noncharacters, controls, minus marks, bidi-controls, alphabetic, whitespace, with a few exceptions


Let $BN_Exceptions = [\u001C-\u001F\u17B4\u17B5]

001C..001F #Cc [4] (�..�) ..
17B4..17B5 #Cf [2] (..) KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA

## Total: 6



[\p{Bidi_Class=BN}] = [\p{di}\p{nchar}\p{gc=Cc}-\p{gc=Mc}-\p{gc=Mn}-\p{gc=Me}-\p{Bidi_C}-\p{alpha}-\p{wspace} - $BN_Exceptions]



# Case



# Stability: The Case_Folding property value is limited so that no string when case folded expands to more than 3× in length (measured in code units).

# TODO


# Stability: All characters with the Lowercase property and all characters with the Uppercase property have the Alphabetic property.

\p{Alphabetic} ⊃ [\p{Uppercase} \p{Lowercase}]



# General



# Stability: The General_Category property values will not be further subdivided.

\p{gc=/^(Cc|Cf|Cn|Co|Cs|Ll|Lm|Lo|Lt|Lu|Mc|Me|Mn|Nd|Nl|No|Pc|Pd|Pe|Pf|Pi|Po|Ps|Sc|Sk|Sm|So|Zl|Zp|Zs)$/} = [\u0000-\U0010FFFF]


# Stability: The General_Category property value Control (Cc) is immutable: the set of code points with that value will never change.

\p{GC=Cc} = \p{U-1:GC=Cc}


# Stability: The General_Category property value Private_Use (Co) is immutable: the set of code points with that value will never change.

\p{GC=Co} = \p{U-1:GC=Co}


# Stability: The General_Category property value Surrogate (Cs) is immutable: the set of code points with that value will never change.

\p{GC=Cs} = \p{U-1:GC=Cs}


# Stability: The set of characters having General_Category=Nd will always be the same as the set of characters having Numeric_Type=de.

\p{General_Category=Decimal_Number} = \p{Numeric_Type=Decimal}


# Stability: Once a character is assigned, both its Name and its Jamo_Short_Name will never change.

# Name is covered in Main policies

# TODO: Short Name


# Stability: The Noncharacter_Code_Point property is an immutable code point property, which means that its property values for all Unicode code points will never change.

\p{NChar} = \p{U-1:NChar}



# Identifier Stability



# Stability: Once a character is ID_Continue, it must continue to be so in all future versions.

\p{ID_Continue} ⊇ \p{U-1:ID_Continue}


# Stability: If a character is ID_Start then it must also be ID_Continue.

\p{ID_Continue} ⊇ \p{ID_Start}


# Stability: Once a character is ID_Start, it must continue to be so in all future versions.

\p{ID_Start} ⊇ \p{U-1:ID_Start}


# Stability: Once a character is XID_Continue, it must continue to be so in all future versions.

\p{XID_Continue} ⊇ \p{U-1:XID_Continue}


# Stability: If a character is XID_Start then it must also be XID_Continue.

\p{XID_Continue} ⊇ \p{XID_Start}


# Stability: Once a character is XID_Start, it must continue to be so in all future versions.

\p{XID_Start} ⊇ \p{U-1:XID_Start}


# Stability: The Pattern_Syntax and Pattern_White_Space properties are immutable code point properties, which means that their property values for all Unicode code points will never change.

\p{Pattern_Whitespace} = \p{U-1:Pattern_Whitespace}

\p{Pattern_Syntax} = \p{U-1:Pattern_Syntax}


# Stability: If a character has the Pattern_Syntax or Pattern_White_Space property, then it cannot have the ID_Continue or XID_Continue property.

# (Also tests that Pattern_Syntax is disjoint from Pattern_White_Space)


\p{ID_Continue} ∥ [\p{Pattern_Whitespace} \p{Pattern_Syntax}]

\p{Pattern_Whitespace} ∥ [\p{ID_Continue} \p{Pattern_Syntax}]

\p{Pattern_Syntax} ∥ [\p{ID_Continue} \p{Pattern_Whitespace}]


\p{XID_Continue} ∥ [\p{Pattern_Whitespace} \p{Pattern_Syntax}]

\p{Pattern_Whitespace} ∥ [\p{XID_Continue} \p{Pattern_Syntax}]

\p{Pattern_Syntax} ∥ [\p{XID_Continue} \p{Pattern_Whitespace}]


# The X versions are subsets of the plain versions

# Should add as stability provision

\p{ID_Continue} ⊇ \p{XID_Continue}

\p{ID_Start} ⊇ \p{XID_Start}



# Normalization



# Stability: The Canonical_Combining_Class property values are limited to the values 0 to 255.

\p{CCC=/^([0-9][0-9]?|1[0-9][0-9]|2[0-4][0-9]|25[0-5])$/} = [\u0000-\U0010FFFF]


# Stability: Once a character is assigned, its Canonical_Combining_Class will never change.

In \P{U-1:GC=Cn} ccc=U-1:ccc


# Canonical decompositions (minus exclusions) must be identical across releases (also required by strong normalization stability),

# except where a character and at least one character in its decomposition are both new in the release.

Let $NFC_Exceptions = [\U0001109A\U0001109C\U000110AB]

1109A #Lo (𑂚) KAITHI LETTER DDDHA
1109C #Lo (𑂜) KAITHI LETTER RHA
110AB #Lo (𑂫) KAITHI LETTER VA

## Total: 3


[\p{Decomposition_Type=Canonical} - \p{Full_Composition_Exclusion} - $NFC_Exceptions] = [\p{U-1:Decomposition_Type=Canonical} - \p{U-1:Full_Composition_Exclusion} - $NFC_Exceptions]


# Stability: All characters other than those with General_Category property values Spacing_Mark (Mc) and Nonspacing_Mark (Mn) have the Canonical_Combining_Class property value 0.

\p{CCC=0} ⊇ [^ \p{GC=Mc} \p{GC=Mn}]


# Stability: Canonical and compatibility mappings (Decomposition_Mapping property values) are always in canonical order, and the resulting recursive decomposition will also be in canonical order.

# TODO


# Stability: Canonical mappings (Decomposition_Mapping property values) are always limited either to a single value or to a pair. The second character in the pair cannot itself have a canonical mapping.

# TODO


# Stability: Canonical mappings (Decomposition_Mapping property values) are always limited so that no string when normalized to NFC expands to more than 3× in length (measured in code units).

# TODO


# Stability: Once a character is assigned, its Decomposition_Mapping will never change.

In \P{U-1:GC=Cn} dm=U-1:dm


# U6.0: Construction of Full_Composition_Exclusion

# Primary Composites don't include singletons, ccc!=0, or sequences starting with ccc!=0

Let $combiningExclusions = [\p{dt=canonical}-\P{nfcqc=N}-\P{nfdqc=N}]

0340..0341 #Mn [2] (̀..́) COMBINING GRAVE TONE MARK..COMBINING ACUTE TONE MARK
0343..0344 #Mn [2] (̓..̈́) COMBINING GREEK KORONIS..COMBINING GREEK DIALYTIKA TONOS
0374 #Lm (ʹ) GREEK NUMERAL SIGN
037E #Po (;) GREEK QUESTION MARK
0387 #Po (·) GREEK ANO TELEIA
0958..095F #Lo [8] (क़..य़) DEVANAGARI LETTER QA..DEVANAGARI LETTER YYA
09DC..09DD #Lo [2] (ড়..ঢ়) BENGALI LETTER RRA..BENGALI LETTER RHA
09DF #Lo (য়) BENGALI LETTER YYA
0A33 #Lo (ਲ਼) GURMUKHI LETTER LLA
0A36 #Lo (ਸ਼) GURMUKHI LETTER SHA
0A59..0A5B #Lo [3] (ਖ਼..ਜ਼) GURMUKHI LETTER KHHA..GURMUKHI LETTER ZA
0A5E #Lo (ਫ਼) GURMUKHI LETTER FA
0B5C..0B5D #Lo [2] (ଡ଼..ଢ଼) ORIYA LETTER RRA..ORIYA LETTER RHA
0F43 #Lo (གྷ) TIBETAN LETTER GHA
0F4D #Lo (ཌྷ) TIBETAN LETTER DDHA
0F52 #Lo (དྷ) TIBETAN LETTER DHA
0F57 #Lo (བྷ) TIBETAN LETTER BHA
0F5C #Lo (ཛྷ) TIBETAN LETTER DZHA
0F69 #Lo (ཀྵ) TIBETAN LETTER KSSA
0F73 #Mn (ཱི) TIBETAN VOWEL SIGN II

## Total: 1,118 (omitting 1,085 from listing)



Let $singletons = \p{toNFD=/^.$/}

0340..0341 #Mn [2] (̀..́) COMBINING GRAVE TONE MARK..COMBINING ACUTE TONE MARK
0343 #Mn (̓) COMBINING GREEK KORONIS
0374 #Lm (ʹ) GREEK NUMERAL SIGN
037E #Po (;) GREEK QUESTION MARK
0387 #Po (·) GREEK ANO TELEIA
1FBE #L& (ι) GREEK PROSGEGRAMMENI
1FEF #Sk (`) GREEK VARIA
1FFD #Sk (´) GREEK OXIA
2000..2001 #Zs [2] ( .. ) EN QUAD..EM QUAD
2126 #L& (Ω) OHM SIGN
212A #L& (K) KELVIN SIGN
2329 #Ps (〈) LEFT-POINTING ANGLE BRACKET
232A #Pe (〉) RIGHT-POINTING ANGLE BRACKET
F900..FA0D #Lo [270] (豈..嗀) CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA0D
FA10 #Lo (塚) CJK COMPATIBILITY IDEOGRAPH-FA10
FA12 #Lo (晴) CJK COMPATIBILITY IDEOGRAPH-FA12
FA15..FA1E #Lo [10] (凞..羽) CJK COMPATIBILITY IDEOGRAPH-FA15..CJK COMPATIBILITY IDEOGRAPH-FA1E
FA20 #Lo (蘒) CJK COMPATIBILITY IDEOGRAPH-FA20
FA22 #Lo (諸) CJK COMPATIBILITY IDEOGRAPH-FA22
FA25..FA26 #Lo [2] (逸..都) CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26
FA2A..FA2D #Lo [4] (飯..鶴) CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA2D

## Total: 1,015 (omitting 710 from listing)


Let $nonstarter = \P{ccc=0}

0300..034E #Mn [79] (̀..͎) COMBINING GRAVE ACCENT..COMBINING UPWARDS ARROW BELOW
0350..036F #Mn [32] (͐..ͯ) COMBINING RIGHT ARROWHEAD ABOVE..COMBINING LATIN SMALL LETTER X
0483..0487 #Mn [5] (҃..҇) COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE
0591..05BD #Mn [45] (֑..ֽ) HEBREW ACCENT ETNAHTA..HEBREW POINT METEG
05BF #Mn (ֿ) HEBREW POINT RAFE
05C1..05C2 #Mn [2] (ׁ..ׂ) HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT
05C4..05C5 #Mn [2] (ׄ..ׅ) HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT
05C7 #Mn (ׇ) HEBREW POINT QAMATS QATAN
0610..061A #Mn [11] (ؐ..ؚ) ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
064B..065F #Mn [21] (ً..ٟ) ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW
0670 #Mn (ٰ) ARABIC LETTER SUPERSCRIPT ALEF
06D6..06DC #Mn [7] (ۖ..ۜ) ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN
06DF..06E4 #Mn [6] (۟..ۤ) ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA
06E7..06E8 #Mn [2] (ۧ..ۨ) ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON
06EA..06ED #Mn [4] (۪..ۭ) ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM
0711 #Mn (ܑ) SYRIAC LETTER SUPERSCRIPT ALAPH
0730..074A #Mn [27] (ܰ..݊) SYRIAC PTHAHA ABOVE..SYRIAC BARREKH
07EB..07F3 #Mn [9] (߫..߳) NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
0816..0819 #Mn [4] (ࠖ..࠙) SAMARITAN MARK IN..SAMARITAN MARK DAGESH
081B..0823 #Mn [9] (ࠛ..ࠣ) SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A

## Total: 606 (omitting 337 from listing)


Let $firstNonStarter = \p{toNFD=/^$nonstarter/}

0340..0341 #Mn [2] (̀..́) COMBINING GRAVE TONE MARK..COMBINING ACUTE TONE MARK
0343..0344 #Mn [2] (̓..̈́) COMBINING GREEK KORONIS..COMBINING GREEK DIALYTIKA TONOS
0F73 #Mn (ཱི) TIBETAN VOWEL SIGN II
0F75 #Mn (ཱུ) TIBETAN VOWEL SIGN UU
0F81 #Mn (ཱྀ) TIBETAN VOWEL SIGN REVERSED II

## Total: 7



$combiningExclusions ⊇ [$singletons & \p{dt=canonical}]

$combiningExclusions ⊇ [$nonstarter & \p{dt=canonical}]

$combiningExclusions ⊇ [$firstNonStarter & \p{dt=canonical}]



# Other Invariant Tests, not in Stability Policies




# Numbers



# Decimals are 0-9


Let $decimalValue = [\p{Numeric_Value=/^[0-9]+(.0)?$/}]

0030..0039 #Nd [10] (0..9) DIGIT ZERO..DIGIT NINE
00B2..00B3 #No [2] (²..³) SUPERSCRIPT TWO..SUPERSCRIPT THREE
00B9 #No (¹) SUPERSCRIPT ONE
0660..0669 #Nd [10] (٠..٩) ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
06F0..06F9 #Nd [10] (۰..۹) EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE
07C0..07C9 #Nd [10] (߀..߉) NKO DIGIT ZERO..NKO DIGIT NINE
0966..096F #Nd [10] (०..९) DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE
09E6..09EF #Nd [10] (০..৯) BENGALI DIGIT ZERO..BENGALI DIGIT NINE
09F9 #No (৹) BENGALI CURRENCY DENOMINATOR SIXTEEN
0A66..0A6F #Nd [10] (੦..੯) GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE
0AE6..0AEF #Nd [10] (૦..૯) GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE
0B66..0B6F #Nd [10] (୦..୯) ORIYA DIGIT ZERO..ORIYA DIGIT NINE
0BE6..0BEF #Nd [10] (௦..௯) TAMIL DIGIT ZERO..TAMIL DIGIT NINE
0BF0..0BF2 #No [3] (௰..௲) TAMIL NUMBER TEN..TAMIL NUMBER ONE THOUSAND
0C66..0C6F #Nd [10] (౦..౯) TELUGU DIGIT ZERO..TELUGU DIGIT NINE
0C78..0C7E #No [7] (౸..౾) TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR
0CE6..0CEF #Nd [10] (೦..೯) KANNADA DIGIT ZERO..KANNADA DIGIT NINE
0D66..0D6F #Nd [10] (൦..൯) MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE
0D70..0D72 #No [3] (൰..൲) MALAYALAM NUMBER TEN..MALAYALAM NUMBER ONE THOUSAND
0E50..0E59 #Nd [10] (๐..๙) THAI DIGIT ZERO..THAI DIGIT NINE
0ED0..0ED9 #Nd [10] (໐..໙) LAO DIGIT ZERO..LAO DIGIT NINE
0F20..0F29 #Nd [10] (༠..༩) TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE

## Total: 1,109 (omitting 932 from listing)



$decimalValue ⊇ \p{General_Category=Decimal_Number}


# All and only those items with numeric types have numeric values


Let $anyNumericValue = \p{Numeric_Value=/-?[0-9]+.[0-9]+/}

0030..0039 #Nd [10] (0..9) DIGIT ZERO..DIGIT NINE
00B2..00B3 #No [2] (²..³) SUPERSCRIPT TWO..SUPERSCRIPT THREE
00B9 #No (¹) SUPERSCRIPT ONE
00BC..00BE #No [3] (¼..¾) VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS
0660..0669 #Nd [10] (٠..٩) ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
06F0..06F9 #Nd [10] (۰..۹) EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE
07C0..07C9 #Nd [10] (߀..߉) NKO DIGIT ZERO..NKO DIGIT NINE
0966..096F #Nd [10] (०..९) DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE
09E6..09EF #Nd [10] (০..৯) BENGALI DIGIT ZERO..BENGALI DIGIT NINE
09F4..09F9 #No [6] (৴..৹) BENGALI CURRENCY NUMERATOR ONE..BENGALI CURRENCY DENOMINATOR SIXTEEN
0A66..0A6F #Nd [10] (੦..੯) GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE
0AE6..0AEF #Nd [10] (૦..૯) GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE
0B66..0B6F #Nd [10] (୦..୯) ORIYA DIGIT ZERO..ORIYA DIGIT NINE
0B72..0B77 #No [6] (୲..୷) ORIYA FRACTION ONE QUARTER..ORIYA FRACTION THREE SIXTEENTHS
0BE6..0BEF #Nd [10] (௦..௯) TAMIL DIGIT ZERO..TAMIL DIGIT NINE
0BF0..0BF2 #No [3] (௰..௲) TAMIL NUMBER TEN..TAMIL NUMBER ONE THOUSAND
0C66..0C6F #Nd [10] (౦..౯) TELUGU DIGIT ZERO..TELUGU DIGIT NINE
0C78..0C7E #No [7] (౸..౾) TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR
0CE6..0CEF #Nd [10] (೦..೯) KANNADA DIGIT ZERO..KANNADA DIGIT NINE
0D66..0D6F #Nd [10] (൦..൯) MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE
0D70..0D75 #No [6] (൰..൵) MALAYALAM NUMBER TEN..MALAYALAM FRACTION THREE QUARTERS
0E50..0E59 #Nd [10] (๐..๙) THAI DIGIT ZERO..THAI DIGIT NINE

## Total: 1,177 (omitting 1,003 from listing)


[\p{Numeric_Type=Decimal} \p{Numeric_Type=Digit} \p{Numeric_Type=Numeric}] = $anyNumericValue



# Misc Properties



# Musical symbol combining marks, other oddities


Let $AlphaExclusions = [\u0F3E\u0F3F\u1063\u1064\u1069-\u106D\u1087-\u108C\u108F\u109A\u109B\u1CE1\uAA7B\uABEC\U0001D165\U0001D166\U0001D16D-\U0001D172][[:gc=mc:]&[:ccc=9:]]

0F3E..0F3F #Mc [2] (༾..༿) TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES
1063..1064 #Mc [2] (ၣ..ၤ) MYANMAR TONE MARK SGAW KAREN HATHI..MYANMAR TONE MARK SGAW KAREN KE PHO
1069..106D #Mc [5] (ၩ..ၭ) MYANMAR SIGN WESTERN PWO KAREN TONE-1..MYANMAR SIGN WESTERN PWO KAREN TONE-5
1087..108C #Mc [6] (ႇ..ႌ) MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3
108F #Mc (ႏ) MYANMAR SIGN RUMAI PALAUNG TONE-5
109A..109B #Mc [2] (ႚ..ႛ) MYANMAR SIGN KHAMTI TONE-1..MYANMAR SIGN KHAMTI TONE-3
1CE1 #Mc (᳡) VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA
AA7B #Mc (ꩻ) MYANMAR SIGN PAO KAREN TONE
ABEC #Mc (꯬) MEETEI MAYEK LUM IYEK
1D165..1D166 #Mc [2] (𝅥..𝅦) MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM
1D16D..1D172 #Mc [6] (𝅭..𝅲) MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5

## Total: 29



\p{Alphabetic} ⊇ [\p{GC=Lu} \p{GC=Ll} \p{GC=Lt} \p{GC=Lm} \p{GC=Lo} \p{GC=Nl} \p{GC=Mc} - $AlphaExclusions]


# Show [\p{GC=Mc} - \p{alphabetic}]

# Show [\p{GC=Mc} & \p{alphabetic}]



\p{Whitespace} ⊃ [\p{GC=Zs} \p{GC=Zp} \p{GC=Zl}]

\p{GC=Zs} ≉ \p{Name=/SPACE/}


\p{Dash} ⊃ [\p{GC=Pd}]


\p{Script=Common} ∥ [\p{GC=Mn} \p{GC=Me} \p{Join_Control}]

\p{Script=Inherited} ⊆ [\p{GC=Mn} \p{GC=Me} \p{Join_Control}]

\p{Script=Unknown} = [\p{GC=Cn} \p{GC=Co} \p{GC=Cs}]


# [\p{Alphabetic}] ∥ \p{Script=Common}

# & [\p{Decomposition_Type=None} \p{Decomposition_Type=Canonical}]



# LineBreak property



Let $IDInclusions = [[:block=/Ideographs/:][\U00020000-\U0003FFFF] & [:gc=Cn:] - [:NChar:]]

4DB6..4DBF #Cn [10] (�..�) ..
9FCC..9FFF #Cn [52] (�..�) ..
FA2E..FA2F #Cn [2] (�..�) ..
FA6E..FA6F #Cn [2] (�..�) ..
FADA..FAFF #Cn [38] (�..�) ..
2A6D7..2A6FF #Cn [41] (�..�) ..
2B735..2B73F #Cn [11] (�..�) ..
2B81E..2F7FF #Cn [16354] (�..�) ..
2FA1E..2FFFD #Cn [1504] (�..�) ..
30000..3FFFD #Cn [65534] (�..�) ..

## Total: 83,548


\p{LB=ID} ⊃ $IDInclusions

\p{Line_Break=Unknown} = [\p{General_Category=Unassigned} \p{GeneralCategory=PrivateUse} - $IDInclusions]


Let $OPInclusions = [\u00A1\u00BF\u2E18\U00013258-\U0001325A\U00013286\U00013288\U00013379]

00A1 #Po (¡) INVERTED EXCLAMATION MARK
00BF #Po (¿) INVERTED QUESTION MARK
2E18 #Po (⸘) INVERTED INTERROBANG
13258..1325A #Lo [3] (𓉘..𓉚) EGYPTIAN HIEROGLYPH O006A..EGYPTIAN HIEROGLYPH O006C
13286 #Lo (𓊆) EGYPTIAN HIEROGLYPH O036A
13288 #Lo (𓊈) EGYPTIAN HIEROGLYPH O036C
13379 #Lo (𓍹) EGYPTIAN HIEROGLYPH V011A

## Total: 9


\p{LB=OP} = [\p{GC=Ps} $OPInclusions]

[\p{LB=CL}\p{LB=CP}] ⊃ \p{GC=Pe}

\p{LB=CM} = [\p{GC=Mn} \p{GC=Me} \p{GC=Mc} \p{GC=Cc} \p{GC=Cf} -\p{LB=SA} -\p{LB=WJ} -\p{LB=ZW} -\p{LB=BA} -\p{LB=LF} -\p{LB=BK} -\p{LB=CR} -\p{LB=NL} -\p{LB=GL} -\p{LB=AL}]


Let $NUInclusions = [\u066B\u066C]

066B..066C #Po [2] (٫..٬) ARABIC DECIMAL SEPARATOR..ARABIC THOUSANDS SEPARATOR

## Total: 2


\p{LB=NU} = [\p{GC=Nd} $NUInclusions - \p{EA=F} ]


Let $PRInclusions = [\u002b\u005c\u00b1\u2116\u2212\u2213]

002B #Sm (+) PLUS SIGN
005C #Po (\) REVERSE SOLIDUS
00B1 #Sm (±) PLUS-MINUS SIGN
2116 #So (№) NUMERO SIGN
2212..2213 #Sm [2] (−..∓) MINUS SIGN..MINUS-OR-PLUS SIGN

## Total: 6


\p{LB=PR} = [\p{GC=Sc} $PRInclusions - \p{LB=PO} ]


Let $QUInclusions = [\u0022 \u0027 \u275B-\u275E \u2E00-\u2E01 \u2E06-\u2E08 \u2E0B]

0022 #Po (") QUOTATION MARK
0027 #Po (') APOSTROPHE
275B..275E #So [4] (❛..❞) HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT..HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT
2E00..2E01 #Po [2] (⸀..⸁) RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER
2E06..2E08 #Po [3] (⸆..⸈) RAISED INTERPOLATION MARKER..DOTTED TRANSPOSITION MARKER
2E0B #Po (⸋) RAISED SQUARE

## Total: 12


\p{LB=QU} = [\p{GC=Pf} \p{GC=Pi} $QUInclusions]

\p{LB=SG} = \p{GC=Cs}

\p{LB=SP} = \N{SPACE}

\p{LB=SY} = \N{SOLIDUS}

\p{LB=WJ} = [\N{WORD JOINER} \N{ZERO WIDTH NO-BREAK SPACE}]

\p{LB=ZW} = \N{ZERO WIDTH SPACE}


# SA are limited to certain scripts:

Let $SAScripts = [\p{script=thai} \p{script=lao} \p{script=myanmar} \p{script=khmer} \p{script=Tai_Le} \p{script=New_Tai_Lue} \p{script=Tai_Tham} \p{script=Tai_Viet}]

0E01..0E30 #Lo [48] (ก..ะ) THAI CHARACTER KO KAI..THAI CHARACTER SARA A
0E31 #Mn (ั) THAI CHARACTER MAI HAN-AKAT
0E32..0E33 #Lo [2] (า..ำ) THAI CHARACTER SARA AA..THAI CHARACTER SARA AM
0E34..0E3A #Mn [7] (ิ..ฺ) THAI CHARACTER SARA I..THAI CHARACTER PHINTHU
0E40..0E45 #Lo [6] (เ..ๅ) THAI CHARACTER SARA E..THAI CHARACTER LAKKHANGYAO
0E46 #Lm (ๆ) THAI CHARACTER MAIYAMOK
0E47..0E4E #Mn [8] (็..๎) THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN
0E4F #Po (๏) THAI CHARACTER FONGMAN
0E50..0E59 #Nd [10] (๐..๙) THAI DIGIT ZERO..THAI DIGIT NINE
0E5A..0E5B #Po [2] (๚..๛) THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT
0E81..0E82 #Lo [2] (ກ..ຂ) LAO LETTER KO..LAO LETTER KHO SUNG
0E84 #Lo (ຄ) LAO LETTER KHO TAM
0E87..0E88 #Lo [2] (ງ..ຈ) LAO LETTER NGO..LAO LETTER CO
0E8A #Lo (ຊ) LAO LETTER SO TAM
0E8D #Lo (ຍ) LAO LETTER NYO
0E94..0E97 #Lo [4] (ດ..ທ) LAO LETTER DO..LAO LETTER THO TAM
0E99..0E9F #Lo [7] (ນ..ຟ) LAO LETTER NO..LAO LETTER FO SUNG
0EA1..0EA3 #Lo [3] (ມ..ຣ) LAO LETTER MO..LAO LETTER LO LING
0EA5 #Lo (ລ) LAO LETTER LO LOOT
0EA7 #Lo (ວ) LAO LETTER WO
0EAA..0EAB #Lo [2] (ສ..ຫ) LAO LETTER SO SUNG..LAO LETTER HO SUNG
0EAD..0EB0 #Lo [4] (ອ..ະ) LAO LETTER O..LAO VOWEL SIGN A
0EB1 #Mn (ັ) LAO VOWEL SIGN MAI KAN
0EB2..0EB3 #Lo [2] (າ..ຳ) LAO VOWEL SIGN AA..LAO VOWEL SIGN AM
0EB4..0EB9 #Mn [6] (ິ..ູ) LAO VOWEL SIGN I..LAO VOWEL SIGN UU
0EBB..0EBC #Mn [2] (ົ..ຼ) LAO VOWEL SIGN MAI KON..LAO SEMIVOWEL SIGN LO
0EBD #Lo (ຽ) LAO SEMIVOWEL SIGN NYO
0EC0..0EC4 #Lo [5] (ເ..ໄ) LAO VOWEL SIGN E..LAO VOWEL SIGN AI
0EC6 #Lm (ໆ) LAO KO LA
0EC8..0ECD #Mn [6] (່..ໍ) LAO TONE MAI EK..LAO NIGGAHITA
0ED0..0ED9 #Nd [10] (໐..໙) LAO DIGIT ZERO..LAO DIGIT NINE
0EDC..0EDD #Lo [2] (ໜ..ໝ) LAO HO NO..LAO HO MO

## Total: 802 (omitting 651 from listing)


$SAScripts ⊇ \p{LineBreak=SA}


# And in those scripts, they are all the alphabetic spacing characters, plus some odd Cf & Mn

Let $SAScriptExceptions = [\u1063\u1064\u1069-\u106D\u1087-\u108C\u108F\u109A\u109B\u109E\u109F\u19DE\u19DF\u1AA0-\u1AA6\u1AA8-\u1AAD\uAA77-\uAA79\uAA7B\uAADB-\uAADF]

1063..1064 #Mc [2] (ၣ..ၤ) MYANMAR TONE MARK SGAW KAREN HATHI..MYANMAR TONE MARK SGAW KAREN KE PHO
1069..106D #Mc [5] (ၩ..ၭ) MYANMAR SIGN WESTERN PWO KAREN TONE-1..MYANMAR SIGN WESTERN PWO KAREN TONE-5
1087..108C #Mc [6] (ႇ..ႌ) MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3
108F #Mc (ႏ) MYANMAR SIGN RUMAI PALAUNG TONE-5
109A..109B #Mc [2] (ႚ..ႛ) MYANMAR SIGN KHAMTI TONE-1..MYANMAR SIGN KHAMTI TONE-3
109E..109F #So [2] (႞..႟) MYANMAR SYMBOL SHAN ONE..MYANMAR SYMBOL SHAN EXCLAMATION
19DE..19DF #Po [2] (᧞..᧟) NEW TAI LUE SIGN LAE..NEW TAI LUE SIGN LAEV
1AA0..1AA6 #Po [7] (᪠..᪦) TAI THAM SIGN WIANG..TAI THAM SIGN REVERSED ROTATED RANA
1AA8..1AAD #Po [6] (᪨..᪭) TAI THAM SIGN KAAN..TAI THAM SIGN CAANG
AA77..AA79 #So [3] (꩷..꩹) MYANMAR SYMBOL AITON EXCLAMATION..MYANMAR SYMBOL AITON TWO
AA7B #Mc (ꩻ) MYANMAR SIGN PAO KAREN TONE
AADB..AADC #Lo [2] (ꫛ..ꫜ) TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG
AADD #Lm (ꫝ) TAI VIET SYMBOL SAM
AADE..AADF #Po [2] (꫞..꫟) TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI

## Total: 42



[$SAScripts & [\p{Alphabetic} \p{gc=cf} \p{gc=Mn} $SAScriptExceptions]] = [$SAScripts & [\p{LineBreak=SA} \p{LineBreak=CM}]]



# Word Break



# 6.0: Compare word characters with similar identifier characters


# UAX 31 Table 3: Candidate Characters for Inclusion in Identifiers

# Warning: the UAX #31 tables are not available in machine-readable form, so this set must be updated each release.

Let $uax31table3 = [\u0027\u002D\u002E\u003A\u00B7\u058A\u05F3\u05F4\u0F0B\u200C\u200D\u2010\u2019\u2027\u30A0\u30FB]

0027 #Po (') APOSTROPHE
002D #Pd (-) HYPHEN-MINUS
002E #Po (.) FULL STOP
003A #Po (:) COLON
00B7 #Po (·) MIDDLE DOT
058A #Pd (֊) ARMENIAN HYPHEN
05F3..05F4 #Po [2] (׳..״) HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM
0F0B #Po (་) TIBETAN MARK INTERSYLLABIC TSHEG
200C..200D #Cf [2] (..) ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
2010 #Pd (‐) HYPHEN
2019 #Pf (’) RIGHT SINGLE QUOTATION MARK
2027 #Po (‧) HYPHENATION POINT
30A0 #Pd (゠) KATAKANA-HIRAGANA DOUBLE HYPHEN
30FB #Po (・) KATAKANA MIDDLE DOT

## Total: 16



Let $WBRemovals = [\u0387\u2018\u2024\u2E2F\uFE13\uFE52\uFE55\uFF07\uFF0E\uFF1A\u200c\u200d'.\:\u00AD\u00B7\u05F3\u05F4\u0600-\u0603\u06DD\u070F\u17B4\u17B5\u200E\u200F\u2019\u2027\u202A-\u202E\u2060-\u2064\u206A-\u206F\uFEFF\uFFF9-\uFFFB\U000110BD\U0001D173-\U0001D17A\U000E0001\U000E0020-\U000E007F\p{Cf}\p{Block=Enclosed Alphanumerics}]

0027 #Po (') APOSTROPHE
002E #Po (.) FULL STOP
003A #Po (:) COLON
00AD #Cf () SOFT HYPHEN
00B7 #Po (·) MIDDLE DOT
0387 #Po (·) GREEK ANO TELEIA
05F3..05F4 #Po [2] (׳..״) HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM
0600..0603 #Cf [4] (؀..؃) ARABIC NUMBER SIGN..ARABIC SIGN SAFHA
06DD #Cf (۝) ARABIC END OF AYAH
070F #Cf (܏) SYRIAC ABBREVIATION MARK
17B4..17B5 #Cf [2] (..) KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
200C..200F #Cf [4] (..‏) ZERO WIDTH NON-JOINER..RIGHT-TO-LEFT MARK
2018 #Pi (‘) LEFT SINGLE QUOTATION MARK
2019 #Pf (’) RIGHT SINGLE QUOTATION MARK
2024 #Po (․) ONE DOT LEADER
2027 #Po (‧) HYPHENATION POINT
202A..202E #Cf [5] (..) LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
2060..2064 #Cf [5] (..) WORD JOINER..INVISIBLE PLUS
206A..206F #Cf [6] (..) INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES
2460..249B #No [60] (①..⒛) CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP
249C..24E9 #So [78] (⒜..ⓩ) PARENTHESIZED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z
24EA..24FF #No [22] (⓪..⓿) CIRCLED DIGIT ZERO..NEGATIVE CIRCLED DIGIT ZERO
2E2F #Lm (ⸯ) VERTICAL TILDE

## Total: 317 (omitting 116 from listing)


Let $Uax31Removals = [\-\u058A\u0F0B\u2010\u30A0\u30FB\u2E2F]

002D #Pd (-) HYPHEN-MINUS
058A #Pd (֊) ARMENIAN HYPHEN
0F0B #Po (་) TIBETAN MARK INTERSYLLABIC TSHEG
2010 #Pd (‐) HYPHEN
2E2F #Lm (ⸯ) VERTICAL TILDE
30A0 #Pd (゠) KATAKANA-HIRAGANA DOUBLE HYPHEN
30FB #Po (・) KATAKANA MIDDLE DOT

## Total: 7



[\p{Alpha}\p{WB=Extend}\p{WB=FO}\p{WB=LE}\p{WB=ML}\p{WB=MB}\p{WB=EX}-$WBRemovals] = [$gcAllLetters $gcAllMarks \p{gc=Nl}\p{gc=Pc}-$Uax31Removals]



# Derivations



\p{Math} = [\p{Other_Math} \p{GC=Sm}]

\p{Alphabetic} = [\p{Other_Alphabetic} \p{GC=Lu} \p{GC=Ll} \p{GC=Lt} \p{GC=Lm} \p{GC=Lo} \p{GC=Nl}]

\p{Lowercase} = [\p{Other_Lowercase} \p{GC=Ll}]

\p{Uppercase} = [\p{Other_Uppercase} \p{GC=Lu}]

\p{ID_Start} = [\p{Other_ID_Start} \p{GC=Lu} \p{GC=Ll} \p{GC=Lt} \p{GC=Lm} \p{GC=Lo} \p{GC=Nl} - \p{Pattern_Syntax} - \p{Pattern_White_Space}]

\p{ID_Continue} = [\p{Other_ID_Continue} \p{ID_Start} \p{GC=Mn} \p{GC=Mc} \p{GC=Nd} \p{GC=Pc} - \p{Pattern_Syntax} - \p{Pattern_White_Space}]


Let $DIExclusions = [\u0600-\u0603\u06DD\u070F\uFFF9-\uFFFB\U000110BD]

0600..0603 #Cf [4] (؀..؃) ARABIC NUMBER SIGN..ARABIC SIGN SAFHA
06DD #Cf (۝) ARABIC END OF AYAH
070F #Cf (܏) SYRIAC ABBREVIATION MARK
FFF9..FFFB #Cf [3] (..) INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR
110BD #Cf (𑂽) KAITHI NUMBER SIGN

## Total: 10



\p{Default_Ignorable_Code_Point} = [\p{Other_Default_Ignorable_Code_Point} \p{GC=Cf} \p{Variation_Selector} - [\p{White_Space} $DIExclusions]]


\p{Grapheme_Extend} = [\p{Other_Grapheme_Extend} \p{GC=Me} \p{GC=Mn}]


\p{Grapheme_Base} = [^\p{GC=Cc} \p{GC=Cf} \p{GC=Cs} \p{GC=Co} \p{GC=Cn} \p{GC=Zl} \p{GC=Zp} \p{Grapheme_Extend}]

\p{Grapheme_Link} = \p{CCC=Virama}


# "Minimal" Other_: NOT hard requirements; just if we want to be minimal

# (Should add a way to make these warnings, not errors)


\p{Other_Math} = [\p{Math} - \p{GC=Sm}]

\p{Other_Alphabetic} = [\p{Alphabetic} - [\p{GC=Lu} \p{GC=Ll} \p{GC=Lt} \p{GC=Lm} \p{GC=Lo} \p{GC=Nl}]]

\p{Other_Lowercase} = [\p{Lowercase} - \p{GC=Ll}]

\p{Other_Uppercase} = [\p{Uppercase} - \p{GC=Lu}]

\p{Other_ID_Start} = [\p{ID_Start} - [\p{GC=Lu} \p{GC=Ll} \p{GC=Lt} \p{GC=Lm} \p{GC=Lo} \p{GC=Nl} - \p{Pattern_Syntax} - \p{Pattern_White_Space}]]

\p{Other_ID_Continue} = [\p{ID_Continue} - [\p{ID_Start} \p{GC=Mn} \p{GC=Mc} \p{GC=Nd} \p{GC=Pc} - \p{Pattern_Syntax} - \p{Pattern_White_Space}]]


Let $Annotations = [\uFFF9-\uFFFB]

FFF9..FFFB #Cf [3] (..) INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR

## Total: 3



\p{Other_Default_Ignorable_Code_Point} = [\p{Default_Ignorable_Code_Point} - [\p{GC=Cf} \p{Variation_Selector} - [\p{White_Space} $Annotations]]]

\p{Other_Grapheme_Extend} = [\p{Grapheme_Extend} - [\p{GC=Me} \p{GC=Mn}]]



# POSIX Compatibility Properties (UTS#18)

# http://www.opengroup.org/onlinepubs/007904975/basedefs/xbd_chap07.html



# constants


Let $SP = [\u0020] # [\N{space}]

0020 #Zs ( ) SPACE

## Total: 1


Let $TAB = [\u0009] # [\N{CHARACTER TABULATION}]

0009 #Cc ()

## Total: 1


Let $LF = [\u000A] # \N{linefeed}

000A #Cc ( )

## Total: 1


Let $VTAB = [\u000B] # [\N{LINE TABULATION}]

000B #Cc ( )

## Total: 1


Let $FF = [\u000C] # [\N{formfeed}]

000C #Cc ( )

## Total: 1


Let $CR = [\u000D] # \N{carriage return}

000D #Cc ( )

## Total: 1


Let $NEL = [\u0085] # \N{next line}

0085 #Cc (…)

## Total: 1


#Let $ZWNJ = [\u200C] # [\N{ZERO WIDTH NON-JOINER}]

#Let $ZWJ = [\u200D] # [\N{ZERO WIDTH JOINER}]


Let $CircledAsciiLetters = [\u24B6-\u24E9]

24B6..24E9 #So [52] (Ⓐ..ⓩ) CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z

## Total: 52



# Unassigned, Control, Format, Private_Use, Surrogate,

# Uppercase_Letter, Lowercase_Letter, Titlecase_Letter, Modifier_Letter, Other_Letter,

# Nonspacing_Mark, Enclosing_Mark, Spacing_Mark,

# Decimal_Number, Letter_Number, Other_Number,

# Space_Separator, Line_Separator, Paragraph_Separator,

# Dash_Punctuation, Open_Punctuation, Close_Punctuation, Connector_Punctuation, Other_Punctuation, Initial_Punctuation, Final_Punctuation

# Math_Symbol, Currency_Symbol, Modifier_Symbol, Other_Symbol


# UTS Rules


Let $alpha = [\p{Alphabetic} $CircledAsciiLetters]

0041..005A #L& [26] (A..Z) LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z
0061..007A #L& [26] (a..z) LATIN SMALL LETTER A..LATIN SMALL LETTER Z
00AA #L& (ª) FEMININE ORDINAL INDICATOR
00B5 #L& (µ) MICRO SIGN
00BA #L& (º) MASCULINE ORDINAL INDICATOR
00C0..00D6 #L& [23] (À..Ö) LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS
00D8..00F6 #L& [31] (Ø..ö) LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS
00F8..01BA #L& [195] (ø..ƺ) LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL
01BB #Lo (ƻ) LATIN LETTER TWO WITH STROKE
01BC..01BF #L& [4] (Ƽ..ƿ) LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN
01C0..01C3 #Lo [4] (ǀ..ǃ) LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK
01C4..0293 #L& [208] (DŽ..ʓ) LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL
0294 #Lo (ʔ) LATIN LETTER GLOTTAL STOP
0295..02AF #L& [27] (ʕ..ʯ) LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL
02B0..02C1 #Lm [18] (ʰ..ˁ) MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP
02C6..02D1 #Lm [12] (ˆ..ˑ) MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON
02E0..02E4 #Lm [5] (ˠ..ˤ) MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
02EC #Lm (ˬ) MODIFIER LETTER VOICING
02EE #Lm (ˮ) MODIFIER LETTER DOUBLE APOSTROPHE
0345 #Mn (ͅ) COMBINING GREEK YPOGEGRAMMENI
0370..0373 #L& [4] (Ͱ..ͳ) GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI
0374 #Lm (ʹ) GREEK NUMERAL SIGN
0376..0377 #L& [2] (Ͷ..ͷ) GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA
037A #Lm (ͺ) GREEK YPOGEGRAMMENI
037B..037D #L& [3] (ͻ..ͽ) GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL
0386 #L& (Ά) GREEK CAPITAL LETTER ALPHA WITH TONOS
0388..038A #L& [3] (Έ..Ί) GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS
038C #L& (Ό) GREEK CAPITAL LETTER OMICRON WITH TONOS
038E..03A1 #L& [20] (Ύ..Ρ) GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO

## Total: 101,539 (omitting 100,916 from listing)


Let $lower = \p{Lowercase}

0061..007A #L& [26] (a..z) LATIN SMALL LETTER A..LATIN SMALL LETTER Z
00AA #L& (ª) FEMININE ORDINAL INDICATOR
00B5 #L& (µ) MICRO SIGN
00BA #L& (º) MASCULINE ORDINAL INDICATOR
00DF..00F6 #L& [24] (ß..ö) LATIN SMALL LETTER SHARP S..LATIN SMALL LETTER O WITH DIAERESIS
00F8..00FF #L& [8] (ø..ÿ) LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER Y WITH DIAERESIS
0101 #L& (ā) LATIN SMALL LETTER A WITH MACRON
0103 #L& (ă) LATIN SMALL LETTER A WITH BREVE
0105 #L& (ą) LATIN SMALL LETTER A WITH OGONEK
0107 #L& (ć) LATIN SMALL LETTER C WITH ACUTE
0109 #L& (ĉ) LATIN SMALL LETTER C WITH CIRCUMFLEX
010B #L& (ċ) LATIN SMALL LETTER C WITH DOT ABOVE
010D #L& (č) LATIN SMALL LETTER C WITH CARON
010F #L& (ď) LATIN SMALL LETTER D WITH CARON
0111 #L& (đ) LATIN SMALL LETTER D WITH STROKE
0113 #L& (ē) LATIN SMALL LETTER E WITH MACRON
0115 #L& (ĕ) LATIN SMALL LETTER E WITH BREVE
0117 #L& (ė) LATIN SMALL LETTER E WITH DOT ABOVE
0119 #L& (ę) LATIN SMALL LETTER E WITH OGONEK
011B #L& (ě) LATIN SMALL LETTER E WITH CARON

## Total: 1,918 (omitting 1,843 from listing)


Let $upper = [\p{Uppercase}]

0041..005A #L& [26] (A..Z) LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z
00C0..00D6 #L& [23] (À..Ö) LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS
00D8..00DE #L& [7] (Ø..Þ) LATIN CAPITAL LETTER O WITH STROKE..LATIN CAPITAL LETTER THORN
0100 #L& (Ā) LATIN CAPITAL LETTER A WITH MACRON
0102 #L& (Ă) LATIN CAPITAL LETTER A WITH BREVE
0104 #L& (Ą) LATIN CAPITAL LETTER A WITH OGONEK
0106 #L& (Ć) LATIN CAPITAL LETTER C WITH ACUTE
0108 #L& (Ĉ) LATIN CAPITAL LETTER C WITH CIRCUMFLEX
010A #L& (Ċ) LATIN CAPITAL LETTER C WITH DOT ABOVE
010C #L& (Č) LATIN CAPITAL LETTER C WITH CARON
010E #L& (Ď) LATIN CAPITAL LETTER D WITH CARON
0110 #L& (Đ) LATIN CAPITAL LETTER D WITH STROKE
0112 #L& (Ē) LATIN CAPITAL LETTER E WITH MACRON
0114 #L& (Ĕ) LATIN CAPITAL LETTER E WITH BREVE
0116 #L& (Ė) LATIN CAPITAL LETTER E WITH DOT ABOVE
0118 #L& (Ę) LATIN CAPITAL LETTER E WITH OGONEK
011A #L& (Ě) LATIN CAPITAL LETTER E WITH CARON
011C #L& (Ĝ) LATIN CAPITAL LETTER G WITH CIRCUMFLEX
011E #L& (Ğ) LATIN CAPITAL LETTER G WITH BREVE
0120 #L& (Ġ) LATIN CAPITAL LETTER G WITH DOT ABOVE

## Total: 1,478 (omitting 1,405 from listing)


Let $punct = [$gcAllPunctuation $gcAllSymbols - $alpha]

0021..0023 #Po [3] (!..#) EXCLAMATION MARK..NUMBER SIGN
0024 #Sc ($) DOLLAR SIGN
0025..0027 #Po [3] (%..') PERCENT SIGN..APOSTROPHE
0028 #Ps (() LEFT PARENTHESIS
0029 #Pe ()) RIGHT PARENTHESIS
002A #Po (*) ASTERISK
002B #Sm (+) PLUS SIGN
002C #Po (,) COMMA
002D #Pd (-) HYPHEN-MINUS
002E..002F #Po [2] (.../) FULL STOP..SOLIDUS
003A..003B #Po [2] (:..;) COLON..SEMICOLON
003C..003E #Sm [3] (<..>) LESS-THAN SIGN..GREATER-THAN SIGN
003F..0040 #Po [2] (?..@) QUESTION MARK..COMMERCIAL AT
005B #Ps ([) LEFT SQUARE BRACKET
005C #Po (\) REVERSE SOLIDUS
005D #Pe (]) RIGHT SQUARE BRACKET
005E #Sk (^) CIRCUMFLEX ACCENT
005F #Pc (_) LOW LINE
0060 #Sk (`) GRAVE ACCENT
007B #Ps ({) LEFT CURLY BRACKET
007C #Sm (|) VERTICAL LINE
007D #Pe (}) RIGHT CURLY BRACKET
007E #Sm (~) TILDE
00A1 #Po (¡) INVERTED EXCLAMATION MARK
00A2..00A5 #Sc [4] (¢..¥) CENT SIGN..YEN SIGN
00A6..00A7 #So [2] (¦..§) BROKEN BAR..SECTION SIGN
00A8 #Sk (¨) DIAERESIS
00A9 #So (©) COPYRIGHT SIGN
00AB #Pi («) LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
00AC #Sm (¬) NOT SIGN
00AE #So (®) REGISTERED SIGN
00AF #Sk (¯) MACRON
00B0 #So (°) DEGREE SIGN
00B1 #Sm (±) PLUS-MINUS SIGN
00B4 #Sk (´) ACUTE ACCENT
00B6 #So (¶) PILCROW SIGN
00B7 #Po (·) MIDDLE DOT
00B8 #Sk (¸) CEDILLA
00BB #Pf (») RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
00BF #Po (¿) INVERTED QUESTION MARK
00D7 #Sm (×) MULTIPLICATION SIGN
00F7 #Sm (÷) DIVISION SIGN
02C2..02C5 #Sk [4] (˂..˅) MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD
02D2..02DF #Sk [14] (˒..˟) MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER CROSS ACCENT
02E5..02EB #Sk [7] (˥..˫) MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK
02ED #Sk (˭) MODIFIER LETTER UNASPIRATED
02EF..02FF #Sk [17] (˯..˿) MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW
0375 #Sk (͵) GREEK LOWER NUMERAL SIGN
037E #Po (;) GREEK QUESTION MARK

## Total: 6,052 (omitting 5,952 from listing)


Let $digit = \p{gc=Decimal_Number}

0030..0039 #Nd [10] (0..9) DIGIT ZERO..DIGIT NINE
0660..0669 #Nd [10] (٠..٩) ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
06F0..06F9 #Nd [10] (۰..۹) EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE
07C0..07C9 #Nd [10] (߀..߉) NKO DIGIT ZERO..NKO DIGIT NINE
0966..096F #Nd [10] (०..९) DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE
09E6..09EF #Nd [10] (০..৯) BENGALI DIGIT ZERO..BENGALI DIGIT NINE
0A66..0A6F #Nd [10] (੦..੯) GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE
0AE6..0AEF #Nd [10] (૦..૯) GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE
0B66..0B6F #Nd [10] (୦..୯) ORIYA DIGIT ZERO..ORIYA DIGIT NINE
0BE6..0BEF #Nd [10] (௦..௯) TAMIL DIGIT ZERO..TAMIL DIGIT NINE
0C66..0C6F #Nd [10] (౦..౯) TELUGU DIGIT ZERO..TELUGU DIGIT NINE
0CE6..0CEF #Nd [10] (೦..೯) KANNADA DIGIT ZERO..KANNADA DIGIT NINE
0D66..0D6F #Nd [10] (൦..൯) MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE
0E50..0E59 #Nd [10] (๐..๙) THAI DIGIT ZERO..THAI DIGIT NINE
0ED0..0ED9 #Nd [10] (໐..໙) LAO DIGIT ZERO..LAO DIGIT NINE
0F20..0F29 #Nd [10] (༠..༩) TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE
1040..1049 #Nd [10] (၀..၉) MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE
1090..1099 #Nd [10] (႐..႙) MYANMAR SHAN DIGIT ZERO..MYANMAR SHAN DIGIT NINE
17E0..17E9 #Nd [10] (០..៩) KHMER DIGIT ZERO..KHMER DIGIT NINE
1810..1819 #Nd [10] (᠐..᠙) MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE

## Total: 421 (omitting 221 from listing)


Let $xdigit = [\p{gc=Decimal_Number} \p{Hex_Digit}] # in both!

0030..0039 #Nd [10] (0..9) DIGIT ZERO..DIGIT NINE
0041..0046 #L& [6] (A..F) LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER F
0061..0066 #L& [6] (a..f) LATIN SMALL LETTER A..LATIN SMALL LETTER F
0660..0669 #Nd [10] (٠..٩) ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
06F0..06F9 #Nd [10] (۰..۹) EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE
07C0..07C9 #Nd [10] (߀..߉) NKO DIGIT ZERO..NKO DIGIT NINE
0966..096F #Nd [10] (०..९) DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE
09E6..09EF #Nd [10] (০..৯) BENGALI DIGIT ZERO..BENGALI DIGIT NINE
0A66..0A6F #Nd [10] (੦..੯) GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE
0AE6..0AEF #Nd [10] (૦..૯) GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE
0B66..0B6F #Nd [10] (୦..୯) ORIYA DIGIT ZERO..ORIYA DIGIT NINE
0BE6..0BEF #Nd [10] (௦..௯) TAMIL DIGIT ZERO..TAMIL DIGIT NINE
0C66..0C6F #Nd [10] (౦..౯) TELUGU DIGIT ZERO..TELUGU DIGIT NINE
0CE6..0CEF #Nd [10] (೦..೯) KANNADA DIGIT ZERO..KANNADA DIGIT NINE
0D66..0D6F #Nd [10] (൦..൯) MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE
0E50..0E59 #Nd [10] (๐..๙) THAI DIGIT ZERO..THAI DIGIT NINE
0ED0..0ED9 #Nd [10] (໐..໙) LAO DIGIT ZERO..LAO DIGIT NINE
0F20..0F29 #Nd [10] (༠..༩) TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE
1040..1049 #Nd [10] (၀..၉) MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE
1090..1099 #Nd [10] (႐..႙) MYANMAR SHAN DIGIT ZERO..MYANMAR SHAN DIGIT NINE

## Total: 445 (omitting 253 from listing)


Let $alnum = [$alpha $digit]

0030..0039 #Nd [10] (0..9) DIGIT ZERO..DIGIT NINE
0041..005A #L& [26] (A..Z) LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z
0061..007A #L& [26] (a..z) LATIN SMALL LETTER A..LATIN SMALL LETTER Z
00AA #L& (ª) FEMININE ORDINAL INDICATOR
00B5 #L& (µ) MICRO SIGN
00BA #L& (º) MASCULINE ORDINAL INDICATOR
00C0..00D6 #L& [23] (À..Ö) LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS
00D8..00F6 #L& [31] (Ø..ö) LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS
00F8..01BA #L& [195] (ø..ƺ) LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL
01BB #Lo (ƻ) LATIN LETTER TWO WITH STROKE
01BC..01BF #L& [4] (Ƽ..ƿ) LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN
01C0..01C3 #Lo [4] (ǀ..ǃ) LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK
01C4..0293 #L& [208] (DŽ..ʓ) LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL
0294 #Lo (ʔ) LATIN LETTER GLOTTAL STOP
0295..02AF #L& [27] (ʕ..ʯ) LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL
02B0..02C1 #Lm [18] (ʰ..ˁ) MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP
02C6..02D1 #Lm [12] (ˆ..ˑ) MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON
02E0..02E4 #Lm [5] (ˠ..ˤ) MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
02EC #Lm (ˬ) MODIFIER LETTER VOICING
02EE #Lm (ˮ) MODIFIER LETTER DOUBLE APOSTROPHE
0345 #Mn (ͅ) COMBINING GREEK YPOGEGRAMMENI
0370..0373 #L& [4] (Ͱ..ͳ) GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI
0374 #Lm (ʹ) GREEK NUMERAL SIGN
0376..0377 #L& [2] (Ͷ..ͷ) GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA
037A #Lm (ͺ) GREEK YPOGEGRAMMENI
037B..037D #L& [3] (ͻ..ͽ) GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL
0386 #L& (Ά) GREEK CAPITAL LETTER ALPHA WITH TONOS
0388..038A #L& [3] (Έ..Ί) GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS
038C #L& (Ό) GREEK CAPITAL LETTER OMICRON WITH TONOS

## Total: 101,960 (omitting 101,347 from listing)


Let $space = \p{Whitespace}

0009..000D #Cc [5] (.. ) ..
0020 #Zs ( ) SPACE
0085 #Cc (…)
00A0 #Zs ( ) NO-BREAK SPACE
1680 #Zs ( ) OGHAM SPACE MARK
180E #Zs (᠎) MONGOLIAN VOWEL SEPARATOR
2000..200A #Zs [11] ( .. ) EN QUAD..HAIR SPACE
2028 #Zl (
) LINE SEPARATOR
2029 #Zp (
) PARAGRAPH SEPARATOR
202F #Zs ( ) NARROW NO-BREAK SPACE
205F #Zs ( ) MEDIUM MATHEMATICAL SPACE
3000 #Zs ( ) IDEOGRAPHIC SPACE

## Total: 26


Let $blank = [\p{Whitespace} - [$LF $VTAB $FF $CR $NEL \p{gc=Line_Separator} \p{gc=Paragraph_Separator}]]

0009 #Cc ()
0020 #Zs ( ) SPACE
00A0 #Zs ( ) NO-BREAK SPACE
1680 #Zs ( ) OGHAM SPACE MARK
180E #Zs (᠎) MONGOLIAN VOWEL SEPARATOR
2000..200A #Zs [11] ( .. ) EN QUAD..HAIR SPACE
202F #Zs ( ) NARROW NO-BREAK SPACE
205F #Zs ( ) MEDIUM MATHEMATICAL SPACE
3000 #Zs ( ) IDEOGRAPHIC SPACE

## Total: 19


Let $cntrl = \p{gc=Control}

0000..001F #Cc [32] (�..�) ..
007F..009F #Cc [33] (�..�) ..

## Total: 65


Let $graph = [^$space \p{gc=Control} \p{gc=Surrogate} \p{gc=Unassigned}]

0021..0023 #Po [3] (!..#) EXCLAMATION MARK..NUMBER SIGN
0024 #Sc ($) DOLLAR SIGN
0025..0027 #Po [3] (%..') PERCENT SIGN..APOSTROPHE
0028 #Ps (() LEFT PARENTHESIS
0029 #Pe ()) RIGHT PARENTHESIS
002A #Po (*) ASTERISK
002B #Sm (+) PLUS SIGN
002C #Po (,) COMMA
002D #Pd (-) HYPHEN-MINUS
002E..002F #Po [2] (.../) FULL STOP..SOLIDUS
0030..0039 #Nd [10] (0..9) DIGIT ZERO..DIGIT NINE
003A..003B #Po [2] (:..;) COLON..SEMICOLON
003C..003E #Sm [3] (<..>) LESS-THAN SIGN..GREATER-THAN SIGN
003F..0040 #Po [2] (?..@) QUESTION MARK..COMMERCIAL AT
0041..005A #L& [26] (A..Z) LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z
005B #Ps ([) LEFT SQUARE BRACKET
005C #Po (\) REVERSE SOLIDUS
005D #Pe (]) RIGHT SQUARE BRACKET
005E #Sk (^) CIRCUMFLEX ACCENT
005F #Pc (_) LOW LINE
0060 #Sk (`) GRAVE ACCENT
0061..007A #L& [26] (a..z) LATIN SMALL LETTER A..LATIN SMALL LETTER Z
007B #Ps ({) LEFT CURLY BRACKET
007C #Sm (|) VERTICAL LINE
007D #Pe (}) RIGHT CURLY BRACKET
007E #Sm (~) TILDE
00A1 #Po (¡) INVERTED EXCLAMATION MARK
00A2..00A5 #Sc [4] (¢..¥) CENT SIGN..YEN SIGN
00A6..00A7 #So [2] (¦..§) BROKEN BAR..SECTION SIGN
00A8 #Sk (¨) DIAERESIS
00A9 #So (©) COPYRIGHT SIGN
00AA #L& (ª) FEMININE ORDINAL INDICATOR
00AB #Pi («) LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
00AC #Sm (¬) NOT SIGN
00AD #Cf () SOFT HYPHEN
00AE #So (®) REGISTERED SIGN
00AF #Sk (¯) MACRON
00B0 #So (°) DEGREE SIGN
00B1 #Sm (±) PLUS-MINUS SIGN
00B2..00B3 #No [2] (²..³) SUPERSCRIPT TWO..SUPERSCRIPT THREE
00B4 #Sk (´) ACUTE ACCENT
00B5 #L& (µ) MICRO SIGN
00B6 #So (¶) PILCROW SIGN
00B7 #Po (·) MIDDLE DOT
00B8 #Sk (¸) CEDILLA
00B9 #No (¹) SUPERSCRIPT ONE
00BA #L& (º) MASCULINE ORDINAL INDICATOR
00BB #Pf (») RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
00BC..00BE #No [3] (¼..¾) VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS
00BF #Po (¿) INVERTED QUESTION MARK
00C0..00D6 #L& [23] (À..Ö) LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS
00D7 #Sm (×) MULTIPLICATION SIGN
00D8..00F6 #L& [31] (Ø..ö) LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS
00F7 #Sm (÷) DIVISION SIGN
00F8..01BA #L& [195] (ø..ƺ) LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL
01BB #Lo (ƻ) LATIN LETTER TWO WITH STROKE
01BC..01BF #L& [4] (Ƽ..ƿ) LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN
01C0..01C3 #Lo [4] (ǀ..ǃ) LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK
01C4..0293 #L& [208] (DŽ..ʓ) LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL
0294 #Lo (ʔ) LATIN LETTER GLOTTAL STOP
0295..02AF #L& [27] (ʕ..ʯ) LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL
02B0..02C1 #Lm [18] (ʰ..ˁ) MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP
02C2..02C5 #Sk [4] (˂..˅) MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD
02C6..02D1 #Lm [12] (ˆ..ˑ) MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON
02D2..02DF #Sk [14] (˒..˟) MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER CROSS ACCENT
02E0..02E4 #Lm [5] (ˠ..ˤ) MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
02E5..02EB #Sk [7] (˥..˫) MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK
02EC #Lm (ˬ) MODIFIER LETTER VOICING
02ED #Sk (˭) MODIFIER LETTER UNASPIRATED
02EE #Lm (ˮ) MODIFIER LETTER DOUBLE APOSTROPHE
02EF..02FF #Sk [17] (˯..˿) MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW
0300..036F #Mn [112] (̀..ͯ) COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X
0370..0373 #L& [4] (Ͱ..ͳ) GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI
0374 #Lm (ʹ) GREEK NUMERAL SIGN
0375 #Sk (͵) GREEK LOWER NUMERAL SIGN
0376..0377 #L& [2] (Ͷ..ͷ) GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA
037A #Lm (ͺ) GREEK YPOGEGRAMMENI
037B..037D #L& [3] (ͻ..ͽ) GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL
037E #Po (;) GREEK QUESTION MARK
0384..0385 #Sk [2] (΄..΅) GREEK TONOS..GREEK DIALYTIKA TONOS
0386 #L& (Ά) GREEK CAPITAL LETTER ALPHA WITH TONOS
0387 #Po (·) GREEK ANO TELEIA
0388..038A #L& [3] (Έ..Ί) GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS
038C #L& (Ό) GREEK CAPITAL LETTER OMICRON WITH TONOS
038E..03A1 #L& [20] (Ύ..Ρ) GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO
03A3..03F5 #L& [83] (Σ..ϵ) GREEK CAPITAL LETTER SIGMA..GREEK LUNATE EPSILON SYMBOL
03F6 #Sm (϶) GREEK REVERSED LUNATE EPSILON SYMBOL
03F7..0481 #L& [139] (Ϸ..ҁ) GREEK CAPITAL LETTER SHO..CYRILLIC SMALL LETTER KOPPA
0482 #So (҂) CYRILLIC THOUSANDS SIGN
0483..0487 #Mn [5] (҃..҇) COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE
0488..0489 #Me [2] (҈..҉) COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN
048A..0527 #L& [158] (Ҋ..ԧ) CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER SHHA WITH DESCENDER
0531..0556 #L& [38] (Ա..Ֆ) ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH
0559 #Lm (ՙ) ARMENIAN MODIFIER LETTER LEFT HALF RING
055A..055F #Po [6] (՚..՟) ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION MARK
0561..0587 #L& [39] (ա..և) ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN
0589 #Po (։) ARMENIAN FULL STOP
058A #Pd (֊) ARMENIAN HYPHEN
0591..05BD #Mn [45] (֑..ֽ) HEBREW ACCENT ETNAHTA..HEBREW POINT METEG
05BE #Pd (־) HEBREW PUNCTUATION MAQAF
05BF #Mn (ֿ) HEBREW POINT RAFE
05C0 #Po (׀) HEBREW PUNCTUATION PASEQ
05C1..05C2 #Mn [2] (ׁ..ׂ) HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT
05C3 #Po (׃) HEBREW PUNCTUATION SOF PASUQ
05C4..05C5 #Mn [2] (ׄ..ׅ) HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT
05C6 #Po (׆) HEBREW PUNCTUATION NUN HAFUKHA
05C7 #Mn (ׇ) HEBREW POINT QAMATS QATAN
05D0..05EA #Lo [27] (א..ת) HEBREW LETTER ALEF..HEBREW LETTER TAV
05F0..05F2 #Lo [3] (װ..ײ) HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW LIGATURE YIDDISH DOUBLE YOD
05F3..05F4 #Po [2] (׳..״) HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM
0600..0603 #Cf [4] (؀..؃) ARABIC NUMBER SIGN..ARABIC SIGN SAFHA
0606..0608 #Sm [3] (؆..؈) ARABIC-INDIC CUBE ROOT..ARABIC RAY
0609..060A #Po [2] (؉..؊) ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN THOUSAND SIGN
060B #Sc (؋) AFGHANI SIGN
060C..060D #Po [2] (،..؍) ARABIC COMMA..ARABIC DATE SEPARATOR
060E..060F #So [2] (؎..؏) ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA
0610..061A #Mn [11] (ؐ..ؚ) ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
061B #Po (؛) ARABIC SEMICOLON
061E..061F #Po [2] (؞..؟) ARABIC TRIPLE DOT PUNCTUATION MARK..ARABIC QUESTION MARK
0620..063F #Lo [32] (ؠ..ؿ) ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
0640 #Lm (ـ) ARABIC TATWEEL
0641..064A #Lo [10] (ف..ي) ARABIC LETTER FEH..ARABIC LETTER YEH
064B..065F #Mn [21] (ً..ٟ) ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW
0660..0669 #Nd [10] (٠..٩) ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
066A..066D #Po [4] (٪..٭) ARABIC PERCENT SIGN..ARABIC FIVE POINTED STAR
066E..066F #Lo [2] (ٮ..ٯ) ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF
0670 #Mn (ٰ) ARABIC LETTER SUPERSCRIPT ALEF
0671..06D3 #Lo [99] (ٱ..ۓ) ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE
06D4 #Po (۔) ARABIC FULL STOP
06D5 #Lo (ە) ARABIC LETTER AE
06D6..06DC #Mn [7] (ۖ..ۜ) ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN
06DD #Cf (۝) ARABIC END OF AYAH
06DE #Me (۞) ARABIC START OF RUB EL HIZB
06DF..06E4 #Mn [6] (۟..ۤ) ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA
06E5..06E6 #Lm [2] (ۥ..ۦ) ARABIC SMALL WAW..ARABIC SMALL YEH
06E7..06E8 #Mn [2] (ۧ..ۨ) ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON
06E9 #So (۩) ARABIC PLACE OF SAJDAH
06EA..06ED #Mn [4] (۪..ۭ) ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM
06EE..06EF #Lo [2] (ۮ..ۯ) ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V
06F0..06F9 #Nd [10] (۰..۹) EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE
06FA..06FC #Lo [3] (ۺ..ۼ) ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW
06FD..06FE #So [2] (۽..۾) ARABIC SIGN SINDHI AMPERSAND..ARABIC SIGN SINDHI POSTPOSITION MEN
06FF #Lo (ۿ) ARABIC LETTER HEH WITH INVERTED V
0700..070D #Po [14] (܀..܍) SYRIAC END OF PARAGRAPH..SYRIAC HARKLEAN ASTERISCUS
070F #Cf (܏) SYRIAC ABBREVIATION MARK
0710 #Lo (ܐ) SYRIAC LETTER ALAPH
0711 #Mn (ܑ) SYRIAC LETTER SUPERSCRIPT ALAPH
0712..072F #Lo [30] (ܒ..ܯ) SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH
0730..074A #Mn [27] (ܰ..݊) SYRIAC PTHAHA ABOVE..SYRIAC BARREKH
074D..07A5 #Lo [89] (ݍ..ޥ) SYRIAC LETTER SOGDIAN ZHAIN..THAANA LETTER WAAVU
07A6..07B0 #Mn [11] (ަ..ް) THAANA ABAFILI..THAANA SUKUN
07B1 #Lo (ޱ) THAANA LETTER NAA
07C0..07C9 #Nd [10] (߀..߉) NKO DIGIT ZERO..NKO DIGIT NINE
07CA..07EA #Lo [33] (ߊ..ߪ) NKO LETTER A..NKO LETTER JONA RA
07EB..07F3 #Mn [9] (߫..߳) NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
07F4..07F5 #Lm [2] (ߴ..ߵ) NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE
07F6 #So (߶) NKO SYMBOL OO DENNEN
07F7..07F9 #Po [3] (߷..߹) NKO SYMBOL GBAKURUNEN..NKO EXCLAMATION MARK
07FA #Lm (ߺ) NKO LAJANYALAN

## Total: 246,831 (omitting 244,929 from listing)


Let $print = [$graph $blank - $cntrl]

0020 #Zs ( ) SPACE
0021..0023 #Po [3] (!..#) EXCLAMATION MARK..NUMBER SIGN
0024 #Sc ($) DOLLAR SIGN
0025..0027 #Po [3] (%..') PERCENT SIGN..APOSTROPHE
0028 #Ps (() LEFT PARENTHESIS
0029 #Pe ()) RIGHT PARENTHESIS
002A #Po (*) ASTERISK
002B #Sm (+) PLUS SIGN
002C #Po (,) COMMA
002D #Pd (-) HYPHEN-MINUS
002E..002F #Po [2] (.../) FULL STOP..SOLIDUS
0030..0039 #Nd [10] (0..9) DIGIT ZERO..DIGIT NINE
003A..003B #Po [2] (:..;) COLON..SEMICOLON
003C..003E #Sm [3] (<..>) LESS-THAN SIGN..GREATER-THAN SIGN
003F..0040 #Po [2] (?..@) QUESTION MARK..COMMERCIAL AT
0041..005A #L& [26] (A..Z) LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z
005B #Ps ([) LEFT SQUARE BRACKET
005C #Po (\) REVERSE SOLIDUS
005D #Pe (]) RIGHT SQUARE BRACKET
005E #Sk (^) CIRCUMFLEX ACCENT
005F #Pc (_) LOW LINE
0060 #Sk (`) GRAVE ACCENT
0061..007A #L& [26] (a..z) LATIN SMALL LETTER A..LATIN SMALL LETTER Z
007B #Ps ({) LEFT CURLY BRACKET
007C #Sm (|) VERTICAL LINE
007D #Pe (}) RIGHT CURLY BRACKET
007E #Sm (~) TILDE
00A0 #Zs ( ) NO-BREAK SPACE
00A1 #Po (¡) INVERTED EXCLAMATION MARK
00A2..00A5 #Sc [4] (¢..¥) CENT SIGN..YEN SIGN
00A6..00A7 #So [2] (¦..§) BROKEN BAR..SECTION SIGN
00A8 #Sk (¨) DIAERESIS
00A9 #So (©) COPYRIGHT SIGN
00AA #L& (ª) FEMININE ORDINAL INDICATOR
00AB #Pi («) LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
00AC #Sm (¬) NOT SIGN
00AD #Cf () SOFT HYPHEN
00AE #So (®) REGISTERED SIGN
00AF #Sk (¯) MACRON
00B0 #So (°) DEGREE SIGN
00B1 #Sm (±) PLUS-MINUS SIGN
00B2..00B3 #No [2] (²..³) SUPERSCRIPT TWO..SUPERSCRIPT THREE
00B4 #Sk (´) ACUTE ACCENT
00B5 #L& (µ) MICRO SIGN
00B6 #So (¶) PILCROW SIGN
00B7 #Po (·) MIDDLE DOT
00B8 #Sk (¸) CEDILLA
00B9 #No (¹) SUPERSCRIPT ONE
00BA #L& (º) MASCULINE ORDINAL INDICATOR
00BB #Pf (») RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
00BC..00BE #No [3] (¼..¾) VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS
00BF #Po (¿) INVERTED QUESTION MARK
00C0..00D6 #L& [23] (À..Ö) LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS
00D7 #Sm (×) MULTIPLICATION SIGN
00D8..00F6 #L& [31] (Ø..ö) LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS
00F7 #Sm (÷) DIVISION SIGN
00F8..01BA #L& [195] (ø..ƺ) LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL
01BB #Lo (ƻ) LATIN LETTER TWO WITH STROKE
01BC..01BF #L& [4] (Ƽ..ƿ) LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN
01C0..01C3 #Lo [4] (ǀ..ǃ) LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK
01C4..0293 #L& [208] (DŽ..ʓ) LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL
0294 #Lo (ʔ) LATIN LETTER GLOTTAL STOP
0295..02AF #L& [27] (ʕ..ʯ) LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL
02B0..02C1 #Lm [18] (ʰ..ˁ) MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP
02C2..02C5 #Sk [4] (˂..˅) MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD
02C6..02D1 #Lm [12] (ˆ..ˑ) MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON
02D2..02DF #Sk [14] (˒..˟) MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER CROSS ACCENT
02E0..02E4 #Lm [5] (ˠ..ˤ) MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
02E5..02EB #Sk [7] (˥..˫) MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK
02EC #Lm (ˬ) MODIFIER LETTER VOICING
02ED #Sk (˭) MODIFIER LETTER UNASPIRATED
02EE #Lm (ˮ) MODIFIER LETTER DOUBLE APOSTROPHE
02EF..02FF #Sk [17] (˯..˿) MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW
0300..036F #Mn [112] (̀..ͯ) COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X
0370..0373 #L& [4] (Ͱ..ͳ) GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI
0374 #Lm (ʹ) GREEK NUMERAL SIGN
0375 #Sk (͵) GREEK LOWER NUMERAL SIGN
0376..0377 #L& [2] (Ͷ..ͷ) GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA
037A #Lm (ͺ) GREEK YPOGEGRAMMENI
037B..037D #L& [3] (ͻ..ͽ) GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL
037E #Po (;) GREEK QUESTION MARK
0384..0385 #Sk [2] (΄..΅) GREEK TONOS..GREEK DIALYTIKA TONOS
0386 #L& (Ά) GREEK CAPITAL LETTER ALPHA WITH TONOS
0387 #Po (·) GREEK ANO TELEIA
0388..038A #L& [3] (Έ..Ί) GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS
038C #L& (Ό) GREEK CAPITAL LETTER OMICRON WITH TONOS
038E..03A1 #L& [20] (Ύ..Ρ) GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO
03A3..03F5 #L& [83] (Σ..ϵ) GREEK CAPITAL LETTER SIGMA..GREEK LUNATE EPSILON SYMBOL
03F6 #Sm (϶) GREEK REVERSED LUNATE EPSILON SYMBOL
03F7..0481 #L& [139] (Ϸ..ҁ) GREEK CAPITAL LETTER SHO..CYRILLIC SMALL LETTER KOPPA
0482 #So (҂) CYRILLIC THOUSANDS SIGN
0483..0487 #Mn [5] (҃..҇) COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE
0488..0489 #Me [2] (҈..҉) COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN
048A..0527 #L& [158] (Ҋ..ԧ) CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER SHHA WITH DESCENDER
0531..0556 #L& [38] (Ա..Ֆ) ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH
0559 #Lm (ՙ) ARMENIAN MODIFIER LETTER LEFT HALF RING
055A..055F #Po [6] (՚..՟) ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION MARK
0561..0587 #L& [39] (ա..և) ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN
0589 #Po (։) ARMENIAN FULL STOP
058A #Pd (֊) ARMENIAN HYPHEN
0591..05BD #Mn [45] (֑..ֽ) HEBREW ACCENT ETNAHTA..HEBREW POINT METEG
05BE #Pd (־) HEBREW PUNCTUATION MAQAF
05BF #Mn (ֿ) HEBREW POINT RAFE
05C0 #Po (׀) HEBREW PUNCTUATION PASEQ
05C1..05C2 #Mn [2] (ׁ..ׂ) HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT
05C3 #Po (׃) HEBREW PUNCTUATION SOF PASUQ
05C4..05C5 #Mn [2] (ׄ..ׅ) HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT
05C6 #Po (׆) HEBREW PUNCTUATION NUN HAFUKHA
05C7 #Mn (ׇ) HEBREW POINT QAMATS QATAN
05D0..05EA #Lo [27] (א..ת) HEBREW LETTER ALEF..HEBREW LETTER TAV
05F0..05F2 #Lo [3] (װ..ײ) HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW LIGATURE YIDDISH DOUBLE YOD
05F3..05F4 #Po [2] (׳..״) HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM
0600..0603 #Cf [4] (؀..؃) ARABIC NUMBER SIGN..ARABIC SIGN SAFHA
0606..0608 #Sm [3] (؆..؈) ARABIC-INDIC CUBE ROOT..ARABIC RAY
0609..060A #Po [2] (؉..؊) ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN THOUSAND SIGN
060B #Sc (؋) AFGHANI SIGN
060C..060D #Po [2] (،..؍) ARABIC COMMA..ARABIC DATE SEPARATOR
060E..060F #So [2] (؎..؏) ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA
0610..061A #Mn [11] (ؐ..ؚ) ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
061B #Po (؛) ARABIC SEMICOLON
061E..061F #Po [2] (؞..؟) ARABIC TRIPLE DOT PUNCTUATION MARK..ARABIC QUESTION MARK
0620..063F #Lo [32] (ؠ..ؿ) ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
0640 #Lm (ـ) ARABIC TATWEEL
0641..064A #Lo [10] (ف..ي) ARABIC LETTER FEH..ARABIC LETTER YEH
064B..065F #Mn [21] (ً..ٟ) ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW
0660..0669 #Nd [10] (٠..٩) ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
066A..066D #Po [4] (٪..٭) ARABIC PERCENT SIGN..ARABIC FIVE POINTED STAR
066E..066F #Lo [2] (ٮ..ٯ) ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF
0670 #Mn (ٰ) ARABIC LETTER SUPERSCRIPT ALEF
0671..06D3 #Lo [99] (ٱ..ۓ) ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE
06D4 #Po (۔) ARABIC FULL STOP
06D5 #Lo (ە) ARABIC LETTER AE
06D6..06DC #Mn [7] (ۖ..ۜ) ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN
06DD #Cf (۝) ARABIC END OF AYAH
06DE #Me (۞) ARABIC START OF RUB EL HIZB
06DF..06E4 #Mn [6] (۟..ۤ) ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA
06E5..06E6 #Lm [2] (ۥ..ۦ) ARABIC SMALL WAW..ARABIC SMALL YEH
06E7..06E8 #Mn [2] (ۧ..ۨ) ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON
06E9 #So (۩) ARABIC PLACE OF SAJDAH
06EA..06ED #Mn [4] (۪..ۭ) ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM
06EE..06EF #Lo [2] (ۮ..ۯ) ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V
06F0..06F9 #Nd [10] (۰..۹) EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE
06FA..06FC #Lo [3] (ۺ..ۼ) ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW
06FD..06FE #So [2] (۽..۾) ARABIC SIGN SINDHI AMPERSAND..ARABIC SIGN SINDHI POSTPOSITION MEN
06FF #Lo (ۿ) ARABIC LETTER HEH WITH INVERTED V
0700..070D #Po [14] (܀..܍) SYRIAC END OF PARAGRAPH..SYRIAC HARKLEAN ASTERISCUS
070F #Cf (܏) SYRIAC ABBREVIATION MARK
0710 #Lo (ܐ) SYRIAC LETTER ALAPH
0711 #Mn (ܑ) SYRIAC LETTER SUPERSCRIPT ALAPH
0712..072F #Lo [30] (ܒ..ܯ) SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH
0730..074A #Mn [27] (ܰ..݊) SYRIAC PTHAHA ABOVE..SYRIAC BARREKH
074D..07A5 #Lo [89] (ݍ..ޥ) SYRIAC LETTER SOGDIAN ZHAIN..THAANA LETTER WAAVU
07A6..07B0 #Mn [11] (ަ..ް) THAANA ABAFILI..THAANA SUKUN
07B1 #Lo (ޱ) THAANA LETTER NAA
07C0..07C9 #Nd [10] (߀..߉) NKO DIGIT ZERO..NKO DIGIT NINE
07CA..07EA #Lo [33] (ߊ..ߪ) NKO LETTER A..NKO LETTER JONA RA
07EB..07F3 #Mn [9] (߫..߳) NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
07F4..07F5 #Lm [2] (ߴ..ߵ) NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE
07F6 #So (߶) NKO SYMBOL OO DENNEN
07F7..07F9 #Po [3] (߷..߹) NKO SYMBOL GBAKURUNEN..NKO EXCLAMATION MARK
07FA #Lm (ߺ) NKO LAJANYALAN

## Total: 246,849 (omitting 244,945 from listing)


Let $word = [$alpha $gcAllMarks $digit \p{gc=Connector_Punctuation}]

0030..0039 #Nd [10] (0..9) DIGIT ZERO..DIGIT NINE
0041..005A #L& [26] (A..Z) LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z
005F #Pc (_) LOW LINE
0061..007A #L& [26] (a..z) LATIN SMALL LETTER A..LATIN SMALL LETTER Z
00AA #L& (ª) FEMININE ORDINAL INDICATOR
00B5 #L& (µ) MICRO SIGN
00BA #L& (º) MASCULINE ORDINAL INDICATOR
00C0..00D6 #L& [23] (À..Ö) LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS
00D8..00F6 #L& [31] (Ø..ö) LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS
00F8..01BA #L& [195] (ø..ƺ) LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL
01BB #Lo (ƻ) LATIN LETTER TWO WITH STROKE
01BC..01BF #L& [4] (Ƽ..ƿ) LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN
01C0..01C3 #Lo [4] (ǀ..ǃ) LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK
01C4..0293 #L& [208] (DŽ..ʓ) LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL
0294 #Lo (ʔ) LATIN LETTER GLOTTAL STOP
0295..02AF #L& [27] (ʕ..ʯ) LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL
02B0..02C1 #Lm [18] (ʰ..ˁ) MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP
02C6..02D1 #Lm [12] (ˆ..ˑ) MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON
02E0..02E4 #Lm [5] (ˠ..ˤ) MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
02EC #Lm (ˬ) MODIFIER LETTER VOICING
02EE #Lm (ˮ) MODIFIER LETTER DOUBLE APOSTROPHE
0300..036F #Mn [112] (̀..ͯ) COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X
0370..0373 #L& [4] (Ͱ..ͳ) GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI
0374 #Lm (ʹ) GREEK NUMERAL SIGN
0376..0377 #L& [2] (Ͷ..ͷ) GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA
037A #Lm (ͺ) GREEK YPOGEGRAMMENI
037B..037D #L& [3] (ͻ..ͽ) GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL
0386 #L& (Ά) GREEK CAPITAL LETTER ALPHA WITH TONOS
0388..038A #L& [3] (Έ..Ί) GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS
038C #L& (Ό) GREEK CAPITAL LETTER OMICRON WITH TONOS

## Total: 102,726 (omitting 102,001 from listing)



# ===========================


# POSIX locale definition file constraints


$upper ∥ [$cntrl $digit $punct $space]

$upper ⊇ [A-Z]


$lower ∥ [$cntrl $digit $punct $space]

$lower ⊇ [a-z]


$alpha ∥ [$cntrl $digit $punct $space]

$alpha ⊇ [$lower $upper]


$digit ⊇ [0-9]


$alnum = [$alpha $digit]


$space ∥ [$upper $lower $alpha $digit $graph $xdigit]

$space ⊇ [$SP $FF $LF $CR] # excluded from this check: $TAB $VTAB $NEL

$space ⊇ $blank


$cntrl ∥ [$upper $lower $alpha $digit $punct $graph $print $xdigit]


$punct ∥ [$upper $lower $alpha $digit $cntrl $xdigit $SP]


$graph ⊇ [$upper $lower $alpha $digit $xdigit $punct]

$graph ∥ [$SP $cntrl]


$print ⊇ [$upper $lower $alpha $digit $xdigit $punct $graph $SP]

$print ∥ $cntrl


$xdigit ⊇ [$digit [a-f A-F]]


$blank ⊇ [$SP $TAB]


# Extra POSIX 'POSIX locale' constraints


Let $C0Controls = [\u0000-\u001F]

0000..001F #Cc [32] (�..�) ..

## Total: 32



$cntrl ⊇ $C0Controls


$punct ⊇ [[\u0021-\u007E] - [0-9 A-Z a-z]]


[$alpha $lower $upper $punct $digit $xdigit $alnum $space $blank $cntrl $graph $print $word] = [^\p{gc=unassigned} \p{gc=surrogate}]


**** SUMMARY ****


# ParseErrorCount=0

# TestFailureCount=0