1#!/bin/sh
2
3# vim: indentexpr= nosmartindent autoindent
4# vim: tabstop=2 shiftwidth=2 softtabstop=2
5
# This regex was written by hand, derived from the rules in UAX #29.
# Specifically, it follows Table 1c, which lays out a regex for grapheme
# clusters.
8
# Regex classes for the `gcb` (Grapheme_Cluster_Break) property values that
# the pattern below is assembled from. Single quotes keep every backslash
# literal, so each value reaches the regex engine exactly as written.
CR='\p{gcb=CR}'
LF='\p{gcb=LF}'
Control='\p{gcb=Control}'
Prepend='\p{gcb=Prepend}'
L='\p{gcb=L}'
V='\p{gcb=V}'
LV='\p{gcb=LV}'
LVT='\p{gcb=LVT}'
T='\p{gcb=T}'
RI='\p{gcb=RI}'
Extend='\p{gcb=Extend}'
ZWJ='\p{gcb=ZWJ}'
SpacingMark='\p{gcb=SpacingMark}'

# Classes that are not `gcb` values: the catch-all class and the
# Extended_Pictographic property.
Any='\p{any}'
ExtendPict='\p{Extended_Pictographic}'
25
# Print the grapheme cluster regex to stdout.
#
# The pattern starts with the (?x) flag and is laid out across several lines
# for readability. Its top-level alternatives are, in order:
#
#   1. CR followed by LF
#   2. a single Control
#   3. Prepend*, then one core (a Hangul syllable sequence, a pair of RI,
#      an Extended_Pictographic sequence joined by ZWJ, or anything that is
#      not Control/CR/LF), then any run of Extend, ZWJ or SpacingMark
#   4. any single code point, as the fallback
#
# Reads the regex fragment variables (CR, LF, Control, Prepend, L, V, LV, LVT,
# T, RI, Extend, ZWJ, SpacingMark, Any, ExtendPict) from the enclosing script.
print_grapheme_regex() {
  # Use printf '%s\n' instead of echo: every fragment contains a backslash
  # (\p{...}), and POSIX leaves echo's output implementation-defined when an
  # operand contains a backslash. printf '%s' writes the text verbatim.
  #
  # The pattern lines stay at column 0 and the string keeps its trailing
  # newline so the emitted bytes match what the script has always printed.
  printf '%s\n' "(?x)
$CR $LF
|
$Control
|
$Prepend*
(
  (
    ($L* ($V+ | $LV $V* | $LVT) $T*)
    |
    $L+
    |
    $T+
  )
  |
  $RI $RI
  |
  $ExtendPict ($Extend* $ZWJ $ExtendPict)*
  |
  [^$Control $CR $LF]
)
[$Extend $ZWJ $SpacingMark]*
|
$Any
"
}

print_grapheme_regex
51