#!/bin/sh

# vim: indentexpr= nosmartindent autoindent
# vim: tabstop=2 shiftwidth=2 softtabstop=2

# Prints, on stdout, a verbose-mode ((?x)) regular expression that matches a
# single extended grapheme cluster.
#
# This regex was manually written, derived from the rules in UAX #29.
# Particularly, from Table 1c, which lays out a regex for grapheme clusters.
#
# Usage: sh <this-script> > grapheme.regex

# Grapheme_Cluster_Break property classes. Single quotes keep the backslashes
# literal without relying on double-quote escaping rules.
CR='\p{gcb=CR}'
LF='\p{gcb=LF}'
Control='\p{gcb=Control}'
Prepend='\p{gcb=Prepend}'
L='\p{gcb=L}'
V='\p{gcb=V}'
LV='\p{gcb=LV}'
LVT='\p{gcb=LVT}'
T='\p{gcb=T}'
RI='\p{gcb=RI}'
Extend='\p{gcb=Extend}'
ZWJ='\p{gcb=ZWJ}'
SpacingMark='\p{gcb=SpacingMark}'

# Classes that are not Grapheme_Cluster_Break values.
Any='\p{any}'
ExtendPict='\p{Extended_Pictographic}'

# Writes the assembled regex to stdout.
#
# Globals (read): all of the property-class variables defined above.
# Outputs: the pattern, followed by one empty line (the pattern text itself
#   ends in a newline and printf appends another).
#
# printf '%s\n' is used instead of echo because the pattern is full of
# backslashes, and POSIX leaves echo's treatment of backslashes
# implementation-defined; printf '%s' emits its argument verbatim in every
# shell that may be installed as /bin/sh.
grapheme_regex() {
  printf '%s\n' "(?x)
${CR} ${LF}
|
${Control}
|
${Prepend}*
(
  (
    (${L}* (${V}+ | ${LV} ${V}* | ${LVT}) ${T}*)
    |
    ${L}+
    |
    ${T}+
  )
  |
  ${RI} ${RI}
  |
  ${ExtendPict} (${Extend}* ${ZWJ} ${ExtendPict})*
  |
  [^${Control} ${CR} ${LF}]
)
[${Extend} ${ZWJ} ${SpacingMark}]*
|
${Any}
"
}

grapheme_regex