diff options
-rw-r--r-- | TODOsymbols | 18 | ||||
-rw-r--r-- | symbols | 38 | ||||
-rw-r--r-- | xmlcleanup.sed | 1 |
3 files changed, 25 insertions, 32 deletions
diff --git a/TODOsymbols b/TODOsymbols deleted file mode 100644 index 2675e47..0000000 --- a/TODOsymbols +++ /dev/null @@ -1,18 +0,0 @@ -atildepr -bantuo -Bantuo -ccedpr -dlessj1 -dlessj2 -edhpr -ff0 -ff2 -ff3 -hg -itildepr -ohgcirc -par -reva -roasper -ruasper -woqab @@ -1,27 +1,25 @@ -# All symbols can be represented either as &symbol; or _symbol_ so +# All symbols are represented either as &symbol; or _symbol_ so # parse these with symbolstosed to create sed rules. # Many of these are documented in the Help/Advanced/ directory of the OED CDROM # notes: -# 'frown' is sad face circumflex ('inverted breve', in case of greek just use tilde) -# 'tilde' is wiggly circumflex +# 'frown' is inverted breve, except in the case of greek where it means tilde +# 'tilde' is tilde (~) # 'circ' is circumflex (^) # 'dotab' is dot above +# 'dotbl' is dot below # 'asper' is asperated # 'lenis' is non-asperated # 'mac' is macron U+0304 -# 'uml' is umlaut (e.g. diaeresis) -# 'ang' is ring above -# 'ced' is cedilla e.g. garcon -# 'breve' U+0306 -# 'ac' acute U+0301 -# 'sc' small capitals (changed to normal capitals here) +# 'uml' is diaeresis (umlaut) +# 'ang' is ring above (angstrom) +# 'ced' is cedilla +# 'breve' is U+0306 +# 'acu' is acute U+0301 +# 'sc' is small capitals (changed to normal capitals here) +# 'pr' is unknown, seems to look identical so using non-pr characters -# unknown: -# pr - -# going through in alphabetical order \#038 \& \#062 > aacu á @@ -92,6 +90,7 @@ amacundl ā̲ asg ᵹ Asg Ᵹ atilde ã +atildepr ã atildesc à auml ä aumlsc Ä @@ -99,6 +98,8 @@ Auml Ä aundl a̲ ayin ʿ ayindotabove ʿ̇ +bantuo ω +Bantuo ω bbar ƀ beta β Beta Β @@ -111,6 +112,7 @@ cbr } cbreve c̆ Cbreve C̆ cced ç +ccedpr ç ccedsc Ç Cced Ç ccirc ĉ @@ -158,6 +160,8 @@ devt त devth थ digamma ͷ dlessi ı +dlessj1 ȷ +dlessj2 ȷ dlessj3 ȷ dtilde d̃ dundl ḏ @@ -177,6 +181,7 @@ Ecirc Ê egrave è egravesc È edh ð +edhpr ð Edh Ð edotab ė edotabacu ė́ @@ -342,6 +347,7 @@ iota ι Iota Ι istlig st itilde ĩ +itildepr ĩ iuml ï iumlsc Ï iundl i̲ @@ -439,6 +445,7 @@ ofrown ȏ ograve ò ogravesc Ò ohacek ǒ +ohgcirc ^ ohook ỏ ohookacu ỏ́ ohookbreve ỏ̆ @@ -503,7 +510,7 @@ Rdotbl Ṛ rdotblacu ṛ́ rdotblmac ṝ real ℝ -reva ɐ +reva ɒ revc ↄ revC Ↄ revctilde ↄ̃ @@ -521,8 +528,10 @@ rhacek ř rho ρ Rho Ρ rlenis ῤ +roasper o rsyllab r̩ rtilde r̃ +ruasper u runash ᚨ runwyn ᚹ rundl ṟ @@ -664,6 +673,7 @@ Wlenisacu Ὤ wlenisfrown ὦ wlenisisub ᾠ wmac w̄ +woqab w̓ wyn ƿ Wyn Ƿ xdotab ẋ diff --git a/xmlcleanup.sed b/xmlcleanup.sed index c757859..40c6bde 100644 --- a/xmlcleanup.sed +++ b/xmlcleanup.sed @@ -3,6 +3,7 @@ # seemingly unneeded control characters s/&ff[0-9];//g s/xxff0//g +s/xxff2//g s/xxff3//g # <q> corresponds to a quote |