From a534eabc92150e47c8ded438379ebc288256f8aa Mon Sep 17 00:00:00 2001
From: Nick White <git@njw.me.uk>
Date: Mon, 13 Jan 2014 14:11:18 +0000
Subject: Finish symbols

---
 TODOsymbols    | 18 ------------------
 symbols        | 38 ++++++++++++++++++++++++--------------
 xmlcleanup.sed |  1 +
 3 files changed, 25 insertions(+), 32 deletions(-)
 delete mode 100644 TODOsymbols

diff --git a/TODOsymbols b/TODOsymbols
deleted file mode 100644
index 2675e47..0000000
--- a/TODOsymbols
+++ /dev/null
@@ -1,18 +0,0 @@
-atildepr
-bantuo
-Bantuo
-ccedpr
-dlessj1
-dlessj2
-edhpr
-ff0
-ff2
-ff3
-hg
-itildepr
-ohgcirc
-par
-reva
-roasper
-ruasper
-woqab
diff --git a/symbols b/symbols
index ffccee4..bf7b4bb 100644
--- a/symbols
+++ b/symbols
@@ -1,27 +1,25 @@
-# All symbols can be represented either as &symbol; or _symbol_ so
+# All symbols are represented either as &symbol; or _symbol_ so
 # parse these with symbolstosed to create sed rules.
 
 # Many of these are documented in the Help/Advanced/ directory of the OED CDROM
 
 # notes:
-# 'frown' is sad face circumflex ('inverted breve', in case of greek just use tilde)
-# 'tilde' is wiggly circumflex
+# 'frown' is inverted breve, except in the case of Greek where it means tilde
+# 'tilde' is tilde (~)
 # 'circ' is circumflex (^)
 # 'dotab' is dot above
+# 'dotbl' is dot below
 # 'asper' is asperated
 # 'lenis' is non-asperated
 # 'mac' is macron U+0304
-# 'uml' is umlaut (e.g. diaeresis)
-# 'ang' is ring above
-# 'ced' is cedilla e.g. garcon
-# 'breve' U+0306
-# 'ac' acute U+0301
-# 'sc' small capitals (changed to normal capitals here)
+# 'uml' is diaeresis (umlaut)
+# 'ang' is ring above (angstrom)
+# 'ced' is cedilla
+# 'breve' is U+0306
+# 'acu' is acute U+0301
+# 'sc' is small capitals (changed to normal capitals here)
+# 'pr' is unknown; the glyphs seem to look identical, so the non-pr characters are used
 
-# unknown:
-# pr
-
-# going through in alphabetical order
 \#038 \&
 \#062 >
 aacu á
@@ -92,6 +90,7 @@ amacundl ā̲
 asg ᵹ
 Asg Ᵹ
 atilde ã
+atildepr ã
 atildesc Ã
 auml ä
 aumlsc Ä
@@ -99,6 +98,8 @@ Auml Ä
 aundl a̲
 ayin ʿ
 ayindotabove ʿ̇
+bantuo ω
+Bantuo ω
 bbar ƀ
 beta β
 Beta Β
@@ -111,6 +112,7 @@ cbr }
 cbreve c̆
 Cbreve C̆
 cced ç
+ccedpr ç
 ccedsc Ç
 Cced Ç
 ccirc ĉ
@@ -158,6 +160,8 @@ devt त
 devth थ
 digamma ͷ
 dlessi ı
+dlessj1 ȷ
+dlessj2 ȷ
 dlessj3 ȷ
 dtilde d̃
 dundl ḏ
@@ -177,6 +181,7 @@ Ecirc Ê
 egrave è
 egravesc È
 edh ð
+edhpr ð
 Edh Ð
 edotab ė
 edotabacu ė́
@@ -342,6 +347,7 @@ iota ι
 Iota Ι
 istlig st
 itilde ĩ
+itildepr ĩ
 iuml ï
 iumlsc Ï
 iundl i̲
@@ -439,6 +445,7 @@ ofrown ȏ
 ograve ò
 ogravesc Ò
 ohacek ǒ
+ohgcirc ^
 ohook ỏ
 ohookacu ỏ́
 ohookbreve ỏ̆
@@ -503,7 +510,7 @@ Rdotbl Ṛ
 rdotblacu ṛ́
 rdotblmac ṝ
 real ℝ
-reva ɐ
+reva ɒ
 revc ↄ
 revC Ↄ
 revctilde ↄ̃
@@ -521,8 +528,10 @@ rhacek ř
 rho ρ
 Rho Ρ
 rlenis ῤ
+roasper o
 rsyllab r̩
 rtilde r̃
+ruasper u
 runash ᚨ
 runwyn ᚹ
 rundl ṟ
@@ -664,6 +673,7 @@ Wlenisacu Ὤ
 wlenisfrown ὦ
 wlenisisub ᾠ
 wmac w̄
+woqab w̓
 wyn ƿ
 Wyn Ƿ
 xdotab ẋ
diff --git a/xmlcleanup.sed b/xmlcleanup.sed
index c757859..40c6bde 100644
--- a/xmlcleanup.sed
+++ b/xmlcleanup.sed
@@ -3,6 +3,7 @@
 # seemingly unneeded control characters
 s/&ff[0-9];//g
 s/xxff0//g
+s/xxff2//g
 s/xxff3//g
 
 # <q> corresponds to a quote
-- 
cgit v1.2.3