summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Nick White <git@njw.me.uk> 2014-01-13 14:11:18 +0000
committer Nick White <git@njw.me.uk> 2014-01-13 14:11:18 +0000
commit a534eabc92150e47c8ded438379ebc288256f8aa (patch)
tree 4bd6fc0e92084a72ccff9caaca3c9aa13a6f6c46
parent 18296d37b0c501d4a8c8df06c76b946f9271bbe2 (diff)
download oed2dict-a534eabc92150e47c8ded438379ebc288256f8aa.tar.bz2
oed2dict-a534eabc92150e47c8ded438379ebc288256f8aa.zip
Finish symbols
-rw-r--r-- TODOsymbols 18
-rw-r--r-- symbols 38
-rw-r--r-- xmlcleanup.sed 1
3 files changed, 25 insertions, 32 deletions
diff --git a/TODOsymbols b/TODOsymbols
deleted file mode 100644
index 2675e47..0000000
--- a/TODOsymbols
+++ /dev/null
@@ -1,18 +0,0 @@
-atildepr
-bantuo
-Bantuo
-ccedpr
-dlessj1
-dlessj2
-edhpr
-ff0
-ff2
-ff3
-hg
-itildepr
-ohgcirc
-par
-reva
-roasper
-ruasper
-woqab
diff --git a/symbols b/symbols
index ffccee4..bf7b4bb 100644
--- a/symbols
+++ b/symbols
@@ -1,27 +1,25 @@
-# All symbols can be represented either as &symbol; or _symbol_ so
+# All symbols are represented either as &symbol; or _symbol_ so
# parse these with symbolstosed to create sed rules.
# Many of these are documented in the Help/Advanced/ directory of the OED CDROM
# notes:
-# 'frown' is sad face circumflex ('inverted breve', in case of greek just use tilde)
-# 'tilde' is wiggly circumflex
+# 'frown' is inverted breve, except in the case of greek where it means tilde
+# 'tilde' is tilde (~)
# 'circ' is circumflex (^)
# 'dotab' is dot above
+# 'dotbl' is dot below
# 'asper' is asperated
# 'lenis' is non-asperated
# 'mac' is macron U+0304
-# 'uml' is umlaut (e.g. diaeresis)
-# 'ang' is ring above
-# 'ced' is cedilla e.g. garcon
-# 'breve' U+0306
-# 'ac' acute U+0301
-# 'sc' small capitals (changed to normal capitals here)
+# 'uml' is diaeresis (umlaut)
+# 'ang' is ring above (angstrom)
+# 'ced' is cedilla
+# 'breve' is U+0306
+# 'acu' is acute U+0301
+# 'sc' is small capitals (changed to normal capitals here)
+# 'pr' is unknown, seems to look identical so using non-pr characters
-# unknown:
-# pr
-
-# going through in alphabetical order
\#038 \&
\#062 >
aacu á
@@ -92,6 +90,7 @@ amacundl ā̲
asg ᵹ
Asg Ᵹ
atilde ã
+atildepr ã
atildesc Ã
auml ä
aumlsc Ä
@@ -99,6 +98,8 @@ Auml Ä
aundl a̲
ayin ʿ
ayindotabove ʿ̇
+bantuo ω
+Bantuo ω
bbar ƀ
beta β
Beta Β
@@ -111,6 +112,7 @@ cbr }
cbreve c̆
Cbreve C̆
cced ç
+ccedpr ç
ccedsc Ç
Cced Ç
ccirc ĉ
@@ -158,6 +160,8 @@ devt त
devth थ
digamma ͷ
dlessi ı
+dlessj1 ȷ
+dlessj2 ȷ
dlessj3 ȷ
dtilde d̃
dundl ḏ
@@ -177,6 +181,7 @@ Ecirc Ê
egrave è
egravesc È
edh ð
+edhpr ð
Edh Ð
edotab ė
edotabacu ė́
@@ -342,6 +347,7 @@ iota ι
Iota Ι
istlig st
itilde ĩ
+itildepr ĩ
iuml ï
iumlsc Ï
iundl i̲
@@ -439,6 +445,7 @@ ofrown ȏ
ograve ò
ogravesc Ò
ohacek ǒ
+ohgcirc ^
ohook ỏ
ohookacu ỏ́
ohookbreve ỏ̆
@@ -503,7 +510,7 @@ Rdotbl Ṛ
rdotblacu ṛ́
rdotblmac ṝ
real ℝ
-reva ɐ
+reva ɒ
revc ↄ
revC Ↄ
revctilde ↄ̃
@@ -521,8 +528,10 @@ rhacek ř
rho ρ
Rho Ρ
rlenis ῤ
+roasper o
rsyllab r̩
rtilde r̃
+ruasper u
runash ᚨ
runwyn ᚹ
rundl ṟ
@@ -664,6 +673,7 @@ Wlenisacu Ὤ
wlenisfrown ὦ
wlenisisub ᾠ
wmac w̄
+woqab w̓
wyn ƿ
Wyn Ƿ
xdotab ẋ
diff --git a/xmlcleanup.sed b/xmlcleanup.sed
index c757859..40c6bde 100644
--- a/xmlcleanup.sed
+++ b/xmlcleanup.sed
@@ -3,6 +3,7 @@
# seemingly unneeded control characters
s/&ff[0-9];//g
s/xxff0//g
+s/xxff2//g
s/xxff3//g
# <q> corresponds to a quote