summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick White <git@njw.me.uk>2013-10-11 14:31:05 +0100
committerNick White <git@njw.me.uk>2013-10-11 14:31:05 +0100
commit3357cde96f91738e69617ae65165209ca9f504c6 (patch)
treee04848c8e62e5101a505c61a6a061d525e6c7293
parent5d0c5974e2655a6f66153bffb67c9346c2c4a589 (diff)
downloadoed2dict-3357cde96f91738e69617ae65165209ca9f504c6.tar.bz2
oed2dict-3357cde96f91738e69617ae65165209ca9f504c6.zip
Make progress systematically tackling all entities/symbols
-rw-r--r--Makefile2
-rw-r--r--README8
-rw-r--r--TODOentities459
-rw-r--r--symbols262
4 files changed, 714 insertions, 17 deletions
diff --git a/Makefile b/Makefile
index cfb4484..1e709f6 100644
--- a/Makefile
+++ b/Makefile
@@ -4,7 +4,7 @@ SRC = 1.htm 2.htm 3.htm 4.htm 5.htm \
6.htm 7.htm 8.htm 9.htm 10.htm
JARGONS = $(SRC:.htm=.jargon)
-$(JARGONS): htmtojargon.awk separateheadmeta.sed symbols.sed \
+$(JARGONS): htmtojargon.awk removeetags.pl symbols.sed \
xmlcleanup.sed xmlcleanup2.sed
all: oed.jargon oed.dict.dz
diff --git a/README b/README
index eaeab74..af81ead 100644
--- a/README
+++ b/README
@@ -24,11 +24,15 @@ Install
- To install the dictionary for dictd, run 'make install'
-Note
-----
+Notes
+-----
The dictionary included in v3 is the same as v4, but with v4 the
OED started encrypting the dictionary files. They probably wouldn't
be difficult to decrypt, but I haven't the time or inclination to
do so. If anyone wants to, let me know how you get on and I'll
very happily incorporate support for it.
+
+If your version of make supports it, and you have multiple CPUs, you
+can use 'make -j' instead of 'make' to significantly speed up the
+build process.
diff --git a/TODOentities b/TODOentities
new file mode 100644
index 0000000..4cd33b1
--- /dev/null
+++ b/TODOentities
@@ -0,0 +1,459 @@
+aacuced
+aacusc
+aangsc
+acircsc # acirc is 00e5...
+agravesc
+atildepr
+atildesc
+aumlsc
+bantuo
+Bantuo
+cbigb
+cbigpren
+cbigsb
+ccedpr
+ccedsc
+cdsb
+clenis
+click
+ctlig
+dlessj1
+dlessj2
+eacusc
+ecircsc
+edhpr
+egravesc
+elenis
+Elenis
+elenisacu
+Elenisacu
+Elenisfrown
+elenisgrave
+eth
+eumlsc
+ff0
+ff2
+ff3
+hasper
+Hasper
+hasperacu
+Hasperacu
+hasperfrown
+hasperisubfrown
+hbar
+Hbrbl
+hdotab
+hdotbl
+Hdotbl
+hebtav
+hg
+hgz
+hireq
+hisub
+hisubacu
+hisubfrown
+hlenis
+Hlenis
+hlenisacu
+hlenisfrown
+hlenisgrave
+hundl
+iacu
+Iacu
+iacusc
+iasper
+Iasper
+iasperacu
+Iasperacu
+iasperfrown
+ibar
+ibreve
+Ibreve
+ibrevemac
+iced
+icirc
+Icirc
+icircsc
+ifflig
+ifilig
+ifrbl
+igrave
+igravesc
+ihacek
+ilenis
+Ilenis
+ilenisacu
+Ilenisacu
+ilenisfrown
+ilenismac
+ilenismacacu
+imac
+Imac
+imacacu
+imacbreve
+imachacek
+imacuml
+imacundl
+iota
+Iota
+istlig
+itilde
+itildepr
+iuml
+iumlsc
+iundl
+jbreve
+jhacek
+jundl
+kappa
+Kappa
+kdotbl
+Kdotbl
+kundl
+Kundl
+lambda
+Lambda
+lbar
+Lbar
+lcircbl
+ldotbl
+lm
+longs
+lt
+lumlbl
+mcircbl
+mdotab
+mdotbl
+mdotblacu
+mfrown
+msyllab
+mtilde
+mu
+Mu
+mundl
+nacu
+nbreve
+nbsp
+ncirc
+ncircbl
+ndotab
+Ndotab
+ndotbl
+nfacu
+nfasper
+nfbreve
+nfced
+nfcirc
+nffrown
+nfgra
+nfhacek
+nfmac
+nftilde
+nfuml
+ng
+nhacek
+nmac
+ntilde
+ntildesc
+nu
+Nu
+nundl
+oacu
+Oacu
+oacusc
+oasper
+Oasper
+oasperacu
+Oasperacu
+oaspergrave
+obar
+Obar
+obaracu
+obarmac
+obarpr
+obigb
+obigpren
+obigsb
+obr
+obreve
+Obreve
+obrevemac
+ocirc
+Ocirc
+ocircgra
+ocircsc
+odotab
+odotblmac
+odsb
+oe
+Oe
+oeacu
+oemac
+oemacbreve
+oepr
+oetilde
+ofrown
+ograve
+ogravesc
+ohacek
+ohgcirc
+ohook
+ohookacu
+ohookbreve
+ohookmac
+oldbeta
+olenis
+Olenis
+olenisacu
+Olenisacu
+olenisfrown
+olenisgrave
+omac
+Omac
+omacacu
+omacbreve
+omactilde
+omega
+Omega
+omicron
+Omicron
+ope
+opemac
+opetilde
+otilde
+otildepr
+otildesc
+ouml
+Ouml
+oumlmac
+oumlsc
+oundl
+p
+pall
+paln
+par
+pbar
+pdotab
+phi
+Phi
+phi2
+pi
+Pi
+pmac
+psi
+Psi
+ptilde
+pundl
+qamets
+qdotab
+qhacek
+qtilde
+racu
+rasper
+Rasper
+rbreve
+rcircbl
+rcircblmac
+rdotab
+Rdotab
+rdotbl
+Rdotbl
+rdotblacu
+rdotblmac
+real
+reva
+revc
+revC
+revctilde
+revope
+revopehook
+revr
+revrmac
+revsc
+revv
+revvmac
+revvtilde
+rfa
+rglots
+rhacek
+rho
+Rho
+rlenis
+roasper
+rsyllab
+rtilde
+ruasper
+runash
+rundl
+runwyn
+rvow
+sacu
+Sacu
+sbreve
+Sbreve
+sced
+Sced
+schwa
+schwaacu
+schwafrbl
+schwamac
+schwatilde
+scirc
+sdotab
+Sdotab
+sdotbl
+Sdotbl
+segol
+sgrave
+sh
+shacek
+Shacek
+shacekdotab
+shacekdotbl
+shadda
+sheva
+shook
+Shook
+shti
+shtsyllmac
+shtu
+sigma
+Sigma
+sm
+Smac
+smm
+smmpr
+smpr
+smR
+smY
+stilde
+stlig
+sundl
+Sundl
+tacu
+tau
+Tau
+Tbarab
+tced
+tdotab
+tdotbl
+Tdotbl
+th
+Th
+thbar
+Thbar
+thdotab
+theta
+Theta
+thinqm
+times
+ttilde
+tundl
+Tundl
+uacu
+Uacu
+uacusc
+uang
+uangtilde
+uasper
+Uasper
+uasperacu
+Uasperacu
+uasperfrown
+ubar
+ubreve
+ucirc
+Ucirc
+ucircsc
+uda
+udA
+udatilde
+udh
+udotab
+udqm
+udw
+ufrbl
+ufrown
+ugrave
+uhacek
+uhook
+ulenis
+Ulenis
+ulenisacu
+Ulenisacu
+ulenisfrown
+umac
+Umac
+umacacu
+umacbreve
+upsilon
+Upsilon
+utilde
+utildeacu
+uuml
+Uuml
+uumlacu
+uumlcirc
+uumlgra
+uumlhacek
+uumlmac
+uumlsc
+vavpath
+vavsheva
+vdftheta
+vuml
+vvf
+wacu
+wapos
+wasper
+Wasper
+wasperacu
+wasperfrown
+Wasperfrown
+wcirc
+wdotbl
+wgrave
+whacek
+wisub
+wisubacu
+wisubfrown
+wlenis
+Wlenis
+wlenisacu
+Wlenisacu
+wlenisfrown
+wlenisisub
+wmac
+woqab
+wyn
+Wyn
+xdotab
+xi
+Xi
+yacu
+yacusc
+ybreve
+ycirc
+ydotab
+Ydotab
+ygh
+Ygh
+ygra
+ymac
+ymacacu
+ymacbreve
+ytilde
+yuml
+Yundl
+zacu
+zbreve
+zced
+zdotab
+zdotbl
+zeta
+Zeta
+zh
+zhacek
+Zhacek
+zhacekdotab
+zmac
diff --git a/symbols b/symbols
index 797961c..7056f85 100644
--- a/symbols
+++ b/symbols
@@ -7,13 +7,10 @@
revsc ·
# Ligature digraphs
-Ae Æ
-ae æ
Oe Œ
oe œ
# Phonetic symbols
-aepr ɶ
ccedpr θ
edhpr ð
fata ɑ
@@ -31,14 +28,11 @@ smm ˌ
smmpr ˌ
zh ʝ
-Abarab Ā
-
# These are extras which aren't documented in the symbol list
th th
Th Th
-Aang Å
ouml ö
oq ‘
@@ -49,16 +43,256 @@ times ×
# Basic HTML entities
# TODO: generate these from http://www.w3.org/TR/html4/sgml/entities.html or http://www.w3.org/TR/xml-entity-names/
\#038 \&
+oacu ó
+ocirc ô
+
+# notes:
+# 'frown' is sad face circumflex ('inverted breve'; in the case of Greek, just use circumflex)
+# 'tilde' is wiggly circumflex
+# 'circ' is circumflex (^)
+# 'dotab' is dot above
+# 'asper' is aspirated (rough breathing)
+# 'lenis' is non-aspirated (smooth breathing)
+# 'mac' is macron U+0304
+# 'uml' is umlaut (i.e. diaeresis)
+# 'ang' is ring above
+# 'ced' is cedilla, e.g. garçon
+# 'breve' is breve U+0306
+# 'acu' is acute U+0301
+
+# unknown:
+# pr
+# sc
+# ced
+
+# going through in alphabetical order
+\#038 \&
+\#062 >
aacu á
+Aacu Á
+aang å
+Aang Å
+aasper ἁ
+Aasper Ἁ
+aasperacu ἅ
+Aasperacu Ἅ
+aasperfrown ἇ
+aaspergrave ἃ
+aasperisubacu ᾅ
+Abarab Ā
+abreve ă
+Abreve Ă
+aced a̧
+acirc â
+Acirc Â
+adotab ȧ
+ae æ
+aeacu ǽ
+aebreve æ̆
+aecirc æ̂
+aedotab æ̇
+aemac ǣ
+aemacbreve ǣ̆
+aetilde æ̃
+Ae Æ
+Aeacu Ǽ
+Aemac Ǣ
+aepr ɶ
+afrown â
+Afrown Â
agrave à
+Agrave À
+ahacek ǎ
+ahook ả
+ahookmac ả̄
+aisub ᾳ
+aisubacu ᾴ
+aisubfrown ᾰͅ
+aisubtilde ᾷ
+alenis ἀ
+Alenis Ἀ
+alenisacu ἄ
+Alenisacu Ἄ
+alenisfrown ἆ
+alenisgrave ἂ
+alenisisub ᾀ
+alenisisubacu ᾄ
+alenisisubfrown ᾆ
+alenismac ᾱ̓
+alpha α
+Alpha Α
+amac ā
+Amac Ā
+amacacu ā́
+amacbreve ā̆
+amacdotab ā̇
+amactilde ā̃
+amacundl ā̲
+asg ᵹ
+Asg Ᵹ
+atilde ã
auml ä
+Auml Ä
+aundl a̲
+ayin ʿ
+ayindotabove ʿ̇
+bbar ƀ
+beta β
+Beta Β
+bundl b̲
+cacu ć
+cbr }
+cbreve c̆
+Cbreve C̆
+cced ç
+Cced Ç
+ccirc ĉ
+cdotab ċ
+cdotbl c̣
+chacek č
+Chacek Č
+chacekdotab č̇
+chi χ
+Chi Χ
+chook ƈ
+cmac c̄
+ctilde c̃
+cyra а
+cyrd д
+cyre э
+cyrhard ъ
+cyrjat ѣ
+cyrm м
+cyrn н
+cyrO О
+cyrP П
+cyrr р
+cyrsoft ь
+cyrt т
+cyry ы
+dbar đ
+dced ḑ
+ddotab ḋ
+ddotbl ḍ
+Ddotbl Ḍ
+delta δ
+Delta Δ
+devdh ध
+devph फ
+devrfls ष
+devrt ट
+devrth ठ
+devt त
+devth थ
+digamma ͷ
+dlessi ı
+dlessj3 ȷ
+dtilde d̃
+dundl ḏ
eacu é
+Eacu É
+eacudotbl ẹ́
+easper ἑ
+Easper Ἑ
+easperacu ἕ
+Easperacu Ἕ
+easpergrave ἓ
+ebreve ĕ
+ecirc ê
+Ecirc Ê
egrave è
-oacu ó
-ocirc ô
-
-# TODO
-# edh - unknown
-# ygh - some variant of y
-# thbar - probably th with bar above
-# asg
+edh ð
+Edh Ð
+edotab ė
+edotabacu ė́
+edotabtilde ė̃
+edotbl ẹ
+Edotbl Ẹ
+edotblmac ẹ̄
+egrave è
+egsampi ͳ
+egy3 ꜣ
+egyasper ꜥ
+ehacek ě
+ehook ẻ
+Ehook Ẻ
+ehookacu ẻ́
+ehookmac ẻ̄
+ehookmacbreve ẻ̄̆
+emac ē
+Emac Ē
+emacacu ḗ
+emacbreve ē̆
+emachacek ē̌
+epsilon ε
+Epsilon Ε
+epsiloncirc ε̂
+eszett ß
+eta η
+Eta Η
+etaisub ῃ
+etilde ẽ
+euml ë
+eumlacu ë́
+eundl e̱
+eundlacu é̱
+fata ɑ
+fatatilde ɑ̃
+ffilig ffi
+ffllig ffl
+fllig fl
+fsigma ς
+gaacu ά
+gabreve ᾰ
+gacu ǵ
+gafrown ᾶ
+gagrave ὰ
+gamac ᾱ
+gamacbreve ᾱ̆
+gamma γ
+Gamma Γ
+gaumlisub αͺ̈
+gbreve ğ
+Gbreve Ğ
+gcirc ĝ
+gdotab ġ
+geacu έ
+gegrave ὲ
+ghacek ǧ
+ghacu ή
+ghfrown ῆ
+ghgrave ὴ
+ghmac η̄
+giacu ί
+gibreve ῐ
+gibreveacu ῐ́
+gifrown ῖ
+gigrave ὶ
+gimac ῑ
+gimacacu ῑ́
+giuml ϊ
+giumlacu ΐ
+giumlgrave ῒ
+glagjat ⱑ
+glagjeri ⱐ
+glagjeru ⱏ
+glots ʔ
+gmac ḡ
+goacu ό
+gobreve ο̆
+gograve ὸ
+gt >
+guacu ύ
+guacumac ύ̄
+gubreve ῠ
+gufrown ῦ
+gugrave ὺ
+gumac ῡ
+gumacacu ῡ́
+gundl g̲
+Gundl G̲
+guuml ϋ
+guumlacu ΰ
+gwacu ώ
+gwfrown ῶ
+gwgrave ὼ