From 3357cde96f91738e69617ae65165209ca9f504c6 Mon Sep 17 00:00:00 2001 From: Nick White Date: Fri, 11 Oct 2013 14:31:05 +0100 Subject: Make progress systematically tackling all entities/symbols --- Makefile | 2 +- README | 8 +- TODOentities | 459 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ symbols | 262 ++++++++++++++++++++++++++++++++-- 4 files changed, 714 insertions(+), 17 deletions(-) create mode 100644 TODOentities diff --git a/Makefile b/Makefile index cfb4484..1e709f6 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ SRC = 1.htm 2.htm 3.htm 4.htm 5.htm \ 6.htm 7.htm 8.htm 9.htm 10.htm JARGONS = $(SRC:.htm=.jargon) -$(JARGONS): htmtojargon.awk separateheadmeta.sed symbols.sed \ +$(JARGONS): htmtojargon.awk removeetags.pl symbols.sed \ xmlcleanup.sed xmlcleanup2.sed all: oed.jargon oed.dict.dz diff --git a/README b/README index eaeab74..af81ead 100644 --- a/README +++ b/README @@ -24,11 +24,15 @@ Install - To install the dictionary for dictd, run 'make install' -Note ----- +Notes +----- The dictionary included in v3 is the same as v4, but with v4 the OED started encrypting the dictionary files. They probably wouldn't be difficult to decrypt, but I haven't the time or inclination to do so. If anyone wants to, let me know how you get on and I'll very happily incorporate support for it. + +If your version of make supports it, and you have multiple CPUs, you +can use 'make -j' instead of 'make' to significantly speed up the +build process. diff --git a/TODOentities b/TODOentities new file mode 100644 index 0000000..4cd33b1 --- /dev/null +++ b/TODOentities @@ -0,0 +1,459 @@ +aacuced +aacusc +aangsc +acircsc # acirc is 00e5... 
+agravesc +atildepr +atildesc +aumlsc +bantuo +Bantuo +cbigb +cbigpren +cbigsb +ccedpr +ccedsc +cdsb +clenis +click +ctlig +dlessj1 +dlessj2 +eacusc +ecircsc +edhpr +egravesc +elenis +Elenis +elenisacu +Elenisacu +Elenisfrown +elenisgrave +eth +eumlsc +ff0 +ff2 +ff3 +hasper +Hasper +hasperacu +Hasperacu +hasperfrown +hasperisubfrown +hbar +Hbrbl +hdotab +hdotbl +Hdotbl +hebtav +hg +hgz +hireq +hisub +hisubacu +hisubfrown +hlenis +Hlenis +hlenisacu +hlenisfrown +hlenisgrave +hundl +iacu +Iacu +iacusc +iasper +Iasper +iasperacu +Iasperacu +iasperfrown +ibar +ibreve +Ibreve +ibrevemac +iced +icirc +Icirc +icircsc +ifflig +ifilig +ifrbl +igrave +igravesc +ihacek +ilenis +Ilenis +ilenisacu +Ilenisacu +ilenisfrown +ilenismac +ilenismacacu +imac +Imac +imacacu +imacbreve +imachacek +imacuml +imacundl +iota +Iota +istlig +itilde +itildepr +iuml +iumlsc +iundl +jbreve +jhacek +jundl +kappa +Kappa +kdotbl +Kdotbl +kundl +Kundl +lambda +Lambda +lbar +Lbar +lcircbl +ldotbl +lm +longs +lt +lumlbl +mcircbl +mdotab +mdotbl +mdotblacu +mfrown +msyllab +mtilde +mu +Mu +mundl +nacu +nbreve +nbsp +ncirc +ncircbl +ndotab +Ndotab +ndotbl +nfacu +nfasper +nfbreve +nfced +nfcirc +nffrown +nfgra +nfhacek +nfmac +nftilde +nfuml +ng +nhacek +nmac +ntilde +ntildesc +nu +Nu +nundl +oacu +Oacu +oacusc +oasper +Oasper +oasperacu +Oasperacu +oaspergrave +obar +Obar +obaracu +obarmac +obarpr +obigb +obigpren +obigsb +obr +obreve +Obreve +obrevemac +ocirc +Ocirc +ocircgra +ocircsc +odotab +odotblmac +odsb +oe +Oe +oeacu +oemac +oemacbreve +oepr +oetilde +ofrown +ograve +ogravesc +ohacek +ohgcirc +ohook +ohookacu +ohookbreve +ohookmac +oldbeta +olenis +Olenis +olenisacu +Olenisacu +olenisfrown +olenisgrave +omac +Omac +omacacu +omacbreve +omactilde +omega +Omega +omicron +Omicron +ope +opemac +opetilde +otilde +otildepr +otildesc +ouml +Ouml +oumlmac +oumlsc +oundl +p +pall +paln +par +pbar +pdotab +phi +Phi +phi2 +pi +Pi +pmac +psi +Psi +ptilde +pundl +qamets +qdotab +qhacek +qtilde +racu +rasper 
+Rasper +rbreve +rcircbl +rcircblmac +rdotab +Rdotab +rdotbl +Rdotbl +rdotblacu +rdotblmac +real +reva +revc +revC +revctilde +revope +revopehook +revr +revrmac +revsc +revv +revvmac +revvtilde +rfa +rglots +rhacek +rho +Rho +rlenis +roasper +rsyllab +rtilde +ruasper +runash +rundl +runwyn +rvow +sacu +Sacu +sbreve +Sbreve +sced +Sced +schwa +schwaacu +schwafrbl +schwamac +schwatilde +scirc +sdotab +Sdotab +sdotbl +Sdotbl +segol +sgrave +sh +shacek +Shacek +shacekdotab +shacekdotbl +shadda +sheva +shook +Shook +shti +shtsyllmac +shtu +sigma +Sigma +sm +Smac +smm +smmpr +smpr +smR +smY +stilde +stlig +sundl +Sundl +tacu +tau +Tau +Tbarab +tced +tdotab +tdotbl +Tdotbl +th +Th +thbar +Thbar +thdotab +theta +Theta +thinqm +times +ttilde +tundl +Tundl +uacu +Uacu +uacusc +uang +uangtilde +uasper +Uasper +uasperacu +Uasperacu +uasperfrown +ubar +ubreve +ucirc +Ucirc +ucircsc +uda +udA +udatilde +udh +udotab +udqm +udw +ufrbl +ufrown +ugrave +uhacek +uhook +ulenis +Ulenis +ulenisacu +Ulenisacu +ulenisfrown +umac +Umac +umacacu +umacbreve +upsilon +Upsilon +utilde +utildeacu +uuml +Uuml +uumlacu +uumlcirc +uumlgra +uumlhacek +uumlmac +uumlsc +vavpath +vavsheva +vdftheta +vuml +vvf +wacu +wapos +wasper +Wasper +wasperacu +wasperfrown +Wasperfrown +wcirc +wdotbl +wgrave +whacek +wisub +wisubacu +wisubfrown +wlenis +Wlenis +wlenisacu +Wlenisacu +wlenisfrown +wlenisisub +wmac +woqab +wyn +Wyn +xdotab +xi +Xi +yacu +yacusc +ybreve +ycirc +ydotab +Ydotab +ygh +Ygh +ygra +ymac +ymacacu +ymacbreve +ytilde +yuml +Yundl +zacu +zbreve +zced +zdotab +zdotbl +zeta +Zeta +zh +zhacek +Zhacek +zhacekdotab +zmac diff --git a/symbols b/symbols index 797961c..7056f85 100644 --- a/symbols +++ b/symbols @@ -7,13 +7,10 @@ revsc · # Ligature digraphs -Ae Æ -ae æ Oe Œ oe œ # Phonetic symbols -aepr ɶ ccedpr θ edhpr ð fata ɑ @@ -31,14 +28,11 @@ smm ˌ smmpr ˌ zh ʝ -Abarab Ā - # These are extras which aren't documented in the symbol list th th Th Th -Aang Å ouml ö oq ‘ @@ -49,16 +43,256 @@ times × # 
Basic HTML entities # TODO: generate these from http://www.w3.org/TR/html4/sgml/entities.html or http://www.w3.org/TR/xml-entity-names/ \#038 \& +oacu ó +ocirc ô + +# notes: +# 'frown' is sad face circumflex ('inverted breve', in case of Greek just use circumflex) +# 'tilde' is wiggly circumflex +# 'circ' is circumflex (^) +# 'dotab' is dot above +# 'asper' is aspirated (rough breathing) +# 'lenis' is unaspirated (smooth breathing) +# 'mac' is macron U+0304 +# 'uml' is umlaut (i.e. diaeresis) +# 'ang' is ring above +# 'ced' is cedilla, e.g. garçon +# 'breve' is breve U+0306 +# 'acu' is acute U+0301 + +# unknown: +# pr +# sc +# ced + +# going through in alphabetical order +\#038 \& +\#062 > aacu á +Aacu Á +aang å +Aang Å +aasper ἁ +Aasper Ἁ +aasperacu ἅ +Aasperacu Ἅ +aasperfrown ἇ +aaspergrav ἃ +aasperisubacu ᾅ +Abarab Ā +abreve ă +Abreve Ă +aced a̧ +acirc â +Acirc Â +adotab ȧ +ae æ +aeacu ǽ +aebreve æ̆ +aecirc æ̂ +aedotab æ̇ +aemac ǣ +aemacbreve æ̆ +aetilte æ̃ +Ae Æ +Aeacu Ǽ +Aemac Ǣ +aepr ɶ +afrown â +Afrown Â +agrave à +Agrave À +ahacek ǎ +ahook ả +ahookmac ả̄ +aisub ᾳ +aisubacu ᾴ +aisubfrown ᾰͅ +aisubtilde ᾷ +alenis ἀ +Alenis Ἀ +alenisacu ἄ +Alenisacu Ἄ +alenisfrown ἆ +alenisgrave ἂ +alenisisub ᾀ +alenisisubacu ᾄ +alenisisubfrown ᾆ +alenismac ᾱ̓ +alpha α +Alpha Α +amac ā +Amac Ā +amacacu ā́ +amacbreve ā̆ +amacdotab ā̇ +amactilde ā̃ +amacundl ā̲ +asg ᵹ +Asg Ᵹ +atilde ã auml ä +Auml Ä +aundl a̲ +ayin ʿ +ayindotabove ʿ̇ +bbar ƀ +beta β +Beta Β +bundl b̲ +cacu ć +cbr } +cbreve c̆ +Cbreve C̆ +cced ç +Cced Ç +ccirc ĉ +cdotab ċ +cdotbl c̣ +chacek č +Chacek Č +chacekdotab č̇ +chi χ +Chi Χ +chook ƈ +cmac c̄ +ctilde c̃ +cyra а +cyrd д +cyre э +cyrhard ъ +cyrjat ѣ +cyrm м +cyrn н +cyrO О +cyrP П +cyrr р +cyrsoft ь +cyrt т +cyry ы +dbar đ +dced ḑ +ddotab ḋ +ddotbl ḍ +Ddotbl ḍ +delta δ +Delta Δ +devdh ध +devph फ +devrfls ष +devrt ट +devrth ठ +devt त +devth थ +digamma ͷ +dlessi ı +dlessj3 ȷ +dtilde d̃ +dundl ḏ eacu é +Eacu É +eacudotbl ẹ́ +easper ἑ +Easper Ἑ +easperacu ἕ +Easperacu Ἕ +easpergrave ἓ +ebreve ĕ +ecirc ê +Ecirc Ê
egrave è -oacu ó -ocirc ô - -# TODO -# edh - unknown -# ygh - some variant of y -# thbar - probably th with bar above -# asg +edh ð +Edh Ð +edotab ė +edotabacu ė́ +edotabtilde ė̃ +edotbl ẹ +Edotbl Ẹ +edotblmac ẹ̄ +egrave è +egsampi ͳ +egy3 ꜣ +egyasper ꜥ +ehacek ě +ehook ẻ +Ehook Ẻ +ehookacu ẻ́ +ehookmac ẻ̄ +ehookmacbreve ẻ̄̆ +emac ē +Emac Ē +emacacu ḗ +emacbreve ē̆ +emachacek ē̌ +epsilon ε +Epsilon Ε +epsiloncirc ε̂ +eszett ß +eta η +Eta Η +etaisub ῃ +etilde ẽ +euml ë +eumlacu ë́ +eundl e̱ +eundlacu é̱ +fata ɑ +fatatilde ɑ̃ +ffilig ffi +ffllig ffl +fllig fl +fsigma ς +gaacu ά +gabreve ᾰ +gacu ǵ +gafrown ᾶ +gagrave ὰ +gamac ᾱ +gamacbreve ᾱ̆ +gamma γ +Gamma Γ +gaumlisub αͺ̈ +gbreve ğ +Gbreve Ğ +gcirc ĝ +gdotab ġ +geacu έ +gegrave ὲ +ghacek ǧ +ghacu ή +ghfrown ῆ +ghgrave ὴ +ghmac η̄ +giacu ί +gibreve ῐ +gibreveacu ῐ́ +gifrown ῖ +gigrave ὶ +gimac ῑ +gimacacu ῑ́ +giuml ϊ +giumlacu ΐ +giumlgrave ῒ +glagjat ⱑ +glagjeri ⱐ +glagjeru ⱏ +glots ʔ +gmac ḡ +goacu ό +gobreve ο̆ +gograve ὸ +gt > +guacu ύ +guacumac ύ̄ +gubreve ῠ +gufrown ῦ +gugrave ὺ +gumac ῡ +gumacacu ῡ́ +gundl g̲ +Gundl G̲ +guuml ϋ +guumlacu ΰ +gwacu ώ +gwfrown ῶ +gwgrave ὼ -- cgit v1.2.3