summary | refs | log | tree | commit | diff
path: root/Makefile
diff options
context:
space:
mode:
author: Nick White <git@njw.me.uk> 2013-05-26 13:06:06 +0100
committer: Nick White <git@njw.me.uk> 2013-05-26 13:06:06 +0100
commit: b0055fe870a04fbd8eaef669c1ccfb0febfe8bc3 (patch)
tree: ffaea14138b844e5645ad952a1d54622c15ff7b9 /Makefile
parent: b2572f6fa6dbe9e810cbdfcc96f50f04b2fc139c (diff)
download: oed2dict-b0055fe870a04fbd8eaef669c1ccfb0febfe8bc3.tar.bz2
download: oed2dict-b0055fe870a04fbd8eaef669c1ccfb0febfe8bc3.zip
Convert to proper UTF-8, separate word type stuff from definitions
Diffstat (limited to 'Makefile')
-rw-r--r-- Makefile 10
1 files changed, 7 insertions, 3 deletions
diff --git a/Makefile b/Makefile
index 4927483..10c4fb3 100644
--- a/Makefile
+++ b/Makefile
@@ -4,12 +4,15 @@ SRC = 1.htm 2.htm 3.htm 4.htm 5.htm \
6.htm 7.htm 8.htm 9.htm 10.htm
JARGONS = $(SRC:.htm=.jargon)
-$(JARGONS): htmtojargon.awk symbols.sed xmlcleanup.sed xmlcleanup2.sed
+$(JARGONS): htmtojargon.awk separateheadmeta.sed symbols.sed \
+ xmlcleanup.sed xmlcleanup2.sed
all: oed.jargon oed.dict.dz
.htm.jargon:
- awk -f htmtojargon.awk < "$<" \
+ iconv -f ISO-8859-1 -t UTF-8 < "$<" \
+ | awk -f htmtojargon.awk \
+ | sed -f separateheadmeta.sed \
| sed -f symbols.sed \
| sed -f xmlcleanup.sed \
| sed -f xmlcleanup2.sed \
@@ -24,7 +27,8 @@ oed.jargon: $(JARGONS)
oed.dict: oed.jargon
cat $< \
| dictfmt -j --utf8 \
- --columns 0 --headword-separator ' ' \
+ --columns 0 --headword-separator ',' \
+ --index-data-separator ';' \
-u http://njw.me.uk/oed \
-s "Oxford English Dictionary, 2nd Edition" \
oed