From b0055fe870a04fbd8eaef669c1ccfb0febfe8bc3 Mon Sep 17 00:00:00 2001
From: Nick White
Date: Sun, 26 May 2013 13:06:06 +0100
Subject: Convert to proper UTF-8, separate word type stuff from definitions

---
 Makefile | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'Makefile')

diff --git a/Makefile b/Makefile
index 4927483..10c4fb3 100644
--- a/Makefile
+++ b/Makefile
@@ -4,12 +4,15 @@ SRC = 1.htm 2.htm 3.htm 4.htm 5.htm \
 	6.htm 7.htm 8.htm 9.htm 10.htm
 JARGONS = $(SRC:.htm=.jargon)
 
-$(JARGONS): htmtojargon.awk symbols.sed xmlcleanup.sed xmlcleanup2.sed
+$(JARGONS): htmtojargon.awk separateheadmeta.sed symbols.sed \
+	xmlcleanup.sed xmlcleanup2.sed
 
 all: oed.jargon oed.dict.dz
 
 .htm.jargon:
-	awk -f htmtojargon.awk < "$<" \
+	iconv -f ISO-8859-1 -t UTF-8 < "$<" \
+	| awk -f htmtojargon.awk \
+	| sed -f separateheadmeta.sed \
 	| sed -f symbols.sed \
 	| sed -f xmlcleanup.sed \
 	| sed -f xmlcleanup2.sed \
@@ -24,7 +27,8 @@ oed.jargon: $(JARGONS)
 oed.dict: oed.jargon
 	cat $< \
 	| dictfmt -j --utf8 \
-	--columns 0 --headword-separator ' ' \
+	--columns 0 --headword-separator ',' \
+	--index-data-separator ';' \
 	-u http://njw.me.uk/oed \
 	-s "Oxford English Dictionary, 2nd Edition" \
 	oed
-- 
cgit v1.2.3