From 5d0c5974e2655a6f66153bffb67c9346c2c4a589 Mon Sep 17 00:00:00 2001
From: Nick White
Date: Sun, 26 May 2013 21:52:41 +0100
Subject: Correct e tag removal, correct headword separation

---
 Makefile | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'Makefile')

diff --git a/Makefile b/Makefile
index 10c4fb3..cfb4484 100644
--- a/Makefile
+++ b/Makefile
@@ -11,9 +11,10 @@ all: oed.jargon oed.dict.dz
 
 .htm.jargon:
 	iconv -f ISO-8859-1 -t UTF-8 < "$<" \
+	| sed 's/\r//g' \
 	| awk -f htmtojargon.awk \
-	| sed -f separateheadmeta.sed \
 	| sed -f symbols.sed \
+	| perl -p removeetags.pl \
 	| sed -f xmlcleanup.sed \
 	| sed -f xmlcleanup2.sed \
 	> "$@"
@@ -28,7 +29,7 @@ oed.dict: oed.jargon
 	cat $< \
 	| dictfmt -j --utf8 \
 	--columns 0 --headword-separator ',' \
-	--index-data-separator ';' \
+	--index-data-separator " " \
 	-u http://njw.me.uk/oed \
 	-s "Oxford English Dictionary, 2nd Edition" \
 	oed
-- 
cgit v1.2.3