From 2742524159ad2bc861711df084f6bd77588e9e9b Mon Sep 17 00:00:00 2001
From: Nick White
Date: Sun, 31 Mar 2013 18:32:08 +0100
Subject: Initial commit

---
NOTE(review): this patch was recovered from a copy that had been flattened
onto one line and passed through an HTML-to-text filter, which removed
anything shaped like a tag and decoded HTML entities.
  Restored: line breaks; the opening-tag patterns <qt>, <d> and <sup>
  (inferred from the surviving closing-tag rules - confirm the originals
  carried no attributes); &par; (had been decoded to U+2225); &nbsp; (had
  been decoded to a plain space - presumed from the "# space" comment,
  confirm).
  Not recoverable from this copy: the pattern of the "corresponds to a
  quote" rule and of the four xNNNN rules. They are left empty, as found.
  An empty regex makes sed reuse the last regex it applied, so those five
  rules do not do what their comments say until the real patterns are
  copied in from the upstream repository.
  The author's e-mail address on the From: line was stripped as well.

 xmlcleanup.sed | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)
 create mode 100644 xmlcleanup.sed
(limited to 'xmlcleanup.sed')

diff --git a/xmlcleanup.sed b/xmlcleanup.sed
new file mode 100644
index 0000000..f302b11
--- /dev/null
+++ b/xmlcleanup.sed
@@ -0,0 +1,39 @@
+#!/bin/sed -f
+
+# unfortunately they use latin-1 rather than ascii...
+# note \x is a gnu extension
+s/\xA3/£/g
+
+# seemingly unneeded control characters
+s/&ff[0-9];//g
+
+# corresponds to a quote
+s//\n\n/g
+
+# quote text start and end points
+s/<qt>/“/g
+s/<\/qt>/”/g
+
+# new paragraph
+s/&par;/\n\n/g
+
+# a date
+s/<d>//g
+s/<\/d>/:/g
+
+# brackets
+s/&obr;/{/g
+s/&cbr;/}/g
+
+# space
+s/&nbsp;/ /g
+
+# superscript
+s/<sup>/ /g
+s/<\/sup>//g
+
+# xNNNN codes (unknown what the NNNN refers to; not unicode)
+s//`/g
+s//'/g
+s//-/g
+s//../g
-- 
cgit v1.2.3