diff options
Diffstat (limited to 'xmlcleanup.sed')
-rw-r--r-- | xmlcleanup.sed | 39 |
1 files changed, 39 insertions, 0 deletions
#!/bin/sed -f

# Clean-up pass for latin-1 text that carries XML-style tags and
# SGML-style entities: each known piece of markup is rewritten to plain
# text or dropped. Every rule is a global (g) substitution, so all
# occurrences on a line are handled.
#
# Requires GNU sed: \xHH in a pattern and \n in a replacement are both
# GNU extensions.

# unfortunately they use latin-1 rather than ascii...
# note \x is a gnu extension
s/\xA3/£/g

# seemingly unneeded control characters (&ff0; .. &ff9;) -- dropped
s/&ff[0-9];//g

# <q> corresponds to a quote; set it off with a blank line
s/<q>/\n\n/g

# quote text start and end points become curly double quotes
s/<qt>/“/g
s/<\/qt>/”/g

# new paragraph
# The entity is matched by name, like &obr; and &cbr; below. A literal
# U+2225 character cannot occur in latin-1 input, so matching on that
# character would never fire.
s/&par;/\n\n/g

# a date: drop the opening tag, end the date with a colon
s/<d>//g
s/<\/d>/:/g

# brackets
s/&obr;/{/g
s/&cbr;/}/g

# space
# NOTE(review): as written this replaces a space with a space. The
# pattern was presumably a space entity or a non-ASCII space character;
# confirm against the upstream script.
s/ / /g

# superscript: separate from the preceding text with a space
s/<sup>/ /g
s/<\/sup>//g

# xNNNN codes (unknown what the NNNN refers to; not unicode)
s/<x2145>/`/g
s/<x2146>/'/g
s/<x2150>/-/g
s/<xA043>/../g