#!/bin/sed -f
#
# Strips or translates the inline markup codes found in the input text:
# tags such as <q>, <qt>, <d>, <sup> and <xNNNN>, and entities such as
# &obr; / &cbr; / &ffN;.
#
# Every command sits on its own line on purpose: in sed a "#" comment runs
# to the end of the line, so a command that shares a line with a comment
# (or with the "#!" line above) is never executed.
#
# The commands are applied top to bottom to each input line; keep the order.
#
# NOTE(review): the \xA3 byte match below presumably needs a single-byte
# locale (e.g. LC_ALL=C) to behave predictably on latin-1 input -- confirm
# how this script is invoked.

# unfortunately they use latin-1 rather than ascii...
# note \x is a gnu extension
s/\xA3/£/g

# seemingly unneeded control characters
s/&ff[0-9];//g

# <q> corresponds to a quote; it is replaced by a blank line
s/<q>/\n\n/g

# quote text start and end points
s/<qt>/“/g
s/<\/qt>/”/g

# new paragraph
# NOTE(review): the input is said to be latin-1, which cannot encode the
# U+2225 character matched here, so this pattern presumably read "&par;"
# before the script text was entity-decoded. Both spellings are handled;
# confirm against real input.
s/∥/\n\n/g
s/&par;/\n\n/g

# a date: the opening tag is dropped, the closing tag becomes a colon
s/<d>//g
s/<\/d>/:/g

# brackets
s/&obr;/{/g
s/&cbr;/}/g

# space
# NOTE(review): as written, the pattern and the replacement of the first
# command are both a plain space, which makes it a no-op. Presumably the
# pattern was a non-breaking space or "&nbsp;" before the script text was
# entity-decoded; the entity form is handled explicitly -- confirm.
s/ / /g
s/&nbsp;/ /g

# superscript: the opening tag becomes a space, the closing tag is dropped
s/<sup>/ /g
s/<\/sup>//g

# xNNNN codes (unknown what the NNNN refers to; not unicode)
s/<x2145>/`/g
s/<x2146>/'/g
s/<x2150>/-/g
s/<xA043>/../g