diff options
Diffstat (limited to 'xmlcleanup2.sed')
-rw-r--r-- | xmlcleanup2.sed | 13 |
1 files changed, 13 insertions, 0 deletions
diff --git a/xmlcleanup2.sed b/xmlcleanup2.sed
new file mode 100644
index 0000000..5352a92
--- /dev/null
+++ b/xmlcleanup2.sed
@@ -0,0 +1,13 @@
+#!/bin/sed -f
+# sed rules that must run after the main set
+
+# <e> tags seem to duplicate <v> tags for some quotes, so drop them.
+# sed has no non-greedy `.*?`; [^<]* stops at the first tag instead.
+s/<e>[^<]*<\/e>//g
+# Fallback: an <e> left unclosed or holding nested tags is cut to end of line.
+s/<e>.*$//
+
+# any xml tags not processed can just go away
+s/<[^>]*>//g
+# any xml character entities not processed can just go away
+s/&[^;]*;//g