summaryrefslogtreecommitdiff
path: root/xmlcleanup2.sed
blob: 5352a9296b92a0afe97198a13e7ea2ca8014ca3c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
#!/bin/sed -f
# sed rules that must run after the main set

# <e> elements seem to duplicate the <v> element for some quotes, so drop
# each <e>...</e> pair together with its content.
# sed regexes have no non-greedy `.*?`, so instead peel off the LAST <e> that
# still has a </e> after it, and repeat until no such pair is left. The
# leading greedy \(.*\) pins the match to that last <e>; \1 puts the text
# before it back.
:dele
s/\(.*\)<e>.*<\/e>/\1/
t dele
# An <e> with no </e> on the same line is still cut through end of line, so
# input where <e> only ever ends a line is handled exactly as before.
s/<e>.*$//

# Whatever markup is still left at this point is unwanted: delete every
# remaining <...> tag, keeping the text between tags.
s|<[^>]*>||g
# Any XML character entity still left (&name; &#123; &#xA0;) can just go away.
# An entity reference never contains whitespace or a second '&', so the match
# stops at either one; otherwise a bare '&' in ordinary text would swallow
# everything up to the next ';' on the line.
s/&[^;&[:space:]]*;//g