summaryrefslogtreecommitdiff
path: root/xmlcleanup2.sed
diff options
context:
space:
mode:
author: Nick White <git@njw.me.uk> 2013-03-31 18:32:08 +0100
committer: Nick White <git@njw.me.uk> 2013-03-31 18:32:08 +0100
commit: 2742524159ad2bc861711df084f6bd77588e9e9b (patch)
tree: 00508f5d2b5232a8d10298acb59facf86f6f5ff9 /xmlcleanup2.sed
download: oed2dict-2742524159ad2bc861711df084f6bd77588e9e9b.tar.bz2
download: oed2dict-2742524159ad2bc861711df084f6bd77588e9e9b.zip
Initial commit
Diffstat (limited to 'xmlcleanup2.sed')
-rw-r--r--xmlcleanup2.sed13
1 files changed, 13 insertions, 0 deletions
diff --git a/xmlcleanup2.sed b/xmlcleanup2.sed
new file mode 100644
index 0000000..5352a92
--- /dev/null
+++ b/xmlcleanup2.sed
@@ -0,0 +1,13 @@
+#!/bin/sed -f
+# Second-pass cleanup: sed rules that must run after the main rule set.
+
+# <e> tags seem to duplicate <v> tags for some quotes, so drop them.
+# NOTE: assumes an <e> tag only ever comes at the end of a line; everything from the first <e> to end of line is deleted (no g flag needed: one match consumes the rest of the line).
+s/<e>.*$//
+# TODO: delete only the <e>...</e> span instead of the above. POSIX sed regular expressions have no non-greedy .*?, so the attempt below does not work as written.
+#s/<e>.*?<\/e>//g
+
+# Any XML tags still unprocessed at this point are simply removed.
+s/<[^>]*>//g
+# Any XML character entities still unprocessed are simply removed. The entity name may not contain whitespace or another '&', so a literal '&' in the text is not deleted together with everything up to the next ';'.
+s/&[^;&[:space:]]*;//g