summaryrefslogtreecommitdiff
path: root/xmlcleanup2.sed
diff options
context:
space:
mode:
Diffstat (limited to 'xmlcleanup2.sed')
-rw-r--r--xmlcleanup2.sed13
1 files changed, 13 insertions, 0 deletions
diff --git a/xmlcleanup2.sed b/xmlcleanup2.sed
new file mode 100644
index 0000000..5352a92
--- /dev/null
+++ b/xmlcleanup2.sed
@@ -0,0 +1,13 @@
+#!/bin/sed -f
+# sed rules that must run after the main set
+
+# <e> elements seem to duplicate <v> elements for some quotes, so drop them.
+# sed has no non-greedy .*?, so mark each </e> with a newline (GNU sed; assumes
+# one input line per pattern space), delete each <e>...marker span, then restore
+# any unmatched marker so the generic tag rule below still removes it.
+s/<\/e>/\n/g; s/<e>[^\n]*\n//g; s/\n/<\/e>/g
+s/<e>.*$//
+# (above) <e> with no </e> on its line; below: drop any remaining xml tags
+s/<[^>]*>//g
+# drop unprocessed xml entities; a bare & followed by space or another & is kept
+s/&[^&;[:space:]]*;//g