diff options
Diffstat (limited to 'extras/mkocrpdf-cuneform.sh')
| -rw-r--r-- | extras/mkocrpdf-cuneform.sh | 21 | 
1 files changed, 21 insertions, 0 deletions
| diff --git a/extras/mkocrpdf-cuneform.sh b/extras/mkocrpdf-cuneform.sh new file mode 100644 index 0000000..94e10eb --- /dev/null +++ b/extras/mkocrpdf-cuneform.sh @@ -0,0 +1,21 @@ +#!/bin/sh +# +# Makes a pdf with embedded text as extracted by cuneiform +# +# Requires imagemagick, pdftk, hocr2pdf and cuneiform + +for i in `ls *png` +do +	a=`basename $i .png` +	echo processing $a + +	convert $i $a.bmp +	cuneiform -f hocr -o $a.html $a.bmp +	rm -f $a.bmp + +	# hocr2pdf has a habit of segfaulting, so fall back to convert +	hocr2pdf -i $i -o $a.pdf < $a.html || convert $i $a.pdf +	rm -f $a.html +done + +pdftk *pdf cat output book.pdf | 
