#!/bin/sh
# path: root/extras/mkocrpdf-cuneform.sh
# blob: 94e10eb5cc20348967c2ca154947fec430c1c1fe (plain)
#
# Makes a pdf with embedded text as extracted by cuneiform
#
# Requires imagemagick, pdftk, hocr2pdf and cuneiform

# mkocrpdf - OCR every PNG page in the current directory and bind the pages
# into book.pdf.
#
# For each ./*.png the page is converted to BMP, run through cuneiform to get
# hOCR, and handed to hocr2pdf to produce <name>.pdf; if that fails the page
# is converted to an image-only PDF instead.  Afterwards every ./*.pdf except
# book.pdf itself is concatenated, in glob order, into book.pdf.
#
# Arguments: none (any that are passed are ignored)
# Outputs:   "processing <name>" per page on stdout, diagnostics on stderr
# Returns:   pdftk's exit status, or 1 when there is no PDF to merge
mkocrpdf() {
	# Glob directly instead of parsing ls so names containing whitespace
	# survive intact; the ./ prefix keeps a leading '-' from being parsed
	# as an option by the tools below.
	for page in ./*.png
	do
		# An unmatched glob is left as the literal pattern; skip it.
		[ -e "$page" ] || continue

		stem=${page%.png}
		printf 'processing %s\n' "${stem#./}"

		convert "$page" "$stem.bmp"
		cuneiform -f hocr -o "$stem.html" "$stem.bmp"
		rm -f "$stem.bmp"

		# hocr2pdf has a habit of segfaulting, so fall back to convert.
		# A missing .html (failed OCR) makes the redirection fail, which
		# takes the same fallback path.
		hocr2pdf -i "$page" -o "$stem.pdf" < "$stem.html" ||
			convert "$page" "$stem.pdf" ||
			printf 'warning: could not create %s.pdf\n' "${stem#./}" >&2
		rm -f "$stem.html"
	done

	# Gather the pages in the positional parameters (the only list type
	# plain sh has) so each file name stays a single word.
	set --
	for pdf in ./*.pdf
	do
		[ -e "$pdf" ] || continue
		# Never feed a book.pdf left over from an earlier run back into
		# itself.
		[ "$pdf" = ./book.pdf ] && continue
		set -- "$@" "$pdf"
	done

	if [ "$#" -eq 0 ]
	then
		printf '%s\n' 'no pdf pages to merge into book.pdf' >&2
		return 1
	fi

	pdftk "$@" cat output book.pdf
}

mkocrpdf