1 files changed, 12 insertions, 5 deletions
diff --git a/extras/mkocrtxt.sh b/extras/mkocrtxt.sh
index 02f7146..e683459 100644
--- a/extras/mkocrtxt.sh
+++ b/extras/mkocrtxt.sh
@@ -2,25 +2,32 @@
 # See COPYING file for copyright and license details.
 #
 # Makes a text file with text extracted by tesseract
+# Requires imagemagick and tesseract
 #
 # Note: Unfortunately tesseract works much better if one first
 #       makes the image to be OCRed significantly larger. This
 #       script therefore temporarily creates a larger file to
-#       feed to tesseract.
+#       feed to tesseract. These temporary large files are also
+#       converted to tiff format, so that they're readable by
+#       any version of tesseract.
 
-for i in `ls *png`
+for i in `ls *tif`
 do
+	echo "$i"
+
 	# create a much bigger version of the page image
 	width=`identify "$i" | awk '{print $3}' | sed 's/x.*//'`
 	bigwidth=`expr $width \* 4`
-	convert "$i" -geometry ${bigwidth}x "$i.big.png"
+	convert "$i" -geometry ${bigwidth}x "$i.big.tif"
 
 	# scan the page image
-	tesseract "$i.big.png" "$i" 2>&1 | sed '/Tesseract Open Source OCR Engine/d'
+	tesseract "$i.big.tif" "$i" 2>&1 | sed '/Tesseract Open Source OCR Engine/d'
 
 	# combine the page text with the rest of the book
 	cat "$i.txt" >> book.txt
 
 	# remove working files
-	rm -f "$i.big.png" "$i.txt"
+	rm -f "$i.big.tif" "$i.txt"
 done
+
+echo book.txt