![]() compile ( 'NumberOfPages: ' ) metadata = open ( " %s /pdfmetadata.out" % workpath, 'r' ). load ( open ( workpath '/bmarks.out' )), 0 ) # Integrate the parsed bookmarks into the PDF metadata p = re. dest, workpath )) print ( "Original PDF metadata extracted." ) # Parse the sexpr pdfbmarks = walk_bmarks ( sexpdata. system ( "pdftk %s dump_data_utf8 > %s /pdfmetadata.out" % ( args. st_size > 0 : # Extract the metadata from the PDF document retval = retval | os. src, workpath )) print ( "Bookmarks extracted." ) # Check for zero-length outline if os. ![]() system ( "djvused %s -u -e 'print-outline' > %s /bmarks.out" % ( args. src, workpath )) if retval > 0 : print ( " \n NOTE: There was a problem on ddjvu to convert to pdf." ) exit ( retval ) else : print ( "PDF (without toc) already found, use it." ) # Extract the bookmark data from the DJVU document retval = 0 retval = retval | os. system ( "ddjvu -v -format=pdf %s %s /dumpd.pdf" % ( args. isfile ( workpath '/dumpd.pdf' ): retval = os. src ) # Make the PDF, compressing with JPG so they are not ridiculous in size # (cwd) if not os. src ) else : # Record the file we are about to process open ( workpath '/inprocess', 'w' ). src, fname )) exit ( 3 ) else : print ( "NOTE: Continuing to process %s. src : print ( "ERROR: Attempting to process %s before %s is completed. isfile ( workpath '/inprocess' ): fname = open ( workpath '/inprocess', 'r' ). dest = workpath '/dumpd.pdf' # Check for a file presently being processed if os. # Also, stash the temp pdf in the clean spot args. parse_args () # Reescape the filenames because we will just be sending them to commands via system # and we don't otherwise work directly with the DJVU and PDF files. See man ddjvu for more information.' ) args = parser. ![]() add_argument ( '-q, -quality', dest = 'quality', type = int, default = 80, help = 'specify JPEG lossy compression quality (50-150). add_argument ( 'dest', metavar = 'pdffile', type = str, help = 'the destination PDF file' ) parser. add_argument ( 'src', metavar = 'djvufile', type = str, help = 'the source DJVU file' ) parser. Very useful for Sony Digital Paper system' ) parser. ArgumentParser ( description = 'Convert DJVU format to PDF format preserving OCRd text and metadata. getcwd () # From Python docs, nice and slick command line arguments parser = argparse. split ( '#' ) wroteTitle = False else : pass return output workpath = os. # Modified from # License: GNU GPL v3 import sexpdata import argparse import os import pipes import subprocess import re # Recursively walks the sexpr tree and outputs a metadata format understandable by pdftk def walk_bmarks ( bmarks, level ): output = '' wroteTitle = False for j in bmarks : if isinstance ( j, list ): output = output walk_bmarks ( j, level 1 ) elif isinstance ( j, str ): if not wroteTitle : output = output "BookmarkBegin \n BookmarkTitle: %s \n BookmarkLevel: %d \n " % ( j, level ) wroteTitle = True else : output = output "BookmarkPageNumber: %s \n " % j. #!/usr/bin/env python3 # Convert DJVU to PDF with table of contents, if available.
0 Comments
Leave a Reply. |