Source code for gws.tools.pdf

import PyPDF2

import gws
import gws.tools.os2
import gws.tools.units


def render_html(html, page_size, margin, out_path):
    """Render an HTML string to a PDF file by running ``wkhtmltopdf``.

    The markup is first written to ``out_path + '.html'`` and that file is
    then handed to ``wkhtmltopdf``, which writes the PDF to ``out_path``.

    Args:
        html: HTML markup. If the substring ``charset`` does not occur in
            it, a ``<meta charset="utf8"/>`` tag is prepended.
        page_size: Indexable pair; item 0 is passed as ``--page-width`` and
            item 1 as ``--page-height`` (units are whatever the caller and
            wkhtmltopdf agree on -- not visible here, confirm with callers).
        margin: Indexable of four values passed as top, right, bottom and
            left margins, in that order. A falsy value means no margins.
        out_path: Path of the PDF file to create.

    Returns:
        ``out_path``.
    """
    if 'charset' not in html:
        html = '<meta charset="utf8"/>' + html

    html_path = out_path + '.html'
    gws.write_file_b(html_path, gws.as_bytes(html))

    margin = margin or [0, 0, 0, 0]

    cmd = ['wkhtmltopdf', '--disable-javascript', '--disable-smart-shrinking']
    cmd += ['--dpi', str(gws.tools.units.PDF_DPI)]
    cmd += ['--margin-top', str(margin[0])]
    cmd += ['--margin-right', str(margin[1])]
    cmd += ['--margin-bottom', str(margin[2])]
    cmd += ['--margin-left', str(margin[3])]
    cmd += ['--page-width', str(page_size[0])]
    cmd += ['--page-height', str(page_size[1])]
    # 'page' introduces the input document, followed by the output file.
    cmd += ['page', html_path, out_path]

    gws.log.debug(cmd)
    gws.tools.os2.run(cmd, echo=False)

    return out_path
def render_html_to_png(html, page_size, margin, out_path):
    """Render an HTML string to an image file by running ``wkhtmltoimage``.

    The markup is first written to ``out_path + '.html'`` and that file is
    then handed to ``wkhtmltoimage``, which writes the image to ``out_path``.

    Args:
        html: HTML markup. If ``margin`` is truthy, the markup is wrapped in
            a ``<body>`` element carrying the margins as inline CSS. If the
            substring ``charset`` does not occur in the result, a
            ``<meta charset="utf8"/>`` tag is prepended.
        page_size: Indexable pair; item 0 is used for both ``--width`` and
            ``--crop-w``, item 1 for both ``--height`` and ``--crop-h``.
        margin: Indexable of four values emitted as CSS pixel margins in the
            order given (CSS shorthand: top, right, bottom, left), or a
            falsy value for no wrapper element.
        out_path: Path of the image file to create.

    Returns:
        ``out_path``.
    """
    if margin:
        # Margins are applied through CSS; no margin options are passed
        # to wkhtmltoimage below.
        html = (
            f' <body style="margin:{margin[0]}px {margin[1]}px {margin[2]}px {margin[3]}px">'
            f' {html} </body> '
        )

    if 'charset' not in html:
        html = '<meta charset="utf8"/>' + html

    html_path = out_path + '.html'
    gws.write_file_b(html_path, gws.as_bytes(html))

    width = str(page_size[0])
    height = str(page_size[1])

    cmd = ['wkhtmltoimage', '--disable-javascript', '--disable-smart-width']
    cmd += ['--width', width, '--height', height]
    cmd += ['--crop-w', width, '--crop-h', height]
    cmd += ['--transparent', html_path, out_path]

    gws.log.debug(cmd)
    gws.tools.os2.run(cmd, echo=False)

    return out_path
def merge(a_path, b_path, out_path):
    """Overlay the pages of one PDF onto the pages of another.

    For every page index of the PDF at ``a_path``, the page with the same
    index from ``b_path`` is merged into it with ``mergePage`` and the
    result is appended to the output document.

    Args:
        a_path: Path of the base PDF. Its page count determines the number
            of pages in the output.
        b_path: Path of the PDF whose pages are merged into those of
            ``a_path``. Pages are fetched by the same index, so it is
            expected to have at least as many pages as ``a_path``
            (NOTE(review): a shorter document presumably makes ``getPage``
            fail -- confirm the desired behaviour).
        out_path: Path of the PDF file to create.

    Returns:
        ``out_path``.
    """
    # Both inputs stay open until the writer is done, and are closed even
    # if reading, merging or writing raises.
    with open(a_path, 'rb') as fa, open(b_path, 'rb') as fb:
        ra = PyPDF2.PdfFileReader(fa)
        rb = PyPDF2.PdfFileReader(fb)
        w = PyPDF2.PdfFileWriter()

        for n in range(ra.getNumPages()):
            page = ra.getPage(n)
            page.mergePage(rb.getPage(n))
            w.addPage(page)

        with open(out_path, 'wb') as out_fp:
            w.write(out_fp)

    return out_path
def concat(paths, out_path):
    """Concatenate several PDF files into one.

    Args:
        paths: Sequence of paths of the PDF files to join, in output order.
        out_path: Path of the PDF file to create.

    Returns:
        ``out_path``, except when exactly one input path is given: then
        nothing is written and that single input path is returned as is.
    """
    # only one path given - just return it
    if len(paths) == 1:
        return paths[0]

    # NB: readers (and the files behind them) must be kept around until
    # the writer is done
    files = []
    try:
        for p in paths:
            files.append(open(p, 'rb'))

        readers = [PyPDF2.PdfFileReader(fp) for fp in files]

        w = PyPDF2.PdfFileWriter()
        for r in readers:
            w.appendPagesFromReader(r)

        with open(out_path, 'wb') as out_fp:
            w.write(out_fp)
    finally:
        # Close every file that was opened, even if a later open(),
        # a reader or the write failed.
        for fp in files:
            fp.close()

    return out_path
def page_count(path):
    """Return the number of pages in the PDF file at ``path``."""
    with open(path, 'rb') as stream:
        # Query the reader while the file is still open.
        return PyPDF2.PdfFileReader(stream).getNumPages()
def to_image(in_path, out_path, size, format):
    """Convert a PDF file to a raster image by running Ghostscript (``gs``).

    Args:
        in_path: Path of the PDF file to convert.
        out_path: Path of the image file to create.
        size: Indexable pair; item 0 is passed as ``DEVICEWIDTHPOINTS`` and
            item 1 as ``DEVICEHEIGHTPOINTS``. The PDF is fitted to that
            page (``-dPDFFitPage=true``).
        format: Output format: ``'png'``, ``'jpeg'`` or ``'jpg'``.

    Returns:
        ``out_path``.

    Raises:
        ValueError: If ``format`` is not one of the supported values.
    """
    # Map the requested format to the Ghostscript output device name.
    if format == 'png':
        device = 'png16m'
    elif format in ('jpeg', 'jpg'):
        device = 'jpeg'
    else:
        raise ValueError(f'unknown format {format!r}')

    cmd = [
        'gs',
        '-q',
        '-dNOPAUSE',
        '-dBATCH',
        f'-dDEVICEWIDTHPOINTS={size[0]}',
        f'-dDEVICEHEIGHTPOINTS={size[1]}',
        '-dPDFFitPage=true',
        f'-sDEVICE={device}',
        '-dTextAlphaBits=4',
        '-dGraphicsAlphaBits=4',
        f'-sOutputFile={out_path}',
        in_path,
    ]

    gws.log.debug(cmd)
    gws.tools.os2.run(cmd, echo=False)

    return out_path