# pdf-segmented: Generate PDFs using separate compression for foreground and background
# Copyright (C) 2025 Lee Yingtong Li
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see https://www.gnu.org/licenses/.

from ..compression import CompressedLayer, CompressedPage
from ..compression.jbig2 import JBIG2Layer
from ..compression.jp2 import JP2Layer
from ..compression.jpeg import JPEGLayer
from ..input import InputPages

from pikepdf import ContentStreamInstruction, Name, Operator, Page, Pdf, Stream, unparse_content_stream
from typing import Generator


def pdf_write_pages(
    input_pages: InputPages,
    # All three type arguments are spelled out: the single-argument form
    # Generator[X] is only accepted by typing.Generator from Python 3.13 on
    # and raises TypeError at definition time on older interpreters.
    compressed_pages: Generator[CompressedPage, None, None],
    output_file: str
) -> None:
    """Write every compressed page to a new PDF and save it to *output_file*.

    Each page is created at the same size, derived once from
    ``input_pages.width``, ``input_pages.height`` and ``input_pages.dpi``.
    The background layer of a page is drawn first and the foreground layer
    second, so the foreground ends up on top.

    Args:
        input_pages: Source of the pixel dimensions and DPI shared by all pages.
        compressed_pages: Iterated exactly once; each item supplies a ``bg``
            and an ``fg`` layer.
        output_file: Destination passed to ``Pdf.save``.

    Raises:
        NotImplementedError: Propagated from ``pdf_write_layer`` when a layer
            is of an unsupported type.
    """
    # Get size of image in PostScript points (72 points per inch)
    width_pt = input_pages.width / input_pages.dpi * 72
    height_pt = input_pages.height / input_pages.dpi * 72

    # Build PDF
    pdf = Pdf.new()

    # Write each page
    for compressed_page in compressed_pages:
        page = pdf.add_blank_page(page_size=(width_pt, height_pt))

        # Write each layer to the page: background first, then foreground
        content_instructions: list[ContentStreamInstruction] = []
        for layer in (compressed_page.bg, compressed_page.fg):
            pdf_write_layer(
                input_pages=input_pages,
                pdf=pdf,
                page=page,
                layer=layer,
                content_instructions=content_instructions
            )

        # Generate content stream. The layer instructions are bracketed by
        # q ... Q (save/restore graphics state), and the cm matrix scales by
        # the page size in points before any image is drawn (PDF paints an
        # image XObject into the unit square of the current user space).
        wrapped_instructions = [
            ContentStreamInstruction([], Operator('q')),
            ContentStreamInstruction([width_pt, 0, 0, height_pt, 0, 0], Operator('cm')),
            *content_instructions,
            ContentStreamInstruction([], Operator('Q')),
        ]
        content_stream = unparse_content_stream(wrapped_instructions)
        page.Contents.write(content_stream)

    # Save PDF
    pdf.save(output_file)


def pdf_write_layer(
    input_pages: InputPages,
    pdf: Pdf,
    page: Page,
    layer: CompressedLayer,
    content_instructions: list[ContentStreamInstruction],
) -> None:
    """Embed one compressed layer in *page* and queue its draw instruction.

    The image dictionary entries (colour space, decode filter, bit depth)
    are chosen from the concrete layer type, then the actual embedding is
    delegated to ``pdf_write_image``.

    Args:
        input_pages: Supplies the pixel width/height recorded on the image.
        pdf: The PDF that will own the new image stream.
        page: The page whose resources receive the image.
        layer: A ``JBIG2Layer``, ``JP2Layer`` or ``JPEGLayer``; its ``data``
            attribute is used as the raw stream contents.
        content_instructions: List that is appended to in place.

    Raises:
        NotImplementedError: If *layer* is not one of the supported types.
    """
    # Pick the image dictionary entries for this codec
    if isinstance(layer, JBIG2Layer):
        image_params = dict(
            ColorSpace=Name.DeviceGray,
            Filter=Name.JBIG2Decode,
            BitsPerComponent=1,
            # Layer mask: a colour-key Mask range of [1, 1] on a 1-bit image
            # (samples in the range are not painted, per the PDF spec)
            Mask=[1, 1]
        )
    elif isinstance(layer, JP2Layer):
        image_params = dict(
            ColorSpace=Name.DeviceRGB,
            Filter=Name.JPXDecode,
            BitsPerComponent=8
        )
    elif isinstance(layer, JPEGLayer):
        image_params = dict(
            ColorSpace=Name.DeviceRGB,
            Filter=Name.DCTDecode,
            BitsPerComponent=8
        )
    else:
        raise NotImplementedError(
            f'Unsupported layer type: {type(layer).__name__}'
        )

    # Write the layer to PDF
    pdf_write_image(
        input_pages=input_pages,
        pdf=pdf,
        page=page,
        value=layer.data,
        content_instructions=content_instructions,
        **image_params
    )


def pdf_write_image(
    input_pages: InputPages,
    pdf: Pdf,
    page: Page,
    value: bytes,
    content_instructions: list[ContentStreamInstruction],
    **kwargs
) -> None:
    """Add *value* to *page* as an Image XObject and queue a ``Do`` for it.

    Args:
        input_pages: Supplies the ``Width`` and ``Height`` entries (pixels).
        pdf: The PDF that will own the new stream object.
        page: The page whose ``/XObject`` resources receive the image.
        value: Already-encoded image data, stored as the stream contents.
        content_instructions: List that is appended to in place with the
            instruction that paints the image.
        **kwargs: Extra entries for the image stream dictionary (for example
            ``ColorSpace``, ``Filter``, ``BitsPerComponent``, ``Mask``).
    """
    # Write the layer as an Image

    # Insert the Image as an XObject resource
    xobj = Stream(
        pdf,
        value,
        Type=Name.XObject,
        Subtype=Name.Image,
        Width=input_pages.width,
        Height=input_pages.height,
        **kwargs
    )
    xobj_name = page.add_resource(xobj, '/XObject')

    # Add render instruction to the content stream
    content_instructions.append(
        ContentStreamInstruction([xobj_name], Operator('Do'))
    )