134 lines
3.8 KiB
Python
134 lines
3.8 KiB
Python
# pdf-segmented: Generate PDFs using separate compression for foreground and background
|
|
# Copyright (C) 2025 Lee Yingtong Li
|
|
#
|
|
# This program is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU Affero General Public License as published by
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU Affero General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU Affero General Public License
|
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
from ..compression import CompressedLayer, CompressedPage
|
|
from ..compression.jbig2 import JBIG2Layer
|
|
from ..compression.jp2 import JP2Layer
|
|
from ..compression.jpeg import JPEGLayer
|
|
from ..input import InputPages
|
|
|
|
from pikepdf import ContentStreamInstruction, Name, Operator, Page, Pdf, Stream, unparse_content_stream
|
|
|
|
from typing import Generator
|
|
|
|
def pdf_write_pages(
    input_pages: InputPages,
    compressed_pages: Generator[CompressedPage],
    output_file: str
) -> None:
    """Write the compressed pages to a new PDF saved at output_file.

    Every page is created with the same size, computed from the pixel
    dimensions and DPI of input_pages. On each page the background layer
    (compressed_page.bg) is drawn first and the foreground layer
    (compressed_page.fg) is drawn on top of it.

    Args:
        input_pages: Source of the width, height and dpi used to size the pages.
        compressed_pages: Yields one CompressedPage per output page; consumed
            lazily, one page at a time.
        output_file: Destination passed to Pdf.save.
    """
    # Get size of image in PostScript points (72 points per inch)
    width_pt = input_pages.width / input_pages.dpi * 72
    height_pt = input_pages.height / input_pages.dpi * 72

    # Build PDF. The context manager closes the document even when producing
    # or writing one of the pages raises part-way through.
    with Pdf.new() as pdf:
        # Write each page
        for compressed_page in compressed_pages:
            page = pdf.add_blank_page(page_size=(width_pt, height_pt))

            # Write each layer to the page; each call appends its draw
            # instruction to content_instructions (background first)
            content_instructions = []
            pdf_write_layer(input_pages=input_pages, pdf=pdf, page=page, layer=compressed_page.bg, content_instructions=content_instructions)
            pdf_write_layer(input_pages=input_pages, pdf=pdf, page=page, layer=compressed_page.fg, content_instructions=content_instructions)

            # Generate content stream: save the graphics state, scale the
            # coordinate system by the page size so the layers fill the page,
            # draw the layers, then restore the graphics state
            wrapped_instructions = [
                ContentStreamInstruction([], Operator('q')),
                ContentStreamInstruction([width_pt, 0, 0, height_pt, 0, 0], Operator('cm')),
                *content_instructions,
                ContentStreamInstruction([], Operator('Q')),
            ]
            content_stream = unparse_content_stream(wrapped_instructions)
            page.Contents.write(content_stream)

        # Save PDF
        pdf.save(output_file)
|
|
|
|
def pdf_write_layer(
    input_pages: InputPages,
    pdf: Pdf,
    page: Page,
    layer: CompressedLayer,
    content_instructions,
) -> None:
    """Write one compressed layer to the page as an image.

    Chooses the image dictionary entries (colour space, decode filter, bit
    depth and, for JBIG2, a mask) that match the layer's compression, then
    delegates to pdf_write_image with layer.data as the image payload.

    Args:
        input_pages: Forwarded to pdf_write_image.
        pdf: Document that will own the image stream.
        page: Page the image is added to.
        layer: The compressed layer; must be a JBIG2Layer, JP2Layer or JPEGLayer.
        content_instructions: List that pdf_write_image appends the draw
            instruction to (mutated in place).

    Raises:
        NotImplementedError: If layer is of any other type.
    """
    # Select the image parameters for this layer's compression format
    if isinstance(layer, JBIG2Layer):
        image_params = dict(
            ColorSpace=Name.DeviceGray,
            Filter=Name.JBIG2Decode,
            BitsPerComponent=1,
            Mask=[1, 1],  # Layer mask
        )
    elif isinstance(layer, JP2Layer):
        image_params = dict(
            ColorSpace=Name.DeviceRGB,
            Filter=Name.JPXDecode,
            BitsPerComponent=8,
        )
    elif isinstance(layer, JPEGLayer):
        image_params = dict(
            ColorSpace=Name.DeviceRGB,
            Filter=Name.DCTDecode,
            BitsPerComponent=8,
        )
    else:
        # Name the offending type so an unsupported layer is easy to diagnose
        raise NotImplementedError(f'Unsupported layer type: {type(layer).__name__}')

    # Write the layer to PDF
    pdf_write_image(
        input_pages=input_pages,
        pdf=pdf,
        page=page,
        value=layer.data,
        content_instructions=content_instructions,
        **image_params
    )
|
|
|
|
def pdf_write_image(
    input_pages: InputPages,
    pdf: Pdf,
    page: Page,
    value: bytes,
    content_instructions,
    **kwargs
) -> None:
    """Add value to the page as an Image XObject and queue an instruction to draw it.

    Args:
        input_pages: Supplies the Width and Height entries of the image.
        pdf: Document that owns the new stream.
        page: Page whose resources receive the image.
        value: Data stored as the body of the image stream.
        content_instructions: List the 'Do' instruction is appended to
            (mutated in place).
        **kwargs: Additional entries for the image stream dictionary.
    """
    # Create the Image stream, sized from the input pages
    image_xobject = Stream(
        pdf,
        value,
        Type=Name.XObject,
        Subtype=Name.Image,
        Width=input_pages.width,
        Height=input_pages.height,
        **kwargs
    )

    # Register it as an XObject resource; the returned name identifies it on the page
    resource_name = page.add_resource(image_xobject, '/XObject')

    # Queue the instruction that renders the image
    draw_image = ContentStreamInstruction([resource_name], Operator('Do'))
    content_instructions.append(draw_image)
|