Implement PNG compression

This commit is contained in:
RunasSudo 2025-05-08 23:15:05 +10:00
parent cf4afa44a3
commit 60c6ee92e5
Signed by: RunasSudo
GPG Key ID: 7234E476BF21C61A
3 changed files with 49 additions and 5 deletions

View File

@@ -28,8 +28,8 @@ parser.add_argument('input_file')
parser.add_argument('output_file')
parser.add_argument('--input-format', choices=['xcf'])
parser.add_argument('--output-format', choices=['pdf'])
parser.add_argument('--fg-compression', default='jbig2', choices=['jbig2'])
parser.add_argument('--bg-compression', default='jpeg', choices=['jpeg', 'jp2'])
parser.add_argument('--fg-compression', default='jbig2', choices=['jbig2', 'png'])
parser.add_argument('--bg-compression', default='jpeg', choices=['jpeg', 'jp2', 'png'])
parser.add_argument('--jp2-lossless', action='store_true')
parser.add_argument('--jp2-rate', type=float)
parser.add_argument('--jpeg-quality', type=float)

View File

@@ -24,6 +24,7 @@ class CompressedLayer:
from .jbig2 import jbig2_compress_layer
from .jp2 import jp2_compress_layer
from .jpeg import jpeg_compress_layer
from .png import png_compress_layer
from ..segmentation import SegmentedPage
from PIL import Image
@@ -73,12 +74,14 @@ def compress_page(
fg=compress_layer(
layer=segmented_page.fg,
compression=fg_compression,
is_foreground=True,
options=options,
tempdir=tempdir
),
bg=compress_layer(
layer=segmented_page.bg,
compression=bg_compression,
is_foreground=False,
options=options,
tempdir=tempdir
)
@@ -87,6 +90,7 @@ def compress_page(
def compress_layer(
layer: Image,
compression: str,
is_foreground: bool,
options: CompressionOptions,
tempdir: str
) -> CompressedLayer:
@@ -98,5 +102,7 @@ def compress_layer(
return jp2_compress_layer(layer=layer, jp2_lossless=options.jp2_lossless, jp2_rate=options.jp2_rate)
elif compression == 'jpeg':
return jpeg_compress_layer(layer=layer, jpeg_quality=options.jpeg_quality)
elif compression == 'png':
return png_compress_layer(layer=layer, is_foreground=is_foreground)
else:
raise NotImplementedError()

View File

@@ -18,9 +18,10 @@ from ..compression import CompressedLayer, CompressedPage
from ..compression.jbig2 import JBIG2Layer
from ..compression.jp2 import JP2Layer
from ..compression.jpeg import JPEGLayer
from ..compression.png import PNGLayer
from ..input import InputPages
from pikepdf import ContentStreamInstruction, Name, Operator, Page, Pdf, Stream, unparse_content_stream
from pikepdf import ContentStreamInstruction, Dictionary, Name, Operator, Page, Pdf, Stream, unparse_content_stream
from typing import Generator
@@ -42,8 +43,8 @@ def pdf_write_pages(
# Write each layer to the page
content_instructions = []
pdf_write_layer(input_pages=input_pages, pdf=pdf, page=page, layer=compressed_page.bg, content_instructions=content_instructions)
pdf_write_layer(input_pages=input_pages, pdf=pdf, page=page, layer=compressed_page.fg, content_instructions=content_instructions)
pdf_write_layer(input_pages=input_pages, pdf=pdf, page=page, layer=compressed_page.bg, is_foreground=False, content_instructions=content_instructions)
pdf_write_layer(input_pages=input_pages, pdf=pdf, page=page, layer=compressed_page.fg, is_foreground=True, content_instructions=content_instructions)
# Generate content stream
wrapped_instructions = [
@@ -63,6 +64,7 @@ def pdf_write_layer(
pdf: Pdf,
page: Page,
layer: CompressedLayer,
is_foreground: bool,
content_instructions,
) -> None:
@@ -101,6 +103,42 @@ def pdf_write_layer(
Filter=Name.DCTDecode,
BitsPerComponent=8
)
elif isinstance(layer, PNGLayer):
if is_foreground:
# See PDF 1.7 section 7.4.4.3
# See also the implementation in img2pdf
pdf_write_image(
input_pages=input_pages,
pdf=pdf,
page=page,
value=layer.get_flate_data(),
content_instructions=content_instructions,
ColorSpace=Name.DeviceGray,
Filter=Name.FlateDecode,
BitsPerComponent=1,
Mask=[1, 1], # Layer mask
DecodeParms=Dictionary(
Predictor=15, # PNG prediction (on encoding, PNG optimum) - this is the only allowed value in a PNG file
BitsPerComponent=1, # Default is 8 so must set this here
Columns=input_pages.width
)
)
else:
pdf_write_image(
input_pages=input_pages,
pdf=pdf,
page=page,
value=layer.get_flate_data(),
content_instructions=content_instructions,
ColorSpace=Name.DeviceRGB,
Filter=Name.FlateDecode,
BitsPerComponent=8,
DecodeParms=Dictionary(
Predictor=15,
Colors=3, # Default is 1 so must set this here
Columns=input_pages.width
)
)
else:
raise NotImplementedError()