Implement PNG compression
This commit is contained in:
parent
f6cbe3215b
commit
4abbe79d5a
@ -28,8 +28,8 @@ parser.add_argument('input_file')
|
|||||||
parser.add_argument('output_file')
|
parser.add_argument('output_file')
|
||||||
parser.add_argument('--input-format', choices=['xcf'])
|
parser.add_argument('--input-format', choices=['xcf'])
|
||||||
parser.add_argument('--output-format', choices=['pdf'])
|
parser.add_argument('--output-format', choices=['pdf'])
|
||||||
parser.add_argument('--fg-compression', default='jbig2', choices=['jbig2'])
|
parser.add_argument('--fg-compression', default='jbig2', choices=['jbig2', 'png'])
|
||||||
parser.add_argument('--bg-compression', default='jpeg', choices=['jpeg', 'jp2'])
|
parser.add_argument('--bg-compression', default='jpeg', choices=['jpeg', 'jp2', 'png'])
|
||||||
parser.add_argument('--jp2-lossless', action='store_true')
|
parser.add_argument('--jp2-lossless', action='store_true')
|
||||||
parser.add_argument('--jp2-rate', type=float)
|
parser.add_argument('--jp2-rate', type=float)
|
||||||
parser.add_argument('--jpeg-quality', type=float)
|
parser.add_argument('--jpeg-quality', type=float)
|
||||||
|
@ -24,6 +24,7 @@ class CompressedLayer:
|
|||||||
from .jbig2 import jbig2_compress_layer
|
from .jbig2 import jbig2_compress_layer
|
||||||
from .jp2 import jp2_compress_layer
|
from .jp2 import jp2_compress_layer
|
||||||
from .jpeg import jpeg_compress_layer
|
from .jpeg import jpeg_compress_layer
|
||||||
|
from .png import png_compress_layer
|
||||||
from ..segmentation import SegmentedPage
|
from ..segmentation import SegmentedPage
|
||||||
|
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
@ -73,12 +74,14 @@ def compress_page(
|
|||||||
fg=compress_layer(
|
fg=compress_layer(
|
||||||
layer=segmented_page.fg,
|
layer=segmented_page.fg,
|
||||||
compression=fg_compression,
|
compression=fg_compression,
|
||||||
|
is_foreground=True,
|
||||||
options=options,
|
options=options,
|
||||||
tempdir=tempdir
|
tempdir=tempdir
|
||||||
),
|
),
|
||||||
bg=compress_layer(
|
bg=compress_layer(
|
||||||
layer=segmented_page.bg,
|
layer=segmented_page.bg,
|
||||||
compression=bg_compression,
|
compression=bg_compression,
|
||||||
|
is_foreground=False,
|
||||||
options=options,
|
options=options,
|
||||||
tempdir=tempdir
|
tempdir=tempdir
|
||||||
)
|
)
|
||||||
@ -87,6 +90,7 @@ def compress_page(
|
|||||||
def compress_layer(
|
def compress_layer(
|
||||||
layer: Image,
|
layer: Image,
|
||||||
compression: str,
|
compression: str,
|
||||||
|
is_foreground: bool,
|
||||||
options: CompressionOptions,
|
options: CompressionOptions,
|
||||||
tempdir: str
|
tempdir: str
|
||||||
) -> CompressedLayer:
|
) -> CompressedLayer:
|
||||||
@ -98,5 +102,7 @@ def compress_layer(
|
|||||||
return jp2_compress_layer(layer=layer, jp2_lossless=options.jp2_lossless, jp2_rate=options.jp2_rate)
|
return jp2_compress_layer(layer=layer, jp2_lossless=options.jp2_lossless, jp2_rate=options.jp2_rate)
|
||||||
elif compression == 'jpeg':
|
elif compression == 'jpeg':
|
||||||
return jpeg_compress_layer(layer=layer, jpeg_quality=options.jpeg_quality)
|
return jpeg_compress_layer(layer=layer, jpeg_quality=options.jpeg_quality)
|
||||||
|
elif compression == 'png':
|
||||||
|
return png_compress_layer(layer=layer, is_foreground=is_foreground)
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
61
pdf_segmented/compression/png.py
Normal file
61
pdf_segmented/compression/png.py
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
# pdf-segmented: Generate PDFs using separate compression for foreground and background
|
||||||
|
# Copyright (C) 2025 Lee Yingtong Li
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
from . import CompressedLayer
|
||||||
|
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
import io
|
||||||
|
import struct
|
||||||
|
|
||||||
|
@dataclass
class PNGLayer(CompressedLayer):
    """A compressed layer held in memory as the bytes of a complete PNG file."""

    # Raw contents of the PNG file: the 8-byte signature followed by its chunks
    data: bytes

    def get_flate_data(self) -> bytes:
        """Return the concatenated payloads of all IDAT chunks in ``data``.

        The PNG signature, the chunk framing (length, type, CRC) and every
        non-IDAT chunk are discarded; only the IDAT payloads, which carry the
        DEFLATE-compressed image data, are kept in file order.
        """
        bytesio = io.BytesIO(self.data)
        bytesio.seek(8)  # Skip the 8-byte PNG signature

        idat_payloads = []

        while True:
            # Each chunk is: 4-byte big-endian length, 4-byte type, payload, 4-byte CRC
            length_bytes = bytesio.read(4)
            if length_bytes == b'':  # EOF
                break
            length = struct.unpack('>I', length_bytes)[0]
            cid = bytesio.read(4)
            payload = bytesio.read(length)
            bytesio.seek(4, io.SEEK_CUR)  # Skip the CRC - it is not verified here

            if cid == b'IDAT':
                # IDAT chunk contains DEFLATE data
                idat_payloads.append(payload)
            elif cid == b'IEND':
                # IEND marks the end of the PNG datastream, so do not try to
                # interpret anything that may follow it as further chunks
                break

        return b''.join(idat_payloads)
|
||||||
|
|
||||||
|
def png_compress_layer(layer: Image.Image, is_foreground: bool) -> PNGLayer:
    """Encode ``layer`` as an optimised PNG and wrap the bytes in a PNGLayer.

    layer: the image to compress.
    is_foreground: if true, the layer is reduced to 1 bit per pixel before
        encoding; otherwise it is encoded as 8-bit RGB.
    """
    if is_foreground:
        # Foreground is 1bpp
        layer = layer.convert('1')
    elif layer.mode != 'RGB':
        # The PDF writer describes a PNG background as DeviceRGB with 3 colour
        # components at 8 bits each, so any other mode (alpha, palette,
        # greyscale, ...) must be normalised or the stream will not match
        layer = layer.convert('RGB')

    # Save image to PNG
    bytesio = io.BytesIO()
    layer.save(bytesio, format='png', optimize=True)

    return PNGLayer(data=bytesio.getvalue())
|
@ -18,9 +18,10 @@ from ..compression import CompressedLayer, CompressedPage
|
|||||||
from ..compression.jbig2 import JBIG2Layer
|
from ..compression.jbig2 import JBIG2Layer
|
||||||
from ..compression.jp2 import JP2Layer
|
from ..compression.jp2 import JP2Layer
|
||||||
from ..compression.jpeg import JPEGLayer
|
from ..compression.jpeg import JPEGLayer
|
||||||
|
from ..compression.png import PNGLayer
|
||||||
from ..input import InputPages
|
from ..input import InputPages
|
||||||
|
|
||||||
from pikepdf import ContentStreamInstruction, Name, Operator, Page, Pdf, Stream, unparse_content_stream
|
from pikepdf import ContentStreamInstruction, Dictionary, Name, Operator, Page, Pdf, Stream, unparse_content_stream
|
||||||
|
|
||||||
from typing import Generator
|
from typing import Generator
|
||||||
|
|
||||||
@ -42,8 +43,8 @@ def pdf_write_pages(
|
|||||||
|
|
||||||
# Write each layer to the page
|
# Write each layer to the page
|
||||||
content_instructions = []
|
content_instructions = []
|
||||||
pdf_write_layer(input_pages=input_pages, pdf=pdf, page=page, layer=compressed_page.bg, content_instructions=content_instructions)
|
pdf_write_layer(input_pages=input_pages, pdf=pdf, page=page, layer=compressed_page.bg, is_foreground=False, content_instructions=content_instructions)
|
||||||
pdf_write_layer(input_pages=input_pages, pdf=pdf, page=page, layer=compressed_page.fg, content_instructions=content_instructions)
|
pdf_write_layer(input_pages=input_pages, pdf=pdf, page=page, layer=compressed_page.fg, is_foreground=True, content_instructions=content_instructions)
|
||||||
|
|
||||||
# Generate content stream
|
# Generate content stream
|
||||||
wrapped_instructions = [
|
wrapped_instructions = [
|
||||||
@ -63,6 +64,7 @@ def pdf_write_layer(
|
|||||||
pdf: Pdf,
|
pdf: Pdf,
|
||||||
page: Page,
|
page: Page,
|
||||||
layer: CompressedLayer,
|
layer: CompressedLayer,
|
||||||
|
is_foreground: bool,
|
||||||
content_instructions,
|
content_instructions,
|
||||||
) -> None:
|
) -> None:
|
||||||
|
|
||||||
@ -101,6 +103,42 @@ def pdf_write_layer(
|
|||||||
Filter=Name.DCTDecode,
|
Filter=Name.DCTDecode,
|
||||||
BitsPerComponent=8
|
BitsPerComponent=8
|
||||||
)
|
)
|
||||||
|
elif isinstance(layer, PNGLayer):
|
||||||
|
if is_foreground:
|
||||||
|
# See PDF 1.7 section 7.4.4.3
|
||||||
|
# See also the implementation in img2pdf
|
||||||
|
pdf_write_image(
|
||||||
|
input_pages=input_pages,
|
||||||
|
pdf=pdf,
|
||||||
|
page=page,
|
||||||
|
value=layer.get_flate_data(),
|
||||||
|
content_instructions=content_instructions,
|
||||||
|
ColorSpace=Name.DeviceGray,
|
||||||
|
Filter=Name.FlateDecode,
|
||||||
|
BitsPerComponent=1,
|
||||||
|
Mask=[1, 1], # Layer mask
|
||||||
|
DecodeParms=Dictionary(
|
||||||
|
Predictor=15, # PNG prediction (on encoding, PNG optimum) - this is the only allowed value in a PNG file
|
||||||
|
BitsPerComponent=1, # Default is 8 so must set this here
|
||||||
|
Columns=input_pages.width
|
||||||
|
)
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
pdf_write_image(
|
||||||
|
input_pages=input_pages,
|
||||||
|
pdf=pdf,
|
||||||
|
page=page,
|
||||||
|
value=layer.get_flate_data(),
|
||||||
|
content_instructions=content_instructions,
|
||||||
|
ColorSpace=Name.DeviceRGB,
|
||||||
|
Filter=Name.FlateDecode,
|
||||||
|
BitsPerComponent=8,
|
||||||
|
DecodeParms=Dictionary(
|
||||||
|
Predictor=15,
|
||||||
|
Colors=3, # Default is 1 so must set this here
|
||||||
|
Columns=input_pages.width
|
||||||
|
)
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user