109 lines
3.0 KiB
Python
109 lines
3.0 KiB
Python
# pdf-segmented: Generate PDFs using separate compression for foreground and background
# Copyright (C) 2025 Lee Yingtong Li
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
class CompressedLayer:
    # Superclass for all compressed layer types (JPEG, JBIG2, etc.)

    def cleanup(self) -> None:
        # Clean up any temporary files, etc.
        #
        # Takes `self` so that it can be called on an instance
        # (layer.cleanup()); without it, any instance call raises TypeError.
        # The base implementation is a no-op. Presumably subclasses that
        # hold temporary resources override it (they are defined in other
        # modules - verify there).
        pass
|
|
|
|
from .jbig2 import jbig2_compress_layer
|
|
from .jp2 import jp2_compress_layer
|
|
from .jpeg import jpeg_compress_layer
|
|
from .png import png_compress_layer
|
|
from ..segmentation import SegmentedPage
|
|
|
|
from PIL import Image
|
|
|
|
from dataclasses import dataclass
|
|
from typing import Generator, Iterable, Optional
|
|
|
|
@dataclass
class CompressionOptions:
    # Tunable settings consulted when compressing a layer.
    # Every field has a default, so CompressionOptions() is valid.

    # Handed to jp2_compress_layer as its jp2_lossless argument
    jp2_lossless: bool = False

    # Handed to jp2_compress_layer as its jp2_rate argument (None unless set)
    jp2_rate: Optional[float] = None

    # Handed to jpeg_compress_layer as its jpeg_quality argument (None unless set)
    jpeg_quality: Optional[float] = None
|
|
|
|
@dataclass
class CompressedPage:
    # One page after compression: a compressed foreground layer paired
    # with a compressed background layer.

    # Compressed foreground layer
    fg: CompressedLayer

    # Compressed background layer
    bg: CompressedLayer
|
|
|
|
def compress_pages(
    segmented_pages: Iterable[SegmentedPage],
    fg_compression: str,
    bg_compression: str,
    options: CompressionOptions,
    tempdir: str
) -> Generator[CompressedPage, None, None]:
    # Compress foreground and background layers on each segmented page
    #
    # This is a generator: each page is compressed only when the caller
    # asks for the next item, and results are yielded in input order.
    #
    # segmented_pages: pages to compress
    # fg_compression / bg_compression: compression names forwarded to
    #     compress_page for the foreground / background layer
    # options, tempdir: forwarded unchanged to compress_page
    #
    # The return annotation spells out all three Generator parameters;
    # the one-argument form Generator[X] is only accepted from Python 3.13
    # and raises TypeError at definition time on earlier versions.

    for segmented_page in segmented_pages:
        yield compress_page(
            segmented_page=segmented_page,
            fg_compression=fg_compression,
            bg_compression=bg_compression,
            options=options,
            tempdir=tempdir
        )
|
|
|
|
def compress_page(
    segmented_page: SegmentedPage,
    fg_compression: str,
    bg_compression: str,
    options: CompressionOptions,
    tempdir: str
) -> CompressedPage:
    # Compress both layers of a single segmented page
    #
    # The foreground is compressed first, then the background; both calls
    # share the same options and tempdir and differ only in the layer,
    # the compression name and the is_foreground flag.

    foreground = compress_layer(
        layer=segmented_page.fg,
        compression=fg_compression,
        is_foreground=True,
        options=options,
        tempdir=tempdir
    )

    background = compress_layer(
        layer=segmented_page.bg,
        compression=bg_compression,
        is_foreground=False,
        options=options,
        tempdir=tempdir
    )

    return CompressedPage(fg=foreground, bg=background)
|
|
|
|
def compress_layer(
    layer: Image.Image,
    compression: str,
    is_foreground: bool,
    options: CompressionOptions,
    tempdir: str
) -> CompressedLayer:
    # Compress the given layer
    #
    # layer: image to compress. `Image` here is the PIL module, so the
    #     image class is spelled Image.Image in the annotation.
    # compression: one of 'jbig2', 'jp2', 'jpeg' or 'png'
    # is_foreground: only forwarded to the PNG compressor
    # options: supplies jp2_lossless and jp2_rate for 'jp2', and
    #     jpeg_quality for 'jpeg'
    # tempdir: only forwarded to the JBIG2 compressor
    #
    # Raises NotImplementedError for any other compression name.

    if compression == 'jbig2':
        return jbig2_compress_layer(layer=layer, tempdir=tempdir)
    elif compression == 'jp2':
        return jp2_compress_layer(layer=layer, jp2_lossless=options.jp2_lossless, jp2_rate=options.jp2_rate)
    elif compression == 'jpeg':
        return jpeg_compress_layer(layer=layer, jpeg_quality=options.jpeg_quality)
    elif compression == 'png':
        return png_compress_layer(layer=layer, is_foreground=is_foreground)
    else:
        # Name the offending value so a mistyped option is easy to diagnose
        raise NotImplementedError(f'Unsupported compression type: {compression!r}')
|