103 lines
2.8 KiB
Python

# pdf-segmented: Generate PDFs using separate compression for foreground and background
# Copyright (C) 2025 Lee Yingtong Li
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
class CompressedLayer:
    # Superclass for all compressed layer types (JPEG, JBIG2, etc.)

    def cleanup(self):
        # Clean up any temporary files, etc.
        # The base implementation has nothing to release and simply returns None;
        # subclasses that hold resources are presumably expected to override this.
        # Takes `self` so that it can be invoked as a normal instance method
        # (layer.cleanup()) on the base class and on subclasses that do not override it.
        pass
from .jbig2 import jbig2_compress_layer
from .jp2 import jp2_compress_layer
from .jpeg import jpeg_compress_layer
from ..segmentation import SegmentedPage
from PIL import Image
from dataclasses import dataclass
from typing import Generator, Iterable, Optional
@dataclass
class CompressionOptions:
    # Bundle of per-codec settings handed to compress_layer.
    # compress_layer only reads the fields belonging to the codec it dispatches to.

    # Passed to jp2_compress_layer as jp2_lossless
    # (presumably selects lossless JPEG 2000 encoding - confirm in .jp2)
    jp2_lossless: bool = False

    # Passed to jp2_compress_layer as jp2_rate; None when unset
    # (how None is interpreted is decided by .jp2 - TODO confirm)
    jp2_rate: Optional[float] = None

    # Passed to jpeg_compress_layer as jpeg_quality; None when unset
    # (how None is interpreted is decided by .jpeg - TODO confirm)
    jpeg_quality: Optional[float] = None
@dataclass
class CompressedPage:
    # The two compressed layers produced for one segmented page (see compress_page)

    # Compressed form of the page's foreground layer (SegmentedPage.fg)
    fg: CompressedLayer

    # Compressed form of the page's background layer (SegmentedPage.bg)
    bg: CompressedLayer
def compress_pages(
    segmented_pages: Iterable[SegmentedPage],
    fg_compression: str,
    bg_compression: str,
    options: CompressionOptions,
    tempdir: str
) -> Generator[CompressedPage, None, None]:
    # Compress foreground and background layers on each segmented page
    #
    # This is a generator: pages are compressed one at a time, as the caller
    # iterates, and in the same order as segmented_pages.
    #
    # segmented_pages: the pages to compress
    # fg_compression: compression name applied to every foreground layer
    # bg_compression: compression name applied to every background layer
    # options: codec settings, forwarded unchanged to compress_page
    # tempdir: directory path, forwarded unchanged to compress_page
    #
    # Yields one CompressedPage per input page.
    #
    # NOTE: The return annotation spells out all three Generator parameters;
    # the single-argument form Generator[X] is only accepted by Python 3.13+
    # and raises TypeError at import time on earlier versions.

    for segmented_page in segmented_pages:
        yield compress_page(
            segmented_page=segmented_page,
            fg_compression=fg_compression,
            bg_compression=bg_compression,
            options=options,
            tempdir=tempdir
        )
def compress_page(
    segmented_page: SegmentedPage,
    fg_compression: str,
    bg_compression: str,
    options: CompressionOptions,
    tempdir: str
) -> CompressedPage:
    # Build a CompressedPage from one segmented page
    #
    # The foreground layer is compressed with fg_compression and the background
    # layer with bg_compression; options and tempdir are shared by both calls.

    def _compress(layer, compression):
        # Single place where the shared arguments are attached
        return compress_layer(
            layer=layer,
            compression=compression,
            options=options,
            tempdir=tempdir
        )

    # Foreground first, then background
    compressed_fg = _compress(segmented_page.fg, fg_compression)
    compressed_bg = _compress(segmented_page.bg, bg_compression)

    return CompressedPage(fg=compressed_fg, bg=compressed_bg)
def compress_layer(
    layer: Image.Image,
    compression: str,
    options: CompressionOptions,
    tempdir: str
) -> CompressedLayer:
    # Compress the given layer with the codec selected by `compression`
    #
    # layer: the image to compress (annotated as PIL's Image.Image class;
    #        `Image` on its own is the PIL.Image module)
    # compression: 'jbig2', 'jp2' or 'jpeg'
    # options: codec settings; only the fields for the selected codec are read
    # tempdir: directory path; only forwarded to the JBIG2 compressor
    #
    # Returns whatever the selected *_compress_layer function returns.
    # Raises NotImplementedError if `compression` is not one of the names above.

    if compression == 'jbig2':
        return jbig2_compress_layer(layer=layer, tempdir=tempdir)
    elif compression == 'jp2':
        return jp2_compress_layer(layer=layer, jp2_lossless=options.jp2_lossless, jp2_rate=options.jp2_rate)
    elif compression == 'jpeg':
        return jpeg_compress_layer(layer=layer, jpeg_quality=options.jpeg_quality)
    else:
        # Name the rejected value so a mistyped option can be diagnosed from the traceback
        raise NotImplementedError('Unsupported compression: {!r}'.format(compression))