diff --git a/pdf_segmented/__init__.py b/pdf_segmented/__init__.py index 5b45ea2..0a84634 100644 --- a/pdf_segmented/__init__.py +++ b/pdf_segmented/__init__.py @@ -14,7 +14,7 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <https://www.gnu.org/licenses/>. -from .compression import compress_pages +from .compression import CompressionOptions, compress_pages from .input.xcf import xcf_get_pages from .output.pdf import pdf_write_pages from .segmentation import segment_pages @@ -31,7 +31,7 @@ def convert_file( output_format: Optional[str] = None, fg_compression: str = 'jbig2', bg_compression: str = 'jpeg', - jpeg_quality: Optional[float] = None + options: CompressionOptions = CompressionOptions() ) -> None: # Create temporary directory tempdir = tempfile.mkdtemp('pdf-segmented') @@ -63,7 +63,7 @@ def convert_file( segmented_pages=segmented_pages, fg_compression=fg_compression, bg_compression=bg_compression, - jpeg_quality=jpeg_quality, + options=options, tempdir=tempdir ) diff --git a/pdf_segmented/__main__.py b/pdf_segmented/__main__.py index d8c4f0b..f6bda5b 100644 --- a/pdf_segmented/__main__.py +++ b/pdf_segmented/__main__.py @@ -15,6 +15,7 @@ # along with this program. If not, see <https://www.gnu.org/licenses/>. from . 
import convert_file +from .compression import CompressionOptions import argparse @@ -28,7 +29,9 @@ parser.add_argument('output_file') parser.add_argument('--input-format', choices=['xcf']) parser.add_argument('--output-format', choices=['pdf']) parser.add_argument('--fg-compression', default='jbig2', choices=['jbig2']) -parser.add_argument('--bg-compression', default='jpeg', choices=['jpeg']) +parser.add_argument('--bg-compression', default='jpeg', choices=['jpeg', 'jp2']) +parser.add_argument('--jp2-lossless', action='store_true') +parser.add_argument('--jp2-rate', type=float) parser.add_argument('--jpeg-quality', type=float) args = parser.parse_args() @@ -41,5 +44,9 @@ convert_file( output_format=args.output_format, fg_compression=args.fg_compression, bg_compression=args.bg_compression, - jpeg_quality=args.jpeg_quality + options=CompressionOptions( + jp2_lossless=args.jp2_lossless, + jp2_rate=args.jp2_rate, + jpeg_quality=args.jpeg_quality + ) ) diff --git a/pdf_segmented/compression/__init__.py b/pdf_segmented/compression/__init__.py index 900973f..d3028a0 100644 --- a/pdf_segmented/compression/__init__.py +++ b/pdf_segmented/compression/__init__.py @@ -22,6 +22,7 @@ class CompressedLayer: pass from .jbig2 import jbig2_compress_layer +from .jp2 import jp2_compress_layer from .jpeg import jpeg_compress_layer from ..segmentation import SegmentedPage @@ -30,6 +31,12 @@ from PIL import Image from dataclasses import dataclass from typing import Generator, Iterable, Optional +@dataclass(frozen=True) # immutable: one instance is shared as a default argument value +class CompressionOptions: + jp2_lossless: bool = False + jp2_rate: Optional[float] = None + jpeg_quality: Optional[float] = None + @dataclass class CompressedPage: fg: CompressedLayer @@ -39,7 +46,7 @@ def compress_pages( segmented_pages: Iterable[SegmentedPage], fg_compression: str, bg_compression: str, - jpeg_quality: Optional[float], + options: CompressionOptions, tempdir: str ) -> Generator[CompressedPage]: @@ -49,7 +56,7 @@ def compress_pages( segmented_page=segmented_page, 
fg_compression=fg_compression, bg_compression=bg_compression, - jpeg_quality=jpeg_quality, + options=options, tempdir=tempdir ) @@ -57,7 +64,7 @@ def compress_page( segmented_page: SegmentedPage, fg_compression: str, bg_compression: str, - jpeg_quality: Optional[float], + options: CompressionOptions, tempdir: str ) -> CompressedPage: @@ -66,13 +73,13 @@ def compress_page( fg=compress_layer( layer=segmented_page.fg, compression=fg_compression, - jpeg_quality=jpeg_quality, + options=options, tempdir=tempdir ), bg=compress_layer( layer=segmented_page.bg, compression=bg_compression, - jpeg_quality=jpeg_quality, + options=options, tempdir=tempdir ) ) @@ -80,14 +87,16 @@ def compress_page( def compress_layer( layer: Image, compression: str, - jpeg_quality: Optional[float], + options: CompressionOptions, tempdir: str ) -> CompressedLayer: # Compress the given layer if compression == 'jbig2': return jbig2_compress_layer(layer=layer, tempdir=tempdir) + elif compression == 'jp2': + return jp2_compress_layer(layer=layer, jp2_lossless=options.jp2_lossless, jp2_rate=options.jp2_rate) elif compression == 'jpeg': - return jpeg_compress_layer(layer=layer, jpeg_quality=jpeg_quality) + return jpeg_compress_layer(layer=layer, jpeg_quality=options.jpeg_quality) else: raise NotImplementedError() diff --git a/pdf_segmented/compression/jp2.py b/pdf_segmented/compression/jp2.py new file mode 100644 index 0000000..5f76d80 --- /dev/null +++ b/pdf_segmented/compression/jp2.py @@ -0,0 +1,39 @@ +# pdf-segmented: Generate PDFs using separate compression for foreground and background +# Copyright (C) 2025 Lee Yingtong Li +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +from . import CompressedLayer + +from PIL import Image + +from dataclasses import dataclass +import io +from typing import Optional + +@dataclass +class JP2Layer(CompressedLayer): + data: bytes + +def jp2_compress_layer(layer: Image, jp2_lossless: bool, jp2_rate: Optional[float]) -> JP2Layer: + save_kwargs = {} + if jp2_rate is not None: + save_kwargs['quality_mode'] = 'rates' + save_kwargs['quality_layers'] = [jp2_rate] + + # Save image to JPEG 2000 + bytesio = io.BytesIO() + layer.save(bytesio, format='jpeg2000', no_jp2=False, irreversible=not jp2_lossless, **save_kwargs) + + return JP2Layer(data=bytesio.getvalue()) diff --git a/pdf_segmented/output/pdf.py b/pdf_segmented/output/pdf.py index 56073a7..9df79c9 100644 --- a/pdf_segmented/output/pdf.py +++ b/pdf_segmented/output/pdf.py @@ -16,6 +16,7 @@ from ..compression import CompressedLayer, CompressedPage from ..compression.jbig2 import JBIG2Layer +from ..compression.jp2 import JP2Layer from ..compression.jpeg import JPEGLayer from ..input import InputPages @@ -78,6 +79,17 @@ def pdf_write_layer( BitsPerComponent=1, Mask=[1, 1] # Layer mask ) + elif isinstance(layer, JP2Layer): + pdf_write_image( + input_pages=input_pages, + pdf=pdf, + page=page, + value=layer.data, + content_instructions=content_instructions, + ColorSpace=Name.DeviceRGB, + Filter=Name.JPXDecode, + BitsPerComponent=8 + ) elif isinstance(layer, JPEGLayer): pdf_write_image( input_pages=input_pages,