Implement JPEG2000 compression

This commit is contained in:
RunasSudo 2025-05-08 22:05:46 +10:00
parent 730cf23ba7
commit f6cbe3215b
Signed by: RunasSudo
GPG Key ID: 7234E476BF21C61A
5 changed files with 79 additions and 12 deletions

View File

@ -14,7 +14,7 @@
# You should have received a copy of the GNU Affero General Public License # You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>. # along with this program. If not, see <https://www.gnu.org/licenses/>.
from .compression import compress_pages from .compression import CompressionOptions, compress_pages
from .input.xcf import xcf_get_pages from .input.xcf import xcf_get_pages
from .output.pdf import pdf_write_pages from .output.pdf import pdf_write_pages
from .segmentation import segment_pages from .segmentation import segment_pages
@ -31,7 +31,7 @@ def convert_file(
output_format: Optional[str] = None, output_format: Optional[str] = None,
fg_compression: str = 'jbig2', fg_compression: str = 'jbig2',
bg_compression: str = 'jpeg', bg_compression: str = 'jpeg',
jpeg_quality: Optional[float] = None options: CompressionOptions = CompressionOptions()
) -> None: ) -> None:
# Create temporary directory # Create temporary directory
tempdir = tempfile.mkdtemp('pdf-segmented') tempdir = tempfile.mkdtemp('pdf-segmented')
@ -63,7 +63,7 @@ def convert_file(
segmented_pages=segmented_pages, segmented_pages=segmented_pages,
fg_compression=fg_compression, fg_compression=fg_compression,
bg_compression=bg_compression, bg_compression=bg_compression,
jpeg_quality=jpeg_quality, options=options,
tempdir=tempdir tempdir=tempdir
) )

View File

@ -15,6 +15,7 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>. # along with this program. If not, see <https://www.gnu.org/licenses/>.
from . import convert_file from . import convert_file
from .compression import CompressionOptions
import argparse import argparse
@ -28,7 +29,9 @@ parser.add_argument('output_file')
parser.add_argument('--input-format', choices=['xcf']) parser.add_argument('--input-format', choices=['xcf'])
parser.add_argument('--output-format', choices=['pdf']) parser.add_argument('--output-format', choices=['pdf'])
parser.add_argument('--fg-compression', default='jbig2', choices=['jbig2']) parser.add_argument('--fg-compression', default='jbig2', choices=['jbig2'])
parser.add_argument('--bg-compression', default='jpeg', choices=['jpeg']) parser.add_argument('--bg-compression', default='jpeg', choices=['jpeg', 'jp2'])
parser.add_argument('--jp2-lossless', action='store_true')
parser.add_argument('--jp2-rate', type=float)
parser.add_argument('--jpeg-quality', type=float) parser.add_argument('--jpeg-quality', type=float)
args = parser.parse_args() args = parser.parse_args()
@ -41,5 +44,9 @@ convert_file(
output_format=args.output_format, output_format=args.output_format,
fg_compression=args.fg_compression, fg_compression=args.fg_compression,
bg_compression=args.bg_compression, bg_compression=args.bg_compression,
jpeg_quality=args.jpeg_quality options=CompressionOptions(
jp2_lossless=args.jp2_lossless,
jp2_rate=args.jp2_rate,
jpeg_quality=args.jpeg_quality
)
) )

View File

@ -22,6 +22,7 @@ class CompressedLayer:
pass pass
from .jbig2 import jbig2_compress_layer from .jbig2 import jbig2_compress_layer
from .jp2 import jp2_compress_layer
from .jpeg import jpeg_compress_layer from .jpeg import jpeg_compress_layer
from ..segmentation import SegmentedPage from ..segmentation import SegmentedPage
@ -30,6 +31,12 @@ from PIL import Image
from dataclasses import dataclass from dataclasses import dataclass
from typing import Generator, Iterable, Optional from typing import Generator, Iterable, Optional
# Codec-specific tuning options, bundled so they can be threaded through the
# compression pipeline as one argument.
#
# Frozen because an instance is used as a default argument value (see
# convert_file): a default is evaluated once and shared by every call, so a
# mutable instance would let one caller's changes leak into later calls.
@dataclass(frozen=True)
class CompressionOptions:
    # Forwarded to jp2_compress_layer as jp2_lossless
    jp2_lossless: bool = False
    # Forwarded to jp2_compress_layer as jp2_rate; None means not specified
    jp2_rate: Optional[float] = None
    # Forwarded to jpeg_compress_layer as jpeg_quality; None means not specified
    jpeg_quality: Optional[float] = None
@dataclass @dataclass
class CompressedPage: class CompressedPage:
fg: CompressedLayer fg: CompressedLayer
@ -39,7 +46,7 @@ def compress_pages(
segmented_pages: Iterable[SegmentedPage], segmented_pages: Iterable[SegmentedPage],
fg_compression: str, fg_compression: str,
bg_compression: str, bg_compression: str,
jpeg_quality: Optional[float], options: CompressionOptions,
tempdir: str tempdir: str
) -> Generator[CompressedPage]: ) -> Generator[CompressedPage]:
@ -49,7 +56,7 @@ def compress_pages(
segmented_page=segmented_page, segmented_page=segmented_page,
fg_compression=fg_compression, fg_compression=fg_compression,
bg_compression=bg_compression, bg_compression=bg_compression,
jpeg_quality=jpeg_quality, options=options,
tempdir=tempdir tempdir=tempdir
) )
@ -57,7 +64,7 @@ def compress_page(
segmented_page: SegmentedPage, segmented_page: SegmentedPage,
fg_compression: str, fg_compression: str,
bg_compression: str, bg_compression: str,
jpeg_quality: Optional[float], options: CompressionOptions,
tempdir: str tempdir: str
) -> CompressedPage: ) -> CompressedPage:
@ -66,13 +73,13 @@ def compress_page(
fg=compress_layer( fg=compress_layer(
layer=segmented_page.fg, layer=segmented_page.fg,
compression=fg_compression, compression=fg_compression,
jpeg_quality=jpeg_quality, options=options,
tempdir=tempdir tempdir=tempdir
), ),
bg=compress_layer( bg=compress_layer(
layer=segmented_page.bg, layer=segmented_page.bg,
compression=bg_compression, compression=bg_compression,
jpeg_quality=jpeg_quality, options=options,
tempdir=tempdir tempdir=tempdir
) )
) )
@ -80,14 +87,16 @@ def compress_page(
def compress_layer( def compress_layer(
layer: Image, layer: Image,
compression: str, compression: str,
jpeg_quality: Optional[float], options: CompressionOptions,
tempdir: str tempdir: str
) -> CompressedLayer: ) -> CompressedLayer:
# Compress the given layer # Compress the given layer
if compression == 'jbig2': if compression == 'jbig2':
return jbig2_compress_layer(layer=layer, tempdir=tempdir) return jbig2_compress_layer(layer=layer, tempdir=tempdir)
elif compression == 'jp2':
return jp2_compress_layer(layer=layer, jp2_lossless=options.jp2_lossless, jp2_rate=options.jp2_rate)
elif compression == 'jpeg': elif compression == 'jpeg':
return jpeg_compress_layer(layer=layer, jpeg_quality=jpeg_quality) return jpeg_compress_layer(layer=layer, jpeg_quality=options.jpeg_quality)
else: else:
raise NotImplementedError() raise NotImplementedError()

View File

@ -0,0 +1,39 @@
# pdf-segmented: Generate PDFs using separate compression for foreground and background
# Copyright (C) 2025 Lee Yingtong Li
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from . import CompressedLayer
from PIL import Image
from dataclasses import dataclass
import io
from typing import Optional
# A layer that has been encoded as JPEG 2000
@dataclass
class JP2Layer(CompressedLayer):
    # The encoded image, exactly as the encoder wrote it out
    data: bytes
def jp2_compress_layer(layer: Image, jp2_lossless: bool, jp2_rate: Optional[float]) -> JP2Layer:
    """Encode ``layer`` as JPEG 2000 in memory and wrap the bytes in a JP2Layer.

    ``jp2_lossless`` is inverted and passed to the encoder as ``irreversible``.
    When ``jp2_rate`` is not None it is passed as the only entry of
    ``quality_layers``, with ``quality_mode`` set to ``'rates'``; when it is
    None neither option is passed and the encoder's own defaults apply.
    """
    # Options that are always passed to the encoder
    encoder_args = {
        'format': 'jpeg2000',
        'no_jp2': False,
        'irreversible': not jp2_lossless,
    }

    # Rate control is only requested when the caller supplied a rate
    if jp2_rate is not None:
        encoder_args.update(quality_mode='rates', quality_layers=[jp2_rate])

    # Encode into an in-memory buffer rather than a file on disk
    with io.BytesIO() as buffer:
        layer.save(buffer, **encoder_args)
        return JP2Layer(data=buffer.getvalue())

View File

@ -16,6 +16,7 @@
from ..compression import CompressedLayer, CompressedPage from ..compression import CompressedLayer, CompressedPage
from ..compression.jbig2 import JBIG2Layer from ..compression.jbig2 import JBIG2Layer
from ..compression.jp2 import JP2Layer
from ..compression.jpeg import JPEGLayer from ..compression.jpeg import JPEGLayer
from ..input import InputPages from ..input import InputPages
@ -78,6 +79,17 @@ def pdf_write_layer(
BitsPerComponent=1, BitsPerComponent=1,
Mask=[1, 1] # Layer mask Mask=[1, 1] # Layer mask
) )
elif isinstance(layer, JP2Layer):
pdf_write_image(
input_pages=input_pages,
pdf=pdf,
page=page,
value=layer.data,
content_instructions=content_instructions,
ColorSpace=Name.DeviceRGB,
Filter=Name.JPXDecode,
BitsPerComponent=8
)
elif isinstance(layer, JPEGLayer): elif isinstance(layer, JPEGLayer):
pdf_write_image( pdf_write_image(
input_pages=input_pages, input_pages=input_pages,