Implement JPEG2000 compression
This commit is contained in:
parent
730cf23ba7
commit
f6cbe3215b
@ -14,7 +14,7 @@
|
|||||||
# You should have received a copy of the GNU Affero General Public License
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
from .compression import compress_pages
|
from .compression import CompressionOptions, compress_pages
|
||||||
from .input.xcf import xcf_get_pages
|
from .input.xcf import xcf_get_pages
|
||||||
from .output.pdf import pdf_write_pages
|
from .output.pdf import pdf_write_pages
|
||||||
from .segmentation import segment_pages
|
from .segmentation import segment_pages
|
||||||
@ -31,7 +31,7 @@ def convert_file(
|
|||||||
output_format: Optional[str] = None,
|
output_format: Optional[str] = None,
|
||||||
fg_compression: str = 'jbig2',
|
fg_compression: str = 'jbig2',
|
||||||
bg_compression: str = 'jpeg',
|
bg_compression: str = 'jpeg',
|
||||||
jpeg_quality: Optional[float] = None
|
options: CompressionOptions = CompressionOptions()
|
||||||
) -> None:
|
) -> None:
|
||||||
# Create temporary directory
|
# Create temporary directory
|
||||||
tempdir = tempfile.mkdtemp('pdf-segmented')
|
tempdir = tempfile.mkdtemp('pdf-segmented')
|
||||||
@ -63,7 +63,7 @@ def convert_file(
|
|||||||
segmented_pages=segmented_pages,
|
segmented_pages=segmented_pages,
|
||||||
fg_compression=fg_compression,
|
fg_compression=fg_compression,
|
||||||
bg_compression=bg_compression,
|
bg_compression=bg_compression,
|
||||||
jpeg_quality=jpeg_quality,
|
options=options,
|
||||||
tempdir=tempdir
|
tempdir=tempdir
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -15,6 +15,7 @@
|
|||||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
from . import convert_file
|
from . import convert_file
|
||||||
|
from .compression import CompressionOptions
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
@ -28,7 +29,9 @@ parser.add_argument('output_file')
|
|||||||
parser.add_argument('--input-format', choices=['xcf'])
|
parser.add_argument('--input-format', choices=['xcf'])
|
||||||
parser.add_argument('--output-format', choices=['pdf'])
|
parser.add_argument('--output-format', choices=['pdf'])
|
||||||
parser.add_argument('--fg-compression', default='jbig2', choices=['jbig2'])
|
parser.add_argument('--fg-compression', default='jbig2', choices=['jbig2'])
|
||||||
parser.add_argument('--bg-compression', default='jpeg', choices=['jpeg'])
|
parser.add_argument('--bg-compression', default='jpeg', choices=['jpeg', 'jp2'])
|
||||||
|
parser.add_argument('--jp2-lossless', action='store_true')
|
||||||
|
parser.add_argument('--jp2-rate', type=float)
|
||||||
parser.add_argument('--jpeg-quality', type=float)
|
parser.add_argument('--jpeg-quality', type=float)
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
@ -41,5 +44,9 @@ convert_file(
|
|||||||
output_format=args.output_format,
|
output_format=args.output_format,
|
||||||
fg_compression=args.fg_compression,
|
fg_compression=args.fg_compression,
|
||||||
bg_compression=args.bg_compression,
|
bg_compression=args.bg_compression,
|
||||||
jpeg_quality=args.jpeg_quality
|
options=CompressionOptions(
|
||||||
|
jp2_lossless=args.jp2_lossless,
|
||||||
|
jp2_rate=args.jp2_rate,
|
||||||
|
jpeg_quality=args.jpeg_quality
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
@ -22,6 +22,7 @@ class CompressedLayer:
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
from .jbig2 import jbig2_compress_layer
|
from .jbig2 import jbig2_compress_layer
|
||||||
|
from .jp2 import jp2_compress_layer
|
||||||
from .jpeg import jpeg_compress_layer
|
from .jpeg import jpeg_compress_layer
|
||||||
from ..segmentation import SegmentedPage
|
from ..segmentation import SegmentedPage
|
||||||
|
|
||||||
@ -30,6 +31,12 @@ from PIL import Image
|
|||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Generator, Iterable, Optional
|
from typing import Generator, Iterable, Optional
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class CompressionOptions:
    """Codec-specific tuning knobs passed down to the layer compressors.

    Instances are immutable: an instance of this class is used as a default
    argument value, so it is shared between calls and must not be modified
    in place. Build a new instance to change a setting.
    """

    # Forwarded to the JPEG 2000 compressor as its ``jp2_lossless`` argument
    jp2_lossless: bool = False
    # Forwarded to the JPEG 2000 compressor as its ``jp2_rate`` argument;
    # None means no rate was requested
    jp2_rate: Optional[float] = None
    # Forwarded to the JPEG compressor as its ``jpeg_quality`` argument;
    # None means no quality was requested
    jpeg_quality: Optional[float] = None
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class CompressedPage:
|
class CompressedPage:
|
||||||
fg: CompressedLayer
|
fg: CompressedLayer
|
||||||
@ -39,7 +46,7 @@ def compress_pages(
|
|||||||
segmented_pages: Iterable[SegmentedPage],
|
segmented_pages: Iterable[SegmentedPage],
|
||||||
fg_compression: str,
|
fg_compression: str,
|
||||||
bg_compression: str,
|
bg_compression: str,
|
||||||
jpeg_quality: Optional[float],
|
options: CompressionOptions,
|
||||||
tempdir: str
|
tempdir: str
|
||||||
) -> Generator[CompressedPage]:
|
) -> Generator[CompressedPage]:
|
||||||
|
|
||||||
@ -49,7 +56,7 @@ def compress_pages(
|
|||||||
segmented_page=segmented_page,
|
segmented_page=segmented_page,
|
||||||
fg_compression=fg_compression,
|
fg_compression=fg_compression,
|
||||||
bg_compression=bg_compression,
|
bg_compression=bg_compression,
|
||||||
jpeg_quality=jpeg_quality,
|
options=options,
|
||||||
tempdir=tempdir
|
tempdir=tempdir
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -57,7 +64,7 @@ def compress_page(
|
|||||||
segmented_page: SegmentedPage,
|
segmented_page: SegmentedPage,
|
||||||
fg_compression: str,
|
fg_compression: str,
|
||||||
bg_compression: str,
|
bg_compression: str,
|
||||||
jpeg_quality: Optional[float],
|
options: CompressionOptions,
|
||||||
tempdir: str
|
tempdir: str
|
||||||
) -> CompressedPage:
|
) -> CompressedPage:
|
||||||
|
|
||||||
@ -66,13 +73,13 @@ def compress_page(
|
|||||||
fg=compress_layer(
|
fg=compress_layer(
|
||||||
layer=segmented_page.fg,
|
layer=segmented_page.fg,
|
||||||
compression=fg_compression,
|
compression=fg_compression,
|
||||||
jpeg_quality=jpeg_quality,
|
options=options,
|
||||||
tempdir=tempdir
|
tempdir=tempdir
|
||||||
),
|
),
|
||||||
bg=compress_layer(
|
bg=compress_layer(
|
||||||
layer=segmented_page.bg,
|
layer=segmented_page.bg,
|
||||||
compression=bg_compression,
|
compression=bg_compression,
|
||||||
jpeg_quality=jpeg_quality,
|
options=options,
|
||||||
tempdir=tempdir
|
tempdir=tempdir
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
@ -80,14 +87,16 @@ def compress_page(
|
|||||||
def compress_layer(
    layer: Image,
    compression: str,
    options: CompressionOptions,
    tempdir: str
) -> CompressedLayer:
    """Compress a single layer with the named compression scheme.

    Parameters:
        layer: The image layer to compress.
        compression: Name of the scheme; 'jbig2', 'jp2' and 'jpeg' are handled.
        options: Codec-specific settings; only the fields relevant to the
            selected scheme are read.
        tempdir: Directory handed to the JBIG2 compressor.

    Returns:
        The compressed layer produced by the selected compressor.

    Raises:
        NotImplementedError: If ``compression`` is not one of the handled names.
    """

    # Compress the given layer
    if compression == 'jbig2':
        return jbig2_compress_layer(layer=layer, tempdir=tempdir)

    if compression == 'jp2':
        return jp2_compress_layer(
            layer=layer,
            jp2_lossless=options.jp2_lossless,
            jp2_rate=options.jp2_rate
        )

    if compression == 'jpeg':
        return jpeg_compress_layer(layer=layer, jpeg_quality=options.jpeg_quality)

    # Name the offending value so an unsupported choice is diagnosable
    raise NotImplementedError(f'Unsupported compression: {compression!r}')
|
||||||
|
39
pdf_segmented/compression/jp2.py
Normal file
39
pdf_segmented/compression/jp2.py
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
# pdf-segmented: Generate PDFs using separate compression for foreground and background
|
||||||
|
# Copyright (C) 2025 Lee Yingtong Li
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
from . import CompressedLayer
|
||||||
|
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
import io
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
@dataclass
class JP2Layer(CompressedLayer):
    """A compressed layer whose payload is an encoded JPEG 2000 image."""

    # Encoded image bytes, as returned by jp2_compress_layer
    data: bytes
|
||||||
|
|
||||||
|
def jp2_compress_layer(layer: Image, jp2_lossless: bool, jp2_rate: Optional[float]) -> JP2Layer:
    """Encode ``layer`` as JPEG 2000 in memory and wrap the result in a JP2Layer.

    Parameters:
        layer: The image to encode; it is saved with format 'jpeg2000'.
        jp2_lossless: Inverted and passed to the encoder as ``irreversible``.
        jp2_rate: When not None, passed as the single entry of
            ``quality_layers`` with ``quality_mode='rates'``; when None, no
            quality options are passed and the encoder defaults apply.

    Returns:
        A JP2Layer holding the encoded bytes.
    """

    # Rate options are only sent to the encoder when a rate was supplied
    rate_kwargs = (
        {'quality_mode': 'rates', 'quality_layers': [jp2_rate]}
        if jp2_rate is not None
        else {}
    )

    # Save image to JPEG 2000, using an in-memory buffer rather than a file
    with io.BytesIO() as buffer:
        layer.save(
            buffer,
            format='jpeg2000',
            no_jp2=False,
            irreversible=not jp2_lossless,
            **rate_kwargs
        )
        encoded = buffer.getvalue()

    return JP2Layer(data=encoded)
|
@ -16,6 +16,7 @@
|
|||||||
|
|
||||||
from ..compression import CompressedLayer, CompressedPage
|
from ..compression import CompressedLayer, CompressedPage
|
||||||
from ..compression.jbig2 import JBIG2Layer
|
from ..compression.jbig2 import JBIG2Layer
|
||||||
|
from ..compression.jp2 import JP2Layer
|
||||||
from ..compression.jpeg import JPEGLayer
|
from ..compression.jpeg import JPEGLayer
|
||||||
from ..input import InputPages
|
from ..input import InputPages
|
||||||
|
|
||||||
@ -78,6 +79,17 @@ def pdf_write_layer(
|
|||||||
BitsPerComponent=1,
|
BitsPerComponent=1,
|
||||||
Mask=[1, 1] # Layer mask
|
Mask=[1, 1] # Layer mask
|
||||||
)
|
)
|
||||||
|
elif isinstance(layer, JP2Layer):
|
||||||
|
pdf_write_image(
|
||||||
|
input_pages=input_pages,
|
||||||
|
pdf=pdf,
|
||||||
|
page=page,
|
||||||
|
value=layer.data,
|
||||||
|
content_instructions=content_instructions,
|
||||||
|
ColorSpace=Name.DeviceRGB,
|
||||||
|
Filter=Name.JPXDecode,
|
||||||
|
BitsPerComponent=8
|
||||||
|
)
|
||||||
elif isinstance(layer, JPEGLayer):
|
elif isinstance(layer, JPEGLayer):
|
||||||
pdf_write_image(
|
pdf_write_image(
|
||||||
input_pages=input_pages,
|
input_pages=input_pages,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user