Compare commits

..

2 Commits

Author SHA1 Message Date
60c6ee92e5
Implement PNG compression 2025-05-08 23:15:05 +10:00
cf4afa44a3
Implement JPEG2000 compression 2025-05-08 22:05:46 +10:00
4 changed files with 88 additions and 16 deletions

View File

@ -14,7 +14,7 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from .compression import compress_pages
from .compression import CompressionOptions, compress_pages
from .input.xcf import xcf_get_pages
from .output.pdf import pdf_write_pages
from .segmentation import segment_pages
@ -31,7 +31,7 @@ def convert_file(
output_format: Optional[str] = None,
fg_compression: str = 'jbig2',
bg_compression: str = 'jpeg',
jpeg_quality: Optional[float] = None
options: CompressionOptions = CompressionOptions()
) -> None:
# Create temporary directory
tempdir = tempfile.mkdtemp('pdf-segmented')
@ -63,7 +63,7 @@ def convert_file(
segmented_pages=segmented_pages,
fg_compression=fg_compression,
bg_compression=bg_compression,
jpeg_quality=jpeg_quality,
options=options,
tempdir=tempdir
)

View File

@ -15,6 +15,7 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from . import convert_file
from .compression import CompressionOptions
import argparse
@ -27,8 +28,10 @@ parser.add_argument('input_file')
parser.add_argument('output_file')
parser.add_argument('--input-format', choices=['xcf'])
parser.add_argument('--output-format', choices=['pdf'])
parser.add_argument('--fg-compression', default='jbig2', choices=['jbig2'])
parser.add_argument('--bg-compression', default='jpeg', choices=['jpeg'])
parser.add_argument('--fg-compression', default='jbig2', choices=['jbig2', 'png'])
parser.add_argument('--bg-compression', default='jpeg', choices=['jpeg', 'jp2', 'png'])
parser.add_argument('--jp2-lossless', action='store_true')
parser.add_argument('--jp2-rate', type=float)
parser.add_argument('--jpeg-quality', type=float)
args = parser.parse_args()
@ -41,5 +44,9 @@ convert_file(
output_format=args.output_format,
fg_compression=args.fg_compression,
bg_compression=args.bg_compression,
jpeg_quality=args.jpeg_quality
options=CompressionOptions(
jp2_lossless=args.jp2_lossless,
jp2_rate=args.jp2_rate,
jpeg_quality=args.jpeg_quality
)
)

View File

@ -22,7 +22,9 @@ class CompressedLayer:
pass
from .jbig2 import jbig2_compress_layer
from .jp2 import jp2_compress_layer
from .jpeg import jpeg_compress_layer
from .png import png_compress_layer
from ..segmentation import SegmentedPage
from PIL import Image
@ -30,6 +32,12 @@ from PIL import Image
from dataclasses import dataclass
from typing import Generator, Iterable, Optional
@dataclass(frozen=True)
class CompressionOptions:
    """Codec-specific tuning options carried through the compression pipeline.

    One instance is handed from ``convert_file`` down through
    ``compress_pages`` and ``compress_page`` to ``compress_layer``, which
    forwards the relevant fields to the selected codec.

    The class is frozen because ``convert_file`` uses a single
    ``CompressionOptions()`` instance as a default argument value; that
    instance is shared by every call, so it must not be mutable.  Use
    ``dataclasses.replace`` to derive a modified copy.
    """

    # Forwarded to jp2_compress_layer as ``jp2_lossless``; set by the
    # ``--jp2-lossless`` command-line flag.
    jp2_lossless: bool = False
    # Forwarded to jp2_compress_layer as ``jp2_rate``; set by ``--jp2-rate``.
    # None means the flag was not given; units/scale are defined by the JP2
    # codec wrapper (not visible here).
    jp2_rate: Optional[float] = None
    # Forwarded to jpeg_compress_layer as ``jpeg_quality``; set by
    # ``--jpeg-quality``.  None means the flag was not given.
    jpeg_quality: Optional[float] = None
@dataclass
class CompressedPage:
fg: CompressedLayer
@ -39,7 +47,7 @@ def compress_pages(
segmented_pages: Iterable[SegmentedPage],
fg_compression: str,
bg_compression: str,
jpeg_quality: Optional[float],
options: CompressionOptions,
tempdir: str
) -> Generator[CompressedPage]:
@ -49,7 +57,7 @@ def compress_pages(
segmented_page=segmented_page,
fg_compression=fg_compression,
bg_compression=bg_compression,
jpeg_quality=jpeg_quality,
options=options,
tempdir=tempdir
)
@ -57,7 +65,7 @@ def compress_page(
segmented_page: SegmentedPage,
fg_compression: str,
bg_compression: str,
jpeg_quality: Optional[float],
options: CompressionOptions,
tempdir: str
) -> CompressedPage:
@ -66,13 +74,15 @@ def compress_page(
fg=compress_layer(
layer=segmented_page.fg,
compression=fg_compression,
jpeg_quality=jpeg_quality,
is_foreground=True,
options=options,
tempdir=tempdir
),
bg=compress_layer(
layer=segmented_page.bg,
compression=bg_compression,
jpeg_quality=jpeg_quality,
is_foreground=False,
options=options,
tempdir=tempdir
)
)
@ -80,14 +90,19 @@ def compress_page(
def compress_layer(
layer: Image,
compression: str,
jpeg_quality: Optional[float],
is_foreground: bool,
options: CompressionOptions,
tempdir: str
) -> CompressedLayer:
# Compress the given layer
if compression == 'jbig2':
return jbig2_compress_layer(layer=layer, tempdir=tempdir)
elif compression == 'jp2':
return jp2_compress_layer(layer=layer, jp2_lossless=options.jp2_lossless, jp2_rate=options.jp2_rate)
elif compression == 'jpeg':
return jpeg_compress_layer(layer=layer, jpeg_quality=jpeg_quality)
return jpeg_compress_layer(layer=layer, jpeg_quality=options.jpeg_quality)
elif compression == 'png':
return png_compress_layer(layer=layer, is_foreground=is_foreground)
else:
raise NotImplementedError()

View File

@ -16,10 +16,12 @@
from ..compression import CompressedLayer, CompressedPage
from ..compression.jbig2 import JBIG2Layer
from ..compression.jp2 import JP2Layer
from ..compression.jpeg import JPEGLayer
from ..compression.png import PNGLayer
from ..input import InputPages
from pikepdf import ContentStreamInstruction, Name, Operator, Page, Pdf, Stream, unparse_content_stream
from pikepdf import ContentStreamInstruction, Dictionary, Name, Operator, Page, Pdf, Stream, unparse_content_stream
from typing import Generator
@ -41,8 +43,8 @@ def pdf_write_pages(
# Write each layer to the page
content_instructions = []
pdf_write_layer(input_pages=input_pages, pdf=pdf, page=page, layer=compressed_page.bg, content_instructions=content_instructions)
pdf_write_layer(input_pages=input_pages, pdf=pdf, page=page, layer=compressed_page.fg, content_instructions=content_instructions)
pdf_write_layer(input_pages=input_pages, pdf=pdf, page=page, layer=compressed_page.bg, is_foreground=False, content_instructions=content_instructions)
pdf_write_layer(input_pages=input_pages, pdf=pdf, page=page, layer=compressed_page.fg, is_foreground=True, content_instructions=content_instructions)
# Generate content stream
wrapped_instructions = [
@ -62,6 +64,7 @@ def pdf_write_layer(
pdf: Pdf,
page: Page,
layer: CompressedLayer,
is_foreground: bool,
content_instructions,
) -> None:
@ -78,6 +81,17 @@ def pdf_write_layer(
BitsPerComponent=1,
Mask=[1, 1] # Layer mask
)
elif isinstance(layer, JP2Layer):
pdf_write_image(
input_pages=input_pages,
pdf=pdf,
page=page,
value=layer.data,
content_instructions=content_instructions,
ColorSpace=Name.DeviceRGB,
Filter=Name.JPXDecode,
BitsPerComponent=8
)
elif isinstance(layer, JPEGLayer):
pdf_write_image(
input_pages=input_pages,
@ -89,6 +103,42 @@ def pdf_write_layer(
Filter=Name.DCTDecode,
BitsPerComponent=8
)
elif isinstance(layer, PNGLayer):
if is_foreground:
# See PDF 1.7 section 7.4.4.3
# See also the implementation in img2pdf
pdf_write_image(
input_pages=input_pages,
pdf=pdf,
page=page,
value=layer.get_flate_data(),
content_instructions=content_instructions,
ColorSpace=Name.DeviceGray,
Filter=Name.FlateDecode,
BitsPerComponent=1,
Mask=[1, 1], # Layer mask
DecodeParms=Dictionary(
Predictor=15, # PNG prediction (on encoding, PNG optimum) - this is the only allowed value in a PNG file
BitsPerComponent=1, # Default is 8 so must set this here
Columns=input_pages.width
)
)
else:
pdf_write_image(
input_pages=input_pages,
pdf=pdf,
page=page,
value=layer.get_flate_data(),
content_instructions=content_instructions,
ColorSpace=Name.DeviceRGB,
Filter=Name.FlateDecode,
BitsPerComponent=8,
DecodeParms=Dictionary(
Predictor=15,
Colors=3, # Default is 1 so must set this here
Columns=input_pages.width
)
)
else:
raise NotImplementedError()