129 lines
3.8 KiB
Python
129 lines
3.8 KiB
Python
# pdf-segmented: Generate PDFs using separate compression for foreground and background
|
|
# Copyright (C) 2025 Lee Yingtong Li
|
|
#
|
|
# This program is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU Affero General Public License as published by
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU Affero General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU Affero General Public License
|
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
from .compression import CompressionOptions, compress_pages
|
|
from .input.xcf import xcf_get_pages
|
|
from .output.djvu import djvu_write_pages
|
|
from .output.pdf import pdf_write_pages
|
|
from .segmentation import segment_pages
|
|
|
|
import os
|
|
import sys
|
|
import tempfile
|
|
from typing import Optional
|
|
|
|
def convert_file(
    input_file: str,
    output_file: str,
    input_format: Optional[str] = None,
    output_format: Optional[str] = None,
    fg_compression: Optional[str] = None,
    bg_compression: Optional[str] = None,
    options: Optional['CompressionOptions'] = None
) -> None:
    """Convert input_file to a segmented PDF or DjVu document at output_file.

    Arguments left as None are imputed:

    - input_format: 'xcf' (a warning is printed to stderr if the file name
      does not end in '.xcf')
    - output_format: 'pdf' or 'djvu', taken from the output file extension
    - fg_compression: 'jbig2' for PDF, 'jb2' for DjVu
    - bg_compression: 'jpeg' for PDF, 'iw44' for DjVu
    - options: a fresh CompressionOptions()

    All arguments are checked before any work is done. User errors (unknown
    output extension, compression unsupported by the output format) are
    reported on stderr and terminate via sys.exit(1).

    Raises NotImplementedError if input_format is not 'xcf' or output_format
    is not 'pdf' or 'djvu'.
    """
    # Impute arguments
    if input_format is None:
        if not input_file.endswith('.xcf'):
            print('Warning: Unknown input file extension, assuming XCF', file=sys.stderr)
        # XCF is assumed in both cases, as the warning above promises
        input_format = 'xcf'

    if output_format is None:
        if output_file.endswith('.pdf'):
            output_format = 'pdf'
        elif output_file.endswith('.djvu'):
            output_format = 'djvu'
        else:
            print('Error: Unknown output file extension (try --output-format)', file=sys.stderr)
            sys.exit(1)

    # Reject unsupported output formats up front rather than after the whole pipeline has run
    if output_format not in ('pdf', 'djvu'):
        raise NotImplementedError('Unsupported output format: {}'.format(output_format))

    if fg_compression is None:
        fg_compression = 'jbig2' if output_format == 'pdf' else 'jb2'

    if bg_compression is None:
        bg_compression = 'jpeg' if output_format == 'pdf' else 'iw44'

    # Validate format compatibility
    if output_format == 'pdf':
        if bg_compression not in ('jp2', 'jpeg', 'png'):
            print('Error: Unsupported --bg-compression for PDF format (supported: jp2, jpeg, png)', file=sys.stderr)
            sys.exit(1)
        if fg_compression not in ('jbig2', 'png'):
            print('Error: Unsupported --fg-compression for PDF format (supported: jbig2, png)', file=sys.stderr)
            sys.exit(1)
    else:
        if bg_compression != 'iw44':
            print('Error: Unsupported --bg-compression for DjVu format (supported: iw44)', file=sys.stderr)
            sys.exit(1)
        if fg_compression != 'jb2':
            print('Error: Unsupported --fg-compression for DjVu format (supported: jb2)', file=sys.stderr)
            sys.exit(1)

    if input_format != 'xcf':
        raise NotImplementedError('Unsupported input format: {}'.format(input_format))

    # Build the default per call instead of sharing one instance created at import time
    if options is None:
        options = CompressionOptions()

    # The temporary directory is removed recursively on exit, even if a step
    # below fails and leaves intermediate files behind
    with tempfile.TemporaryDirectory(suffix='pdf-segmented') as tempdir:
        # Get input pages
        input_pages = xcf_get_pages(input_file)

        # Segment foreground and background
        segmented_pages = segment_pages(input_pages)

        # Compress layers
        compressed_pages = compress_pages(
            input_pages=input_pages,
            segmented_pages=segmented_pages,
            fg_compression=fg_compression,
            bg_compression=bg_compression,
            options=options,
            tempdir=tempdir
        )

        # Convert to output format
        if output_format == 'pdf':
            pdf_write_pages(
                input_pages=input_pages,
                compressed_pages=compressed_pages,
                output_file=output_file
            )
        else:
            djvu_write_pages(
                input_pages=input_pages,
                compressed_pages=compressed_pages,
                output_file=output_file,
                tempdir=tempdir
            )
|