129 lines
3.8 KiB
Python

# pdf-segmented: Generate PDFs using separate compression for foreground and background
# Copyright (C) 2025 Lee Yingtong Li
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from .compression import CompressionOptions, compress_pages
from .input.xcf import xcf_get_pages
from .output.djvu import djvu_write_pages
from .output.pdf import pdf_write_pages
from .segmentation import segment_pages
import os
import sys
import tempfile
from typing import Optional
def convert_file(
    input_file: str,
    output_file: str,
    input_format: Optional[str] = None,
    output_format: Optional[str] = None,
    fg_compression: Optional[str] = None,
    bg_compression: Optional[str] = None,
    options: Optional['CompressionOptions'] = None
) -> None:
    """
    Convert input_file to a PDF or DjVu file at output_file, compressing the
    foreground and background of each page separately.

    Arguments left as None are imputed:

    * input_format: 'xcf' (a warning is printed to stderr if input_file does
      not end with '.xcf')
    * output_format: 'pdf' or 'djvu', from the extension of output_file
    * fg_compression: 'jbig2' for PDF, 'jb2' for DjVu
    * bg_compression: 'jpeg' for PDF, 'iw44' for DjVu
    * options: a new CompressionOptions() created for this call

    Accepted combinations are:

    * PDF: fg_compression in ('jbig2', 'png'), bg_compression in ('jp2', 'jpeg', 'png')
    * DjVu: fg_compression 'jb2', bg_compression 'iw44'

    Raises NotImplementedError if input_format or output_format is not one of
    the formats above.

    Raises SystemExit (status 1), after printing an error to stderr, if the
    output format cannot be determined from the extension of output_file, or
    if a compression method is not accepted for the output format.
    """

    # Impute arguments
    if input_format is None:
        if not input_file.endswith('.xcf'):
            print('Warning: Unknown input file extension, assuming XCF', file=sys.stderr)
        # XCF is the only input format, so fall back to it as the warning says
        input_format = 'xcf'

    if output_format is None:
        if output_file.endswith('.pdf'):
            output_format = 'pdf'
        elif output_file.endswith('.djvu'):
            output_format = 'djvu'
        else:
            print('Error: Unknown output file extension (try --output-format)', file=sys.stderr)
            sys.exit(1)

    if fg_compression is None:
        if output_format == 'pdf':
            fg_compression = 'jbig2'
        elif output_format == 'djvu':
            fg_compression = 'jb2'
        else:
            raise NotImplementedError()

    if bg_compression is None:
        if output_format == 'pdf':
            bg_compression = 'jpeg'
        elif output_format == 'djvu':
            bg_compression = 'iw44'
        else:
            raise NotImplementedError()

    # Validate format compatibility
    if output_format == 'pdf':
        if bg_compression not in ('jp2', 'jpeg', 'png'):
            print('Error: Unsupported --bg-compression for PDF format (supported: jp2, jpeg, png)', file=sys.stderr)
            sys.exit(1)
        if fg_compression not in ('jbig2', 'png'):
            print('Error: Unsupported --fg-compression for PDF format (supported: jbig2, png)', file=sys.stderr)
            sys.exit(1)
    elif output_format == 'djvu':
        if bg_compression != 'iw44':
            print('Error: Unsupported --bg-compression for DjVu format (supported: iw44)', file=sys.stderr)
            sys.exit(1)
        if fg_compression != 'jb2':
            print('Error: Unsupported --fg-compression for DjVu format (supported: jb2)', file=sys.stderr)
            sys.exit(1)
    else:
        # Reject an unsupported output format before doing any work
        raise NotImplementedError()

    # Create temporary directory
    # TemporaryDirectory removes the directory and anything left inside it on
    # exit, including when one of the steps below raises
    with tempfile.TemporaryDirectory(suffix='pdf-segmented') as tempdir:
        # Get input pages
        if input_format == 'xcf':
            input_pages = xcf_get_pages(input_file)
        else:
            raise NotImplementedError()

        # Segment foreground and background
        segmented_pages = segment_pages(input_pages)

        # Build the default options per call, so that calls never share one instance
        if options is None:
            options = CompressionOptions()

        # Compress layers
        compressed_pages = compress_pages(
            input_pages=input_pages,
            segmented_pages=segmented_pages,
            fg_compression=fg_compression,
            bg_compression=bg_compression,
            options=options,
            tempdir=tempdir
        )

        # Convert to output format
        if output_format == 'pdf':
            pdf_write_pages(
                input_pages=input_pages,
                compressed_pages=compressed_pages,
                output_file=output_file
            )
        else:
            # output_format is 'djvu': anything else was rejected during validation
            djvu_write_pages(
                input_pages=input_pages,
                compressed_pages=compressed_pages,
                output_file=output_file,
                tempdir=tempdir
            )