83 lines
2.4 KiB
Python

# pdf-segmented: Generate PDFs using separate compression for foreground and background
# Copyright (C) 2025 Lee Yingtong Li
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from .compression import CompressionOptions, compress_pages
from .input.xcf import xcf_get_pages
from .output.pdf import pdf_write_pages
from .segmentation import segment_pages
import os
import sys
import tempfile
from typing import Optional
def convert_file(
    input_file: str,
    output_file: str,
    input_format: Optional[str] = None,
    output_format: Optional[str] = None,
    fg_compression: str = 'jbig2',
    bg_compression: str = 'jpeg',
    options: 'Optional[CompressionOptions]' = None
) -> None:
    """Convert an input document into a segmented, separately compressed output file.

    The pages of ``input_file`` are read, split by ``segment_pages``,
    compressed by ``compress_pages`` and finally written to ``output_file``.
    Intermediate files are kept in a temporary directory that is removed when
    the function returns or raises.

    Args:
        input_file: Path of the file to read.
        output_file: Path of the file to write.
        input_format: Input format name. Only ``'xcf'`` is implemented. When
            ``None``, XCF is used; a warning is printed to stderr if
            ``input_file`` does not end in ``.xcf``.
        output_format: Output format name. Only ``'pdf'`` is implemented. When
            ``None``, PDF is used; a warning is printed to stderr if
            ``output_file`` does not end in ``.pdf``.
        fg_compression: Passed unchanged to ``compress_pages`` as
            ``fg_compression``.
        bg_compression: Passed unchanged to ``compress_pages`` as
            ``bg_compression``.
        options: Passed to ``compress_pages`` as ``options``. When ``None``, a
            fresh ``CompressionOptions()`` is created for this call.

    Raises:
        NotImplementedError: If the input format is not ``'xcf'`` or the
            output format is not ``'pdf'``. Raised before any file is read.
    """
    # Impute arguments. The fallback format is applied in both branches so
    # that the "assuming ..." warning is actually true.
    if input_format is None:
        if not input_file.endswith('.xcf'):
            print('Warning: Unknown input file extension, assuming XCF', file=sys.stderr)
        input_format = 'xcf'

    if output_format is None:
        if not output_file.endswith('.pdf'):
            print('Warning: Unknown output file extension, assuming PDF', file=sys.stderr)
        output_format = 'pdf'

    # Reject unsupported formats up front, so an invalid output format does
    # not cost a full read/segment/compress pass before failing.
    if input_format != 'xcf':
        raise NotImplementedError(f'Unsupported input format: {input_format!r}')
    if output_format != 'pdf':
        raise NotImplementedError(f'Unsupported output format: {output_format!r}')

    # Build the default per call rather than once at definition time, so calls
    # never share a single options instance.
    if options is None:
        options = CompressionOptions()

    # TemporaryDirectory removes the directory together with anything left
    # inside it, including when an exception propagates. 'pdf-segmented' is
    # kept as the name suffix, as before.
    with tempfile.TemporaryDirectory(suffix='pdf-segmented') as tempdir:
        # Get input pages
        input_pages = xcf_get_pages(input_file)

        # Segment foreground and background
        segmented_pages = segment_pages(input_pages)

        # Compress layers
        compressed_pages = compress_pages(
            segmented_pages=segmented_pages,
            fg_compression=fg_compression,
            bg_compression=bg_compression,
            options=options,
            tempdir=tempdir
        )

        # Convert to output format
        pdf_write_pages(
            input_pages=input_pages,
            compressed_pages=compressed_pages,
            output_file=output_file
        )