Compare commits
2 Commits
730cf23ba7
...
60c6ee92e5
Author | SHA1 | Date | |
---|---|---|---|
60c6ee92e5 | |||
cf4afa44a3 |
@ -14,7 +14,7 @@
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
from .compression import compress_pages
|
||||
from .compression import CompressionOptions, compress_pages
|
||||
from .input.xcf import xcf_get_pages
|
||||
from .output.pdf import pdf_write_pages
|
||||
from .segmentation import segment_pages
|
||||
@ -31,7 +31,7 @@ def convert_file(
|
||||
output_format: Optional[str] = None,
|
||||
fg_compression: str = 'jbig2',
|
||||
bg_compression: str = 'jpeg',
|
||||
jpeg_quality: Optional[float] = None
|
||||
options: CompressionOptions = CompressionOptions()
|
||||
) -> None:
|
||||
# Create temporary directory
|
||||
tempdir = tempfile.mkdtemp('pdf-segmented')
|
||||
@ -63,7 +63,7 @@ def convert_file(
|
||||
segmented_pages=segmented_pages,
|
||||
fg_compression=fg_compression,
|
||||
bg_compression=bg_compression,
|
||||
jpeg_quality=jpeg_quality,
|
||||
options=options,
|
||||
tempdir=tempdir
|
||||
)
|
||||
|
||||
|
@ -15,6 +15,7 @@
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
from . import convert_file
|
||||
from .compression import CompressionOptions
|
||||
|
||||
import argparse
|
||||
|
||||
@ -27,8 +28,10 @@ parser.add_argument('input_file')
|
||||
parser.add_argument('output_file')
|
||||
parser.add_argument('--input-format', choices=['xcf'])
|
||||
parser.add_argument('--output-format', choices=['pdf'])
|
||||
parser.add_argument('--fg-compression', default='jbig2', choices=['jbig2'])
|
||||
parser.add_argument('--bg-compression', default='jpeg', choices=['jpeg'])
|
||||
parser.add_argument('--fg-compression', default='jbig2', choices=['jbig2', 'png'])
|
||||
parser.add_argument('--bg-compression', default='jpeg', choices=['jpeg', 'jp2', 'png'])
|
||||
parser.add_argument('--jp2-lossless', action='store_true')
|
||||
parser.add_argument('--jp2-rate', type=float)
|
||||
parser.add_argument('--jpeg-quality', type=float)
|
||||
|
||||
args = parser.parse_args()
|
||||
@ -41,5 +44,9 @@ convert_file(
|
||||
output_format=args.output_format,
|
||||
fg_compression=args.fg_compression,
|
||||
bg_compression=args.bg_compression,
|
||||
options=CompressionOptions(
|
||||
jp2_lossless=args.jp2_lossless,
|
||||
jp2_rate=args.jp2_rate,
|
||||
jpeg_quality=args.jpeg_quality
|
||||
)
|
||||
)
|
||||
|
@ -22,7 +22,9 @@ class CompressedLayer:
|
||||
pass
|
||||
|
||||
from .jbig2 import jbig2_compress_layer
|
||||
from .jp2 import jp2_compress_layer
|
||||
from .jpeg import jpeg_compress_layer
|
||||
from .png import png_compress_layer
|
||||
from ..segmentation import SegmentedPage
|
||||
|
||||
from PIL import Image
|
||||
@ -30,6 +32,12 @@ from PIL import Image
|
||||
from dataclasses import dataclass
|
||||
from typing import Generator, Iterable, Optional
|
||||
|
||||
@dataclass
|
||||
class CompressionOptions:
|
||||
jp2_lossless: bool = False
|
||||
jp2_rate: Optional[float] = None
|
||||
jpeg_quality: Optional[float] = None
|
||||
|
||||
@dataclass
|
||||
class CompressedPage:
|
||||
fg: CompressedLayer
|
||||
@ -39,7 +47,7 @@ def compress_pages(
|
||||
segmented_pages: Iterable[SegmentedPage],
|
||||
fg_compression: str,
|
||||
bg_compression: str,
|
||||
jpeg_quality: Optional[float],
|
||||
options: CompressionOptions,
|
||||
tempdir: str
|
||||
) -> Generator[CompressedPage]:
|
||||
|
||||
@ -49,7 +57,7 @@ def compress_pages(
|
||||
segmented_page=segmented_page,
|
||||
fg_compression=fg_compression,
|
||||
bg_compression=bg_compression,
|
||||
jpeg_quality=jpeg_quality,
|
||||
options=options,
|
||||
tempdir=tempdir
|
||||
)
|
||||
|
||||
@ -57,7 +65,7 @@ def compress_page(
|
||||
segmented_page: SegmentedPage,
|
||||
fg_compression: str,
|
||||
bg_compression: str,
|
||||
jpeg_quality: Optional[float],
|
||||
options: CompressionOptions,
|
||||
tempdir: str
|
||||
) -> CompressedPage:
|
||||
|
||||
@ -66,13 +74,15 @@ def compress_page(
|
||||
fg=compress_layer(
|
||||
layer=segmented_page.fg,
|
||||
compression=fg_compression,
|
||||
jpeg_quality=jpeg_quality,
|
||||
is_foreground=True,
|
||||
options=options,
|
||||
tempdir=tempdir
|
||||
),
|
||||
bg=compress_layer(
|
||||
layer=segmented_page.bg,
|
||||
compression=bg_compression,
|
||||
jpeg_quality=jpeg_quality,
|
||||
is_foreground=False,
|
||||
options=options,
|
||||
tempdir=tempdir
|
||||
)
|
||||
)
|
||||
@ -80,14 +90,19 @@ def compress_page(
|
||||
def compress_layer(
|
||||
layer: Image,
|
||||
compression: str,
|
||||
jpeg_quality: Optional[float],
|
||||
is_foreground: bool,
|
||||
options: CompressionOptions,
|
||||
tempdir: str
|
||||
) -> CompressedLayer:
|
||||
|
||||
# Compress the given layer
|
||||
if compression == 'jbig2':
|
||||
return jbig2_compress_layer(layer=layer, tempdir=tempdir)
|
||||
elif compression == 'jp2':
|
||||
return jp2_compress_layer(layer=layer, jp2_lossless=options.jp2_lossless, jp2_rate=options.jp2_rate)
|
||||
elif compression == 'jpeg':
|
||||
return jpeg_compress_layer(layer=layer, jpeg_quality=jpeg_quality)
|
||||
return jpeg_compress_layer(layer=layer, jpeg_quality=options.jpeg_quality)
|
||||
elif compression == 'png':
|
||||
return png_compress_layer(layer=layer, is_foreground=is_foreground)
|
||||
else:
|
||||
raise NotImplementedError()
|
||||
|
@ -16,10 +16,12 @@
|
||||
|
||||
from ..compression import CompressedLayer, CompressedPage
|
||||
from ..compression.jbig2 import JBIG2Layer
|
||||
from ..compression.jp2 import JP2Layer
|
||||
from ..compression.jpeg import JPEGLayer
|
||||
from ..compression.png import PNGLayer
|
||||
from ..input import InputPages
|
||||
|
||||
from pikepdf import ContentStreamInstruction, Name, Operator, Page, Pdf, Stream, unparse_content_stream
|
||||
from pikepdf import ContentStreamInstruction, Dictionary, Name, Operator, Page, Pdf, Stream, unparse_content_stream
|
||||
|
||||
from typing import Generator
|
||||
|
||||
@ -41,8 +43,8 @@ def pdf_write_pages(
|
||||
|
||||
# Write each layer to the page
|
||||
content_instructions = []
|
||||
pdf_write_layer(input_pages=input_pages, pdf=pdf, page=page, layer=compressed_page.bg, content_instructions=content_instructions)
|
||||
pdf_write_layer(input_pages=input_pages, pdf=pdf, page=page, layer=compressed_page.fg, content_instructions=content_instructions)
|
||||
pdf_write_layer(input_pages=input_pages, pdf=pdf, page=page, layer=compressed_page.bg, is_foreground=False, content_instructions=content_instructions)
|
||||
pdf_write_layer(input_pages=input_pages, pdf=pdf, page=page, layer=compressed_page.fg, is_foreground=True, content_instructions=content_instructions)
|
||||
|
||||
# Generate content stream
|
||||
wrapped_instructions = [
|
||||
@ -62,6 +64,7 @@ def pdf_write_layer(
|
||||
pdf: Pdf,
|
||||
page: Page,
|
||||
layer: CompressedLayer,
|
||||
is_foreground: bool,
|
||||
content_instructions,
|
||||
) -> None:
|
||||
|
||||
@ -78,6 +81,17 @@ def pdf_write_layer(
|
||||
BitsPerComponent=1,
|
||||
Mask=[1, 1] # Layer mask
|
||||
)
|
||||
elif isinstance(layer, JP2Layer):
|
||||
pdf_write_image(
|
||||
input_pages=input_pages,
|
||||
pdf=pdf,
|
||||
page=page,
|
||||
value=layer.data,
|
||||
content_instructions=content_instructions,
|
||||
ColorSpace=Name.DeviceRGB,
|
||||
Filter=Name.JPXDecode,
|
||||
BitsPerComponent=8
|
||||
)
|
||||
elif isinstance(layer, JPEGLayer):
|
||||
pdf_write_image(
|
||||
input_pages=input_pages,
|
||||
@ -89,6 +103,42 @@ def pdf_write_layer(
|
||||
Filter=Name.DCTDecode,
|
||||
BitsPerComponent=8
|
||||
)
|
||||
elif isinstance(layer, PNGLayer):
|
||||
if is_foreground:
|
||||
# See PDF 1.7 section 7.4.4.3
|
||||
# See also the implementation in img2pdf
|
||||
pdf_write_image(
|
||||
input_pages=input_pages,
|
||||
pdf=pdf,
|
||||
page=page,
|
||||
value=layer.get_flate_data(),
|
||||
content_instructions=content_instructions,
|
||||
ColorSpace=Name.DeviceGray,
|
||||
Filter=Name.FlateDecode,
|
||||
BitsPerComponent=1,
|
||||
Mask=[1, 1], # Layer mask
|
||||
DecodeParms=Dictionary(
|
||||
Predictor=15, # PNG prediction (on encoding, PNG optimum) - this is the only allowed value in a PNG file
|
||||
BitsPerComponent=1, # Default is 8 so must set this here
|
||||
Columns=input_pages.width
|
||||
)
|
||||
)
|
||||
else:
|
||||
pdf_write_image(
|
||||
input_pages=input_pages,
|
||||
pdf=pdf,
|
||||
page=page,
|
||||
value=layer.get_flate_data(),
|
||||
content_instructions=content_instructions,
|
||||
ColorSpace=Name.DeviceRGB,
|
||||
Filter=Name.FlateDecode,
|
||||
BitsPerComponent=8,
|
||||
DecodeParms=Dictionary(
|
||||
Predictor=15,
|
||||
Colors=3, # Default is 1 so must set this here
|
||||
Columns=input_pages.width
|
||||
)
|
||||
)
|
||||
else:
|
||||
raise NotImplementedError()
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user