diff --git a/pdf_segmented/__init__.py b/pdf_segmented/__init__.py
index 5b45ea2..0a84634 100644
--- a/pdf_segmented/__init__.py
+++ b/pdf_segmented/__init__.py
@@ -14,7 +14,7 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
-from .compression import compress_pages
+from .compression import CompressionOptions, compress_pages
from .input.xcf import xcf_get_pages
from .output.pdf import pdf_write_pages
from .segmentation import segment_pages
@@ -31,7 +31,7 @@ def convert_file(
output_format: Optional[str] = None,
fg_compression: str = 'jbig2',
bg_compression: str = 'jpeg',
- jpeg_quality: Optional[float] = None
+ options: CompressionOptions = CompressionOptions()
) -> None:
# Create temporary directory
tempdir = tempfile.mkdtemp('pdf-segmented')
@@ -63,7 +63,7 @@ def convert_file(
segmented_pages=segmented_pages,
fg_compression=fg_compression,
bg_compression=bg_compression,
- jpeg_quality=jpeg_quality,
+ options=options,
tempdir=tempdir
)
diff --git a/pdf_segmented/__main__.py b/pdf_segmented/__main__.py
index d8c4f0b..f6bda5b 100644
--- a/pdf_segmented/__main__.py
+++ b/pdf_segmented/__main__.py
@@ -15,6 +15,7 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from . import convert_file
+from .compression import CompressionOptions
import argparse
@@ -28,7 +29,9 @@ parser.add_argument('output_file')
parser.add_argument('--input-format', choices=['xcf'])
parser.add_argument('--output-format', choices=['pdf'])
parser.add_argument('--fg-compression', default='jbig2', choices=['jbig2'])
-parser.add_argument('--bg-compression', default='jpeg', choices=['jpeg'])
+parser.add_argument('--bg-compression', default='jpeg', choices=['jpeg', 'jp2'])
+parser.add_argument('--jp2-lossless', action='store_true')
+parser.add_argument('--jp2-rate', type=float)
parser.add_argument('--jpeg-quality', type=float)
args = parser.parse_args()
@@ -41,5 +44,9 @@ convert_file(
output_format=args.output_format,
fg_compression=args.fg_compression,
bg_compression=args.bg_compression,
- jpeg_quality=args.jpeg_quality
+ options=CompressionOptions(
+ jp2_lossless=args.jp2_lossless,
+ jp2_rate=args.jp2_rate,
+ jpeg_quality=args.jpeg_quality
+ )
)
diff --git a/pdf_segmented/compression/__init__.py b/pdf_segmented/compression/__init__.py
index 900973f..d3028a0 100644
--- a/pdf_segmented/compression/__init__.py
+++ b/pdf_segmented/compression/__init__.py
@@ -22,6 +22,7 @@ class CompressedLayer:
pass
from .jbig2 import jbig2_compress_layer
+from .jp2 import jp2_compress_layer
from .jpeg import jpeg_compress_layer
from ..segmentation import SegmentedPage
@@ -30,6 +31,12 @@ from PIL import Image
from dataclasses import dataclass
from typing import Generator, Iterable, Optional
+# Bundle of codec-specific settings that is passed unchanged through
+# compress_pages -> compress_page -> compress_layer; compress_layer then
+# forwards only the fields that belong to the selected compression.
+#
+# NOTE(review): this is a plain (non-frozen) dataclass, and convert_file
+# declares `options: CompressionOptions = CompressionOptions()`, so that one
+# default instance is created at definition time and shared by every call
+# that omits `options`. Treat instances as read-only, or consider
+# `@dataclass(frozen=True)`.
+@dataclass
+class CompressionOptions:
+ # Forwarded to jp2_compress_layer as `jp2_lossless`; the command line sets
+ # it from the --jp2-lossless flag. Defaults to False.
+ jp2_lossless: bool = False
+ # Forwarded to jp2_compress_layer as `jp2_rate`; the command line sets it
+ # from --jp2-rate. None when the flag is not given.
+ # NOTE(review): nothing visible here rejects --jp2-rate combined with
+ # --jp2-lossless; confirm how jp2_compress_layer resolves that case.
+ jp2_rate: Optional[float] = None
+ # Forwarded to jpeg_compress_layer as `jpeg_quality`; the command line sets
+ # it from --jpeg-quality. None when the flag is not given.
+ jpeg_quality: Optional[float] = None
+
@dataclass
class CompressedPage:
fg: CompressedLayer
@@ -39,7 +46,7 @@ def compress_pages(
segmented_pages: Iterable[SegmentedPage],
fg_compression: str,
bg_compression: str,
- jpeg_quality: Optional[float],
+ options: CompressionOptions,
tempdir: str
) -> Generator[CompressedPage]:
@@ -49,7 +56,7 @@ def compress_pages(
segmented_page=segmented_page,
fg_compression=fg_compression,
bg_compression=bg_compression,
- jpeg_quality=jpeg_quality,
+ options=options,
tempdir=tempdir
)
@@ -57,7 +64,7 @@ def compress_page(
segmented_page: SegmentedPage,
fg_compression: str,
bg_compression: str,
- jpeg_quality: Optional[float],
+ options: CompressionOptions,
tempdir: str
) -> CompressedPage:
@@ -66,13 +73,13 @@ def compress_page(
fg=compress_layer(
layer=segmented_page.fg,
compression=fg_compression,
- jpeg_quality=jpeg_quality,
+ options=options,
tempdir=tempdir
),
bg=compress_layer(
layer=segmented_page.bg,
compression=bg_compression,
- jpeg_quality=jpeg_quality,
+ options=options,
tempdir=tempdir
)
)
@@ -80,14 +87,16 @@ def compress_page(
+# Compress a single layer image with the codec named by `compression`.
+#
+# Parameters:
+#   layer       -- the image to compress.
+#   compression -- codec name; 'jbig2', 'jp2' and 'jpeg' are handled.
+#   options     -- CompressionOptions; only the fields belonging to the
+#                  selected codec are read.
+#   tempdir     -- directory handed to the JBIG2 compressor; the JP2 and
+#                  JPEG branches do not receive it.
+#
+# Returns the CompressedLayer produced by the selected codec function.
+# Raises NotImplementedError for any other `compression` value.
def compress_layer(
layer: Image,
compression: str,
- jpeg_quality: Optional[float],
+ options: CompressionOptions,
tempdir: str
) -> CompressedLayer:
# Compress the given layer
if compression == 'jbig2':
+ # JBIG2 takes no settings from `options`, only the temporary directory.
return jbig2_compress_layer(layer=layer, tempdir=tempdir)
+ elif compression == 'jp2':
+ # JPEG 2000: pass through the jp2_* fields of `options`.
+ return jp2_compress_layer(layer=layer, jp2_lossless=options.jp2_lossless, jp2_rate=options.jp2_rate)
elif compression == 'jpeg':
- return jpeg_compress_layer(layer=layer, jpeg_quality=jpeg_quality)
+ # JPEG: the quality now comes from `options` instead of a dedicated
+ # parameter.
+ return jpeg_compress_layer(layer=layer, jpeg_quality=options.jpeg_quality)
else:
+ # NOTE(review): raised without a message, so the caller cannot see which
+ # codec name was rejected; consider including `compression` in the error.
raise NotImplementedError()
diff --git a/pdf_segmented/output/pdf.py b/pdf_segmented/output/pdf.py
index 56073a7..9df79c9 100644
--- a/pdf_segmented/output/pdf.py
+++ b/pdf_segmented/output/pdf.py
@@ -16,6 +16,7 @@
from ..compression import CompressedLayer, CompressedPage
from ..compression.jbig2 import JBIG2Layer
+from ..compression.jp2 import JP2Layer
from ..compression.jpeg import JPEGLayer
from ..input import InputPages
@@ -78,6 +79,17 @@ def pdf_write_layer(
BitsPerComponent=1,
Mask=[1, 1] # Layer mask
)
+ elif isinstance(layer, JP2Layer):
+ pdf_write_image(
+ input_pages=input_pages,
+ pdf=pdf,
+ page=page,
+ value=layer.data,
+ content_instructions=content_instructions,
+ ColorSpace=Name.DeviceRGB,
+ Filter=Name.JPXDecode,
+ BitsPerComponent=8
+ )
elif isinstance(layer, JPEGLayer):
pdf_write_image(
input_pages=input_pages,