# pdf-segmented: Generate PDFs using separate compression for foreground and background
# Copyright (C) 2025 Lee Yingtong Li
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

from .input import InputPages

import numpy
from PIL import Image

from dataclasses import dataclass
from typing import Generator, List


@dataclass
class SegmentedPage:
    """A page split into a foreground layer and a background layer."""

    # Copy of the page with every non-foreground pixel set to white
    fg: Image.Image
    # Copy of the page with every foreground pixel set to white
    bg: Image.Image


def segment_pages(input_pages: InputPages) -> Generator[SegmentedPage, None, None]:
    """Lazily segment every page of *input_pages*.

    Prints a 'Page N of M' progress line before each page is segmented,
    then yields the SegmentedPage produced by segment_page.
    """
    # All three Generator parameters are spelled out: the one-argument form
    # raises TypeError when the annotation is evaluated on Python < 3.13.
    for page_num, input_page in enumerate(input_pages.pages):
        print('Page {} of {}'.format(page_num + 1, input_pages.num_pages))
        yield segment_page(input_page)


def _all_neighbours_set(mask: numpy.ndarray) -> numpy.ndarray:
    """Return a boolean array marking cells whose 8 neighbours are all True.

    *mask* is a 2-D boolean array. Neighbours that would fall outside the
    array are treated as True, so cells on the border are judged only on
    the neighbours that actually exist.
    """
    height, width = mask.shape

    # Surround the mask with a 1-cell frame of True so that every cell has
    # eight neighbours and out-of-bounds neighbours count as True.
    padded = numpy.pad(mask, 1, mode='constant', constant_values=True)

    result = numpy.ones_like(mask)
    for row_offset in range(3):
        for col_offset in range(3):
            if row_offset == 1 and col_offset == 1:
                # The centre cell itself is not one of its neighbours
                continue
            result &= padded[row_offset:row_offset + height,
                             col_offset:col_offset + width]

    return result


def segment_page(input_page: Image.Image) -> SegmentedPage:
    """Segment the input page into foreground and background.

    The page is converted to RGB. A pixel belongs to the foreground when it
    is pure black (0, 0, 0) and each of its 8 neighbours is either pure
    black or pure white (255, 255, 255); neighbours outside the image are
    counted as satisfying that condition.

    Returns a SegmentedPage whose ``fg`` image has all non-foreground pixels
    whited out and whose ``bg`` image has all foreground pixels whited out.
    """
    image_rgb = input_page.convert('RGB')
    numpy_rgb = numpy.asarray(image_rgb)

    # Precompute black and white pixels
    black_pixels = (numpy_rgb == 0).all(axis=2)
    white_pixels = (numpy_rgb == 255).all(axis=2)
    black_or_white = black_pixels | white_pixels

    # Foreground is only black pixels with all neighbours either black or white
    fg_pixels = black_pixels & _all_neighbours_set(black_or_white)

    # Foreground - white out all non-foreground pixels
    numpy_fg = numpy_rgb.copy()
    numpy_fg[~fg_pixels, :] = [255, 255, 255]
    image_fg = Image.fromarray(numpy_fg, image_rgb.mode)

    # Background - white out all foreground pixels
    numpy_bg = numpy_rgb.copy()
    numpy_bg[fg_pixels, :] = [255, 255, 255]
    image_bg = Image.fromarray(numpy_bg, image_rgb.mode)

    # TODO: Handle case where empty background or foreground

    return SegmentedPage(fg=image_fg, bg=image_bg)