diff options
author | Alex Xu (Hello71) <alex_y_xu@yahoo.ca> | 2021-10-05 12:45:38 -0400 |
---|---|---|
committer | Alex Xu (Hello71) <alex_y_xu@yahoo.ca> | 2021-10-05 12:45:38 -0400 |
commit | bc64b0cc23da05cc845cc2fb72f04eafa8fa04de (patch) | |
tree | 71951af19fd7591c1d32f80dc20d5a86c8dd310d | |
parent | 7b82460ff5fe21fc4d2a98bb425d77bcb44dce7b (diff) | |
download | wfs-bc64b0cc23da05cc845cc2fb72f04eafa8fa04de.tar.xz wfs-bc64b0cc23da05cc845cc2fb72f04eafa8fa04de.zip |
optimize, remove broken --flavor, fix screenshots
-rwxr-xr-x | wfs.py | 61 |
1 files changed, 24 insertions, 37 deletions
@@ -2,13 +2,12 @@ import argparse import logging -import re +import multiprocessing import os import signal import sys from itertools import chain -import multiprocessing from multiprocessing import Pool from multiprocessing.util import Finalize from pathlib import Path @@ -59,12 +58,6 @@ def stop_driver(): if DRIVER: DRIVER.quit() DRIVER = None -def hook_sys(name): - orig_hook = getattr(sys, name) - def my_hook(*args, **kwargs): - stop_driver() - orig_hook(*args, **kwargs) - setattr(sys, name, my_hook) def hook_sig(signum): orig_handler = signal.getsignal(signum) if orig_handler is None: @@ -75,6 +68,11 @@ def hook_sig(signum): os.kill(os.getpid(), signum) signal.signal(signum, term_handler) def start_wworker(driver_name): + # clamp selenium wasteful sleeps to 0.1s + import time + from time import sleep + time.sleep = lambda secs: sleep(min(secs, 0.1)) + hook_sig(signal.SIGTERM) global DRIVER if driver_name == 'chrome': @@ -108,14 +106,13 @@ def extract(path, fonts, screenshots): DRIVER.get(make_uri(path)) if screenshots: logger.info('replacing fonts for %s', path) - DRIVER.execute_script(''.join([ + height = DRIVER.execute_script(''.join([ "let style = document.createElement('style'); style.innerHTML = '", ''.join(gen_font_face(font) for font in fonts), - "'; document.body.appendChild(style);"])) + "'; document.body.appendChild(style); return document.body.parentNode.scrollHeight"])) logger.info('taking pre-screenshot for %s', path) - height = DRIVER.execute_script('return document.body.parentNode.scrollHeight') - DRIVER.set_window_size(2000, height) - screenshot = DRIVER.find_element_by_tag_name('body').screenshot_as_png + DRIVER.set_window_size(1920, height) + screenshot = DRIVER.get_screenshot_as_png() else: screenshot = None logger.info('extracting text from %s', path) @@ -129,24 +126,16 @@ def get_fontdesc(fonts, fontspec): return font return None -def subset(fontdesc, text, flavors, fts_opts): +def subset(fontdesc, text, fts_opts): fontfile = fontdesc['fontfile'] logger.info('subsetting %s', fontfile) font = load_font(fontfile, fts_opts, dontLoadGlyphNames=True) subsetter = Subsetter(options=fts_opts) - if 'extratext' in fontdesc: - text += fontdesc['extratext'] subsetter.populate(text=text) subsetter.subset(font) ret = [] - for flavor in flavors: - if 'outfile' in fontdesc and flavor in fontdesc['outfile']: - outfile = fontdesc['outfile'][flavor] - else: - outfile = re.sub(r'\.[ot]tf$', f'.subset.{flavor}', fontfile) - if outfile == fontfile: - raise Exception('cannot overwrite font file') - ret.append((font, flavor, outfile)) + outfile = fontfile[:fontfile.rindex('.')] + '.subset.woff2' + ret.append((font, 'woff2', outfile)) return ret def write_subset(font, flavor, outfile): @@ -157,14 +146,17 @@ def write_subset(font, flavor, outfile): def verify(path, screenshot_begin_png): logger.info('refetching %s', path) DRIVER.get(make_uri(path)) + + from io import BytesIO + from PIL import Image, ImageChops + screenshot_begin = Image.open(BytesIO(screenshot_begin_png)).convert('RGB') + logger.info('taking post-screenshot for %s', path) - height = DRIVER.execute_script('return document.body.parentNode.scrollHeight') - DRIVER.set_window_size(2000, height) - screenshot_end_png = DRIVER.find_element_by_tag_name('body').screenshot_as_png + DRIVER.set_window_size(*screenshot_begin.size) + screenshot_end_png = DRIVER.get_screenshot_as_png() logger.info('checking screenshot for %s', path) - screenshot_begin = Image.open(BytesIO(screenshot_begin_png)) - screenshot_end = Image.open(BytesIO(screenshot_end_png)) + screenshot_end = Image.open(BytesIO(screenshot_end_png)).convert('RGB') if ImageChops.difference(screenshot_begin, screenshot_end).getbbox(): raise Exception(f'screenshots do not match for {path}') @@ -183,7 +175,6 @@ def main(argv): parser.add_argument('--driver', help='selenium driver name (chrome or firefox)', default='chrome') parser.add_argument('--no-screenshots', help='skip screenshot validation', action='store_true') parser.add_argument('--font', help='add font (fontfile:family:weight:style)', action='append') - parser.add_argument('--flavor', help='add flavor (woff or woff2)', action='append', default=['woff2']) parser.add_argument('file', help='html files', nargs='+') args, leftover = parser.parse_known_intermixed_args(argv) options = Options() @@ -197,12 +188,8 @@ def main(argv): ncpus = len(os.sched_getaffinity(0)) fonts = [dict(zip(['fontfile', 'family', 'weight', 'style'], font.split(':'))) for font in args.font] nwworkers = min(len(files), ncpus) - nfworkers = min(len(fonts) * 2, ncpus) - logger.info('starting %d web workers, %d font workers', nwworkers, nfworkers) - - if not args.no_screenshots: - from io import BytesIO - from PIL import Image, ImageChops + nfworkers = min(len(fonts), ncpus) + logger.info('using %d web workers, %d font workers', nwworkers, nfworkers) with Pool(nfworkers) as fpool, \ Pool(nwworkers, start_wworker, (args.driver,)) as wpool: @@ -222,7 +209,7 @@ def main(argv): logger.info('shutting down web workers early') wpool.close() subset_args = ( - (get_fontdesc(fonts, fontspec), ''.join(text), args.flavor, options) + (get_fontdesc(fonts, fontspec), ''.join(text), options) for fontspec, text in all_font_texts.items()) subsetted = fpool.starmap(subset, subset_args) fpool.starmap(write_subset, chain(*subsetted)) |