From 5b1c4639f058d04a0089b3b323c8fad39c4a97a3 Mon Sep 17 00:00:00 2001 From: "Alex Xu (Hello71)" Date: Fri, 13 Aug 2021 17:03:28 -0400 Subject: accept command-line args --- config.py | 46 --------------------------------- wfs.py | 89 +++++++++++++++++++++++++++++++++------------------------------ 2 files changed, 47 insertions(+), 88 deletions(-) delete mode 100644 config.py diff --git a/config.py b/config.py deleted file mode 100644 index 6ee0c29..0000000 --- a/config.py +++ /dev/null @@ -1,46 +0,0 @@ -from fontTools.subset import Options - -config = { - # chrome or firefox - 'selenium_driver_name': 'chrome', - - # take screenshots before and after and check that they are identical - 'screenshots': True, - - # pyftsubset options, see pyftsubset --help - 'ftsubset_options': [ - '--desubroutinize', - '--layout-scripts=latn', - '--layout-features-=curs,dnom,frac,locl,mark,mkmk', - '--name-IDs=', - '--with-zopfli', - ], - - # font configuration table. - # family: no default, mandatory - # weight: must be string, mandatory - # fontfile: must be string, mandatory - # extratext: non-DOM text to add to subset (e.g. CSS content). default: none - # outfile: dict of {woff, woff2} output filenames. default: s/.[ot]tf$/.subset.woff(2)/ - 'fonts': [ - { - 'family': 'EB Garamond', - 'weight': '400', - 'style': 'italic', - 'fontfile': 'EBGaramond-Italic.otf', - 'extratext': '@' - }, - { - 'family': 'EB Garamond', - 'weight': '400', - 'style': 'normal', - 'fontfile': 'EBGaramond-Regular.otf' - }, - { - 'family': 'EB Garamond', - 'weight': '500', - 'style': 'normal', - 'fontfile': 'EBGaramond-Medium.otf' - } - ], -} diff --git a/wfs.py b/wfs.py index c932c3d..43a365f 100755 --- a/wfs.py +++ b/wfs.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 +import argparse import logging import re import os @@ -16,37 +17,10 @@ from urllib.parse import urlparse from fontTools.subset import Options, Subsetter, load_font from selenium import webdriver -from config import config - logging.basicConfig(format='[%(relativeCreated)d] %(message)s') logger = logging.getLogger('websubset') logger.setLevel(logging.INFO) -options = Options() -options.parse_opts(config['ftsubset_options']) -if options.with_zopfli: - from fontTools.ttLib import sfnt - sfnt.USE_ZOPFLI = True - -if config['screenshots']: - from io import BytesIO - from PIL import Image, ImageChops - def gen_font_face(font): - if 'fontfile' not in font: - return '' - return ''.join([ - '@font-face{', - 'font-family:"', font['family'], '";', - 'font-weight:', font['weight'], ';', - 'font-style:', font['style'], ';', - 'src: url("', font["fontfile"], '");', - '}']) - - REPLACE_FONTS_SCRIPT = ''.join([ - "let style = document.createElement('style'); style.innerHTML = '", - ''.join(gen_font_face(font) for font in config['fonts']), - "'; document.body.appendChild(style);"]) - EXTRACT_SCRIPT = r''' let whitelist = new Set(arguments[0]); let walker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT); @@ -68,6 +42,17 @@ EXTRACT_SCRIPT = r''' return dict; ''' +def gen_font_face(font): + if 'fontfile' not in font: + return '' + return ''.join([ + '@font-face{', + 'font-family:"', font['family'], '";', + 'font-weight:', font['weight'], ';', + 'font-style:', font['style'], ';', + 'src: url("', font["fontfile"], '");', + '}']) + DRIVER = None def stop_driver(): global DRIVER @@ -118,12 +103,15 @@ def make_uri(path): else: return Path(path).resolve().as_uri() -def extract(path, whitelist, screenshots): +def extract(path, fonts, screenshots): logger.info('fetching %s', path) DRIVER.get(make_uri(path)) if screenshots: logger.info('replacing fonts for %s', path) - DRIVER.execute_script(REPLACE_FONTS_SCRIPT) + DRIVER.execute_script(''.join([ + "let style = document.createElement('style'); style.innerHTML = '", + ''.join(gen_font_face(font) for font in fonts), + "'; document.body.appendChild(style);"])) logger.info('taking pre-screenshot for %s', path) height = DRIVER.execute_script('return document.body.parentNode.scrollHeight') DRIVER.set_window_size(2000, height) @@ -131,6 +119,7 @@ def extract(path, whitelist, screenshots): else: screenshot = None logger.info('extracting text from %s', path) + whitelist = [';'.join((f['family'], f['weight'], f['style'])) for f in fonts] return (path, DRIVER.execute_script(EXTRACT_SCRIPT, whitelist), screenshot) def get_fontdesc(fonts, fontspec): @@ -190,37 +179,53 @@ class LocalPool: stop_driver() def main(argv): + parser = argparse.ArgumentParser(description='Web Font Subsetter', epilog='see pyftsubset --help for additional options') + parser.add_argument('--driver', help='selenium driver name (chrome or firefox)', default='chrome') + parser.add_argument('--no-screenshots', help='skip screenshot validation', action='store_true') + parser.add_argument('--font', help='add font (family:weight:style:fontfile)', action='append') + parser.add_argument('file', help='html files', nargs='+') + args, leftover = parser.parse_known_intermixed_args(argv) + options = Options() + files = args.file + options.parse_opts(leftover) + if any([file[0] == '-' for file in files]): + parser.print_usage() + raise Exception('bad arguments') + if options.with_zopfli: + from fontTools.ttLib import sfnt + sfnt.USE_ZOPFLI = True ncpus = len(os.sched_getaffinity(0)) - nfontfiles = sum('fontfile' in font for font in config['fonts']) - nwworkers = min(len(argv) - 1, ncpus) - nfworkers = min(nfontfiles * 2, ncpus) + fonts = [dict(zip(['family', 'weight', 'style', 'fontfile'], font.split(':'))) for font in args.font] + nwworkers = min(len(files), ncpus) + nfworkers = min(len(fonts) * 2, ncpus) logger.info('starting %d web workers, %d font workers', nwworkers, nfworkers) + + if not args.no_screenshots: + from io import BytesIO + from PIL import Image, ImageChops + with Pool(nfworkers) as fpool, \ - Pool(nwworkers, start_wworker, (config['selenium_driver_name'],)) as wpool: + Pool(nwworkers, start_wworker, (args.driver,)) as wpool: all_font_texts = {} screenshots = [] - whitelist = [ - ';'.join((f['family'], f['weight'], f['style'])) - for f in config['fonts']] - extract_args = ((arg, whitelist, config['screenshots']) for arg in sys.argv[1:]) + extract_args = ((file, fonts, not args.no_screenshots) for file in args.file) extracted = wpool.starmap(extract, extract_args) for path, font_texts, screenshot in extracted: - if config['screenshots']: + if not args.no_screenshots: screenshots.append((path, screenshot)) for fontspec, text in font_texts.items(): if fontspec in all_font_texts: all_font_texts[fontspec] |= set(text) else: all_font_texts[fontspec] = set(text) - if not config['screenshots']: + if args.no_screenshots: logger.info('shutting down web workers early') wpool.close() subset_args = ( - (get_fontdesc(config['fonts'], fontspec), ''.join(text), options) + (get_fontdesc(fonts, fontspec), ''.join(text), options) for fontspec, text in all_font_texts.items()) subsetted = fpool.starmap(subset, subset_args) fpool.starmap(write_subset, chain(*subsetted)) - if config['screenshots']: + if not args.no_screenshots: wpool.starmap(verify, screenshots, 1) if multiprocessing.active_children(): logger.info('waiting for workers') @@ -229,4 +234,4 @@ def main(argv): logger.info('exiting successfully') if __name__ == '__main__': - main(sys.argv) + main(sys.argv[1:]) -- cgit v1.2.3-70-g09d2