#!/usr/bin/env python3
"""Web Font Subsetter.

Loads HTML pages in a headless browser driven by Selenium, collects the text
rendered with each (family, weight, style) combination, and writes a WOFF2
subset of every matching font file with fontTools.  Unless disabled, a
screenshot taken before subsetting is compared with one taken afterwards.
"""
import argparse
import logging
import multiprocessing
import os
import pathlib
import sys

import fontTools.subset
from selenium import webdriver

logging.basicConfig(format='%(levelname)s: %(message)s')
logger = logging.getLogger('wfs')
logger.setLevel(logging.INFO)

# Appends a <style> element holding the CSS passed as arguments[0] and returns
# the resulting document height.
_INJECT_STYLE_JS = """
    let style = document.createElement('style');
    style.innerHTML = arguments[0];
    document.body.appendChild(style);
    return document.documentElement.scrollHeight;
"""

# Walks every text node under <body> and returns an object mapping
# "family;weight;style" (first family of the computed style only) to the
# concatenated text of the nodes using it.  Nodes whose parent has
# display: none are skipped.
_EXTRACT_TEXT_JS = r'''
    const walker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT);
    let node, dict = {};
    while (node = walker.nextNode()) {
        const cs = getComputedStyle(node.parentNode);
        const css = k => cs.getPropertyValue(k);
        if (css('display') == 'none') continue;
        const k = css('font-family').replace(/"/g, '').replace(/,.*/, '') + ';' + css('font-weight') + ';' + css('font-style');
        if (k in dict) dict[k] += node.nodeValue;
        else dict[k] = node.nodeValue;
    }
    return dict;
'''


def make_uri(path):
    """Return path if it looks like a URL, otherwise convert it to a file: URL"""
    if ':' in path:
        return path
    return pathlib.Path(path).resolve().as_uri()


def start_driver(driver_name):
    """Start a headless Selenium driver with image loading set to "block".

    driver_name is 'chrome' or 'firefox'; anything else raises Exception.

    NOTE(review): this relies on the ``headless`` option attribute and the
    ``firefox_profile=`` keyword; newer Selenium releases appear to have
    dropped both -- confirm against the Selenium version in use.
    """
    if driver_name == 'chrome':
        chrome_options = webdriver.chrome.options.Options()
        chrome_options.headless = True
        chrome_options.experimental_options["prefs"] = {
            "profile.default_content_setting_values.images": 2
        }
        return webdriver.Chrome(options=chrome_options)
    if driver_name == 'firefox':
        firefox_profile = webdriver.FirefoxProfile()
        firefox_profile.set_preference('permissions.default.image', 2)
        firefox_options = webdriver.firefox.options.Options()
        firefox_options.headless = True
        return webdriver.Firefox(firefox_profile=firefox_profile,
                                 options=firefox_options)
    raise Exception('unknown driver name')


def subset(fontfile, text, fts_opts):
    """Subset fontfile to the characters in text and save it as WOFF2.

    The result is written next to the input as ``<name>.subset.woff2``, where
    ``<name>`` is fontfile without its extension.  fts_opts is the
    fontTools.subset.Options instance used for loading and subsetting.
    """
    logger.info('subsetting %s', fontfile)
    font = fontTools.subset.load_font(fontfile, fts_opts, dontLoadGlyphNames=True)
    subsetter = fontTools.subset.Subsetter(options=fts_opts)
    subsetter.populate(text=text)
    subsetter.subset(font)
    font.flavor = 'woff2'
    # splitext only strips a real extension of the final path component, so a
    # dot in a directory name (or no dot at all) cannot corrupt the output path.
    outfile = os.path.splitext(fontfile)[0] + '.subset.' + font.flavor
    logger.info('writing %s', outfile)
    font.save(outfile)


def _parse_font_spec(spec):
    """Split a ``fontfile:family[:weight[:style]]`` spec.

    Returns ``((family, weight, style), fontfile)``.  A missing or empty
    weight defaults to '400', a missing or empty style to 'normal'.
    Raises ValueError when the family is missing.
    """
    parts = spec.split(':')
    if len(parts) < 2:
        raise ValueError(
            f'bad font spec {spec!r}, expected fontfile:family:weight:style')
    # Pad so that trailing fields may be left out entirely.
    fontfile, family, weight, style = (parts + ['', ''])[:4]
    return (family, weight or '400', style or 'normal'), fontfile


def _available_cpus():
    """Return the number of CPUs usable by this process (at least 1)."""
    try:
        return len(os.sched_getaffinity(0))
    except AttributeError:
        # sched_getaffinity does not exist on every platform.
        return os.cpu_count() or 1


def main(argv):
    """Run the subsetter with the command-line arguments in argv.

    Options not known to this parser are handed to fontTools.subset.Options.
    Raises Exception on bad arguments or when a post-subsetting screenshot
    differs from the one taken before subsetting.
    """
    parser = argparse.ArgumentParser(
        description='Web Font Subsetter',
        epilog='see pyftsubset --help for additional options')
    parser.add_argument('--driver',
                        help='selenium driver name (chrome or firefox)',
                        default='chrome')
    parser.add_argument('--no-screenshots', help='skip screenshot validation',
                        action='store_true')
    parser.add_argument('--font',
                        help='add font (fontfile:family:weight:style)',
                        action='append')
    parser.add_argument('--width', help='browser width', type=int, default=1920)
    parser.add_argument('file', help='html files', nargs='+')
    args, leftover = parser.parse_known_intermixed_args(argv)
    options = fontTools.subset.Options()
    files = args.file + options.parse_opts(leftover)
    if any(file.startswith('-') for file in files):
        parser.print_usage()
        raise Exception('bad arguments')

    if not args.no_screenshots:
        # Only needed for screenshot validation, so imported on demand.
        from io import BytesIO
        from PIL import Image, ImageChops

    # (family, weight, style) -> font file; --font may be absent (None).
    fonts = {}
    for spec in args.font or []:
        fontspec, fontfile = _parse_font_spec(spec)
        fonts[fontspec] = fontfile

    # clamp selenium wasteful sleeps
    import time
    sleep = time.sleep
    time.sleep = lambda secs: sleep(min(secs, 0.1))
    try:
        with start_driver(args.driver) as driver:
            driver.set_window_size(args.width, args.width*0.5625)  # 9:16 = 0.5625
            font_texts = {}
            screenshots = []
            for path in args.file:
                logger.info('fetching %s', path)
                driver.get(make_uri(path))
                if not args.no_screenshots:
                    logger.info('preparing %s for screenshot', path)
                    # Point every declared family at its original font file,
                    # then grow the window to the full document height.
                    font_faces = ''.join(f'''
@font-face{{
    font-family: "{fontdesc[0]}";
    font-weight: {fontdesc[1]};
    font-style: {fontdesc[2]};
    src: url({fontfile});
}}''' for fontdesc, fontfile in fonts.items())
                    height = driver.execute_script(_INJECT_STYLE_JS, font_faces)
                    logger.info('taking pre-screenshot for %s', path)
                    driver.set_window_size(args.width, height)
                    screenshots.append((path, driver.get_screenshot_as_png()))
                logger.info('extracting text from %s', path)
                extracted = driver.execute_script(_EXTRACT_TEXT_JS)
                for fontstr, text in extracted.items():
                    # Accumulate the distinct characters used per font.
                    font_texts.setdefault(tuple(fontstr.split(';')), set()).update(text)
            if args.no_screenshots:
                logger.info('done extracting texts, shutting down driver')
                driver.close()

            jobs = []
            for fontspec, text in font_texts.items():
                if fontspec in fonts:
                    jobs.append((fonts[fontspec], ''.join(sorted(text)), options))
                else:
                    logger.warning('missing font %s', fontspec)
            if jobs:
                # Pool() rejects a worker count of zero, so size it from the
                # jobs actually queued and skip it when there are none.
                nworkers = min(len(jobs), _available_cpus())
                logger.info('starting %d subset workers for %d jobs',
                            nworkers, len(jobs))
                with multiprocessing.Pool(nworkers) as fpool:
                    fpool.starmap(subset, jobs)
            else:
                logger.warning('no fonts to subset')

            while screenshots:
                path, start_png = screenshots.pop()
                start = Image.open(BytesIO(start_png), formats=('PNG',))
                logger.info('checking %dx%d screenshot for %s', *start.size, path)
                driver.get(make_uri(path))
                logger.info('taking post-screenshot for %s', path)
                driver.set_window_size(*start.size)
                end = Image.open(BytesIO(driver.get_screenshot_as_png()),
                                 formats=('PNG',))
                if ImageChops.difference(start.convert('RGB'),
                                         end.convert('RGB')).getbbox():
                    raise Exception(f'screenshots do not match for {path}')
    finally:
        # Undo the global monkeypatch once the browser session is over.
        time.sleep = sleep
    logger.info('exiting successfully')


if __name__ == '__main__':
    main(sys.argv[1:])