summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Alex Xu (Hello71) <alex_y_xu@yahoo.ca> 2021-08-13 17:03:28 -0400
committer Alex Xu (Hello71) <alex_y_xu@yahoo.ca> 2021-08-13 17:03:28 -0400
commit 5b1c4639f058d04a0089b3b323c8fad39c4a97a3 (patch)
tree fe3c58bd38080b0f3728d3cc740dfe222b408df9
parent dd3a70f877edd807bd2ab5184b831c2f4335e3e7 (diff)
download wfs-5b1c4639f058d04a0089b3b323c8fad39c4a97a3.tar.xz
wfs-5b1c4639f058d04a0089b3b323c8fad39c4a97a3.zip
accept command-line args
-rw-r--r-- config.py 46
-rwxr-xr-x wfs.py 89
2 files changed, 47 insertions, 88 deletions
diff --git a/config.py b/config.py
deleted file mode 100644
index 6ee0c29..0000000
--- a/config.py
+++ /dev/null
@@ -1,46 +0,0 @@
-from fontTools.subset import Options
-
-config = {
- # chrome or firefox
- 'selenium_driver_name': 'chrome',
-
- # take screenshots before and after and check that they are identical
- 'screenshots': True,
-
- # pyftsubset options, see pyftsubset --help
- 'ftsubset_options': [
- '--desubroutinize',
- '--layout-scripts=latn',
- '--layout-features-=curs,dnom,frac,locl,mark,mkmk',
- '--name-IDs=',
- '--with-zopfli',
- ],
-
- # font configuration table.
- # family: no default, mandatory
- # weight: must be string, mandatory
- # fontfile: must be string, mandatory
- # extratext: non-DOM text to add to subset (e.g. CSS content). default: none
- # outfile: dict of {woff, woff2} output filenames. default: s/.[ot]tf$/.subset.woff(2)/
- 'fonts': [
- {
- 'family': 'EB Garamond',
- 'weight': '400',
- 'style': 'italic',
- 'fontfile': 'EBGaramond-Italic.otf',
- 'extratext': '@'
- },
- {
- 'family': 'EB Garamond',
- 'weight': '400',
- 'style': 'normal',
- 'fontfile': 'EBGaramond-Regular.otf'
- },
- {
- 'family': 'EB Garamond',
- 'weight': '500',
- 'style': 'normal',
- 'fontfile': 'EBGaramond-Medium.otf'
- }
- ],
-}
diff --git a/wfs.py b/wfs.py
index c932c3d..43a365f 100755
--- a/wfs.py
+++ b/wfs.py
@@ -1,5 +1,6 @@
#!/usr/bin/env python3
+import argparse
import logging
import re
import os
@@ -16,37 +17,10 @@ from urllib.parse import urlparse
from fontTools.subset import Options, Subsetter, load_font
from selenium import webdriver
-from config import config
-
logging.basicConfig(format='[%(relativeCreated)d] %(message)s')
logger = logging.getLogger('websubset')
logger.setLevel(logging.INFO)
-options = Options()
-options.parse_opts(config['ftsubset_options'])
-if options.with_zopfli:
- from fontTools.ttLib import sfnt
- sfnt.USE_ZOPFLI = True
-
-if config['screenshots']:
- from io import BytesIO
- from PIL import Image, ImageChops
- def gen_font_face(font):
- if 'fontfile' not in font:
- return ''
- return ''.join([
- '@font-face{',
- 'font-family:"', font['family'], '";',
- 'font-weight:', font['weight'], ';',
- 'font-style:', font['style'], ';',
- 'src: url("', font["fontfile"], '");',
- '}'])
-
- REPLACE_FONTS_SCRIPT = ''.join([
- "let style = document.createElement('style'); style.innerHTML = '",
- ''.join(gen_font_face(font) for font in config['fonts']),
- "'; document.body.appendChild(style);"])
-
EXTRACT_SCRIPT = r'''
let whitelist = new Set(arguments[0]);
let walker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT);
@@ -68,6 +42,17 @@ EXTRACT_SCRIPT = r'''
return dict;
'''
+def gen_font_face(font):
+ if 'fontfile' not in font:
+ return ''
+ return ''.join([
+ '@font-face{',
+ 'font-family:"', font['family'], '";',
+ 'font-weight:', font['weight'], ';',
+ 'font-style:', font['style'], ';',
+ 'src: url("', font["fontfile"], '");',
+ '}'])
+
DRIVER = None
def stop_driver():
global DRIVER
@@ -118,12 +103,15 @@ def make_uri(path):
else:
return Path(path).resolve().as_uri()
-def extract(path, whitelist, screenshots):
+def extract(path, fonts, screenshots):
logger.info('fetching %s', path)
DRIVER.get(make_uri(path))
if screenshots:
logger.info('replacing fonts for %s', path)
- DRIVER.execute_script(REPLACE_FONTS_SCRIPT)
+ DRIVER.execute_script(''.join([
+ "let style = document.createElement('style'); style.innerHTML = '",
+ ''.join(gen_font_face(font) for font in fonts),
+ "'; document.body.appendChild(style);"]))
logger.info('taking pre-screenshot for %s', path)
height = DRIVER.execute_script('return document.body.parentNode.scrollHeight')
DRIVER.set_window_size(2000, height)
@@ -131,6 +119,7 @@ def extract(path, whitelist, screenshots):
else:
screenshot = None
logger.info('extracting text from %s', path)
+ whitelist = [';'.join((f['family'], f['weight'], f['style'])) for f in fonts]
return (path, DRIVER.execute_script(EXTRACT_SCRIPT, whitelist), screenshot)
def get_fontdesc(fonts, fontspec):
@@ -190,37 +179,53 @@ class LocalPool:
stop_driver()
def main(argv):
+ parser = argparse.ArgumentParser(description='Web Font Subsetter', epilog='see pyftsubset --help for additional options')
+ parser.add_argument('--driver', help='selenium driver name (chrome or firefox)', default='chrome')
+ parser.add_argument('--no-screenshots', help='skip screenshot validation', action='store_true')
+ parser.add_argument('--font', help='add font (family:weight:style:fontfile)', action='append')
+ parser.add_argument('file', help='html files', nargs='+')
+ args, leftover = parser.parse_known_intermixed_args(argv)
+ options = Options()
+ files = args.file + options.parse_opts(leftover)
+ if any([file[0] == '-' for file in files]):
+ parser.print_usage()
+ raise Exception('bad arguments')
+ if options.with_zopfli:
+ from fontTools.ttLib import sfnt
+ sfnt.USE_ZOPFLI = True
ncpus = len(os.sched_getaffinity(0))
- nfontfiles = sum('fontfile' in font for font in config['fonts'])
- nwworkers = min(len(argv) - 1, ncpus)
- nfworkers = min(nfontfiles * 2, ncpus)
+ fonts = [dict(zip(['family', 'weight', 'style', 'fontfile'], font.split(':'))) for font in args.font]
+ nwworkers = min(len(files), ncpus)
+ nfworkers = min(len(fonts) * 2, ncpus)
logger.info('starting %d web workers, %d font workers', nwworkers, nfworkers)
+
+ if not args.no_screenshots:
+ from io import BytesIO
+ from PIL import Image, ImageChops
+
with Pool(nfworkers) as fpool, \
- Pool(nwworkers, start_wworker, (config['selenium_driver_name'],)) as wpool:
+ Pool(nwworkers, start_wworker, (args.driver,)) as wpool:
all_font_texts = {}
screenshots = []
- whitelist = [
- ';'.join((f['family'], f['weight'], f['style']))
- for f in config['fonts']]
- extract_args = ((arg, whitelist, config['screenshots']) for arg in sys.argv[1:])
+ extract_args = ((file, fonts, not args.no_screenshots) for file in args.file)
extracted = wpool.starmap(extract, extract_args)
for path, font_texts, screenshot in extracted:
- if config['screenshots']:
+ if not args.no_screenshots:
screenshots.append((path, screenshot))
for fontspec, text in font_texts.items():
if fontspec in all_font_texts:
all_font_texts[fontspec] |= set(text)
else:
all_font_texts[fontspec] = set(text)
- if not config['screenshots']:
+ if args.no_screenshots:
logger.info('shutting down web workers early')
wpool.close()
subset_args = (
- (get_fontdesc(config['fonts'], fontspec), ''.join(text), options)
+ (get_fontdesc(fonts, fontspec), ''.join(text), options)
for fontspec, text in all_font_texts.items())
subsetted = fpool.starmap(subset, subset_args)
fpool.starmap(write_subset, chain(*subsetted))
- if config['screenshots']:
+ if not args.no_screenshots:
wpool.starmap(verify, screenshots, 1)
if multiprocessing.active_children():
logger.info('waiting for workers')
@@ -229,4 +234,4 @@ def main(argv):
logger.info('exiting successfully')
if __name__ == '__main__':
- main(sys.argv)
+ main(sys.argv[1:])