summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Alex Xu (Hello71) <alex_y_xu@yahoo.ca>, 2021-10-05 12:45:38 -0400
committer: Alex Xu (Hello71) <alex_y_xu@yahoo.ca>, 2021-10-05 12:45:38 -0400
commit: bc64b0cc23da05cc845cc2fb72f04eafa8fa04de (patch)
tree: 71951af19fd7591c1d32f80dc20d5a86c8dd310d
parent: 7b82460ff5fe21fc4d2a98bb425d77bcb44dce7b (diff)
download: wfs-bc64b0cc23da05cc845cc2fb72f04eafa8fa04de.tar.xz
download: wfs-bc64b0cc23da05cc845cc2fb72f04eafa8fa04de.zip
optimize, remove broken --flavor, fix screenshots
-rwxr-xr-x wfs.py 61
1 file changed, 24 insertions, 37 deletions
diff --git a/wfs.py b/wfs.py
index 94b29df..0747bec 100755
--- a/wfs.py
+++ b/wfs.py
@@ -2,13 +2,12 @@
import argparse
import logging
-import re
+import multiprocessing
import os
import signal
import sys
from itertools import chain
-import multiprocessing
from multiprocessing import Pool
from multiprocessing.util import Finalize
from pathlib import Path
@@ -59,12 +58,6 @@ def stop_driver():
if DRIVER:
DRIVER.quit()
DRIVER = None
-def hook_sys(name):
- orig_hook = getattr(sys, name)
- def my_hook(*args, **kwargs):
- stop_driver()
- orig_hook(*args, **kwargs)
- setattr(sys, name, my_hook)
def hook_sig(signum):
orig_handler = signal.getsignal(signum)
if orig_handler is None:
@@ -75,6 +68,11 @@ def hook_sig(signum):
os.kill(os.getpid(), signum)
signal.signal(signum, term_handler)
def start_wworker(driver_name):
+ # clamp selenium wasteful sleeps to 0.1s
+ import time
+ from time import sleep
+ time.sleep = lambda secs: sleep(min(secs, 0.1))
+
hook_sig(signal.SIGTERM)
global DRIVER
if driver_name == 'chrome':
@@ -108,14 +106,13 @@ def extract(path, fonts, screenshots):
DRIVER.get(make_uri(path))
if screenshots:
logger.info('replacing fonts for %s', path)
- DRIVER.execute_script(''.join([
+ height = DRIVER.execute_script(''.join([
"let style = document.createElement('style'); style.innerHTML = '",
''.join(gen_font_face(font) for font in fonts),
- "'; document.body.appendChild(style);"]))
+ "'; document.body.appendChild(style); return document.body.parentNode.scrollHeight"]))
logger.info('taking pre-screenshot for %s', path)
- height = DRIVER.execute_script('return document.body.parentNode.scrollHeight')
- DRIVER.set_window_size(2000, height)
- screenshot = DRIVER.find_element_by_tag_name('body').screenshot_as_png
+ DRIVER.set_window_size(1920, height)
+ screenshot = DRIVER.get_screenshot_as_png()
else:
screenshot = None
logger.info('extracting text from %s', path)
@@ -129,24 +126,16 @@ def get_fontdesc(fonts, fontspec):
return font
return None
-def subset(fontdesc, text, flavors, fts_opts):
+def subset(fontdesc, text, fts_opts):
fontfile = fontdesc['fontfile']
logger.info('subsetting %s', fontfile)
font = load_font(fontfile, fts_opts, dontLoadGlyphNames=True)
subsetter = Subsetter(options=fts_opts)
- if 'extratext' in fontdesc:
- text += fontdesc['extratext']
subsetter.populate(text=text)
subsetter.subset(font)
ret = []
- for flavor in flavors:
- if 'outfile' in fontdesc and flavor in fontdesc['outfile']:
- outfile = fontdesc['outfile'][flavor]
- else:
- outfile = re.sub(r'\.[ot]tf$', f'.subset.{flavor}', fontfile)
- if outfile == fontfile:
- raise Exception('cannot overwrite font file')
- ret.append((font, flavor, outfile))
+ outfile = fontfile[:fontfile.rindex('.')] + '.subset.woff2'
+ ret.append((font, 'woff2', outfile))
return ret
def write_subset(font, flavor, outfile):
@@ -157,14 +146,17 @@ def write_subset(font, flavor, outfile):
def verify(path, screenshot_begin_png):
logger.info('refetching %s', path)
DRIVER.get(make_uri(path))
+
+ from io import BytesIO
+ from PIL import Image, ImageChops
+ screenshot_begin = Image.open(BytesIO(screenshot_begin_png)).convert('RGB')
+
logger.info('taking post-screenshot for %s', path)
- height = DRIVER.execute_script('return document.body.parentNode.scrollHeight')
- DRIVER.set_window_size(2000, height)
- screenshot_end_png = DRIVER.find_element_by_tag_name('body').screenshot_as_png
+ DRIVER.set_window_size(*screenshot_begin.size)
+ screenshot_end_png = DRIVER.get_screenshot_as_png()
logger.info('checking screenshot for %s', path)
- screenshot_begin = Image.open(BytesIO(screenshot_begin_png))
- screenshot_end = Image.open(BytesIO(screenshot_end_png))
+ screenshot_end = Image.open(BytesIO(screenshot_end_png)).convert('RGB')
if ImageChops.difference(screenshot_begin, screenshot_end).getbbox():
raise Exception(f'screenshots do not match for {path}')
@@ -183,7 +175,6 @@ def main(argv):
parser.add_argument('--driver', help='selenium driver name (chrome or firefox)', default='chrome')
parser.add_argument('--no-screenshots', help='skip screenshot validation', action='store_true')
parser.add_argument('--font', help='add font (fontfile:family:weight:style)', action='append')
- parser.add_argument('--flavor', help='add flavor (woff or woff2)', action='append', default=['woff2'])
parser.add_argument('file', help='html files', nargs='+')
args, leftover = parser.parse_known_intermixed_args(argv)
options = Options()
@@ -197,12 +188,8 @@ def main(argv):
ncpus = len(os.sched_getaffinity(0))
fonts = [dict(zip(['fontfile', 'family', 'weight', 'style'], font.split(':'))) for font in args.font]
nwworkers = min(len(files), ncpus)
- nfworkers = min(len(fonts) * 2, ncpus)
- logger.info('starting %d web workers, %d font workers', nwworkers, nfworkers)
-
- if not args.no_screenshots:
- from io import BytesIO
- from PIL import Image, ImageChops
+ nfworkers = min(len(fonts), ncpus)
+ logger.info('using %d web workers, %d font workers', nwworkers, nfworkers)
with Pool(nfworkers) as fpool, \
Pool(nwworkers, start_wworker, (args.driver,)) as wpool:
@@ -222,7 +209,7 @@ def main(argv):
logger.info('shutting down web workers early')
wpool.close()
subset_args = (
- (get_fontdesc(fonts, fontspec), ''.join(text), args.flavor, options)
+ (get_fontdesc(fonts, fontspec), ''.join(text), options)
for fontspec, text in all_font_texts.items())
subsetted = fpool.starmap(subset, subset_args)
fpool.starmap(write_subset, chain(*subsetted))