diff options
author | Alex Xu (Hello71) <alex_y_xu@yahoo.ca> | 2022-03-02 14:35:07 -0500 |
---|---|---|
committer | Alex Xu (Hello71) <alex_y_xu@yahoo.ca> | 2022-03-02 14:35:07 -0500 |
commit | 899b674f782331e520e277c55710ebab4de616ff (patch) | |
tree | b041fd9e76ed994eb718411469fc46e8b9fcf9f5 | |
parent | 6ae1a9a2b69fd0e634ebafbabcd554f47e8bf7ea (diff) | |
download | wfs-master.tar.xz wfs-master.zip |
-rwxr-xr-x | wfs.py | 35 |
1 file changed, 21 insertions, 14 deletions
@@ -15,6 +15,7 @@ logger = logging.getLogger('wfs') logger.setLevel(logging.INFO) def make_uri(path): + """Return path if it looks like a URL, otherwise convert it to a file: URL""" if ':' in path: return path return pathlib.Path(path).resolve().as_uri() @@ -53,6 +54,7 @@ def main(argv): help='selenium driver name (chrome or firefox)', default='chrome') parser.add_argument('--no-screenshots', help='skip screenshot validation', action='store_true') parser.add_argument('--font', help='add font (fontfile:family:weight:style)', action='append') + parser.add_argument('--width', help='browser width', type=int, default=1920) parser.add_argument('file', help='html files', nargs='+') args, leftover = parser.parse_known_intermixed_args(argv) options = fontTools.subset.Options() @@ -75,8 +77,10 @@ def main(argv): time.sleep = lambda secs: sleep(min(secs, 0.1)) with start_driver(args.driver) as driver: + driver.set_window_size(args.width, args.width*0.5625) # 9:16 = 0.5625 font_texts = {} screenshots = [] + for path in args.file: logger.info('fetching %s', path) driver.get(make_uri(path)) @@ -95,8 +99,9 @@ def main(argv): src: url({fontfile}); }}''' for fontdesc, fontfile in fonts.items())) logger.info('taking pre-screenshot for %s', path) - driver.set_window_size(1920, height) + driver.set_window_size(args.width, height) screenshots.append((path, driver.get_screenshot_as_png())) + logger.info('extracting text from %s', path) for fontstr, text in driver.execute_script(r''' const walker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT); @@ -113,22 +118,23 @@ def main(argv): return dict; ''').items(): - fontspec = tuple(fontstr.split(';')) - if fontspec in font_texts: - font_texts[fontspec] |= set(text) - else: - font_texts[fontspec] = set(text) + font_texts.setdefault(tuple(fontstr.split(';')), set()).update(text) + if args.no_screenshots: - logger.info('shutting down driver') + logger.info('done extracting texts, shutting down driver') driver.close() - with 
multiprocessing.Pool(min(len(fonts), len(os.sched_getaffinity(0)))) as fpool: - jobs = [] - for fontspec, text in font_texts.items(): - try: - jobs.append((fonts[fontspec], ''.join(text), options)) - except KeyError: - logger.warning('missing font %s', fontspec) + + nworkers = min(len(fonts), len(os.sched_getaffinity(0))) + jobs = [] + for fontspec, text in font_texts.items(): + if fontspec in fonts: + jobs.append((fonts[fontspec], ''.join(text), options)) + else: + logger.warning('missing font %s', fontspec) + logger.info('starting %d subset workers for %d jobs', nworkers, len(jobs)) + with multiprocessing.Pool(nworkers) as fpool: fpool.starmap(subset, jobs) + while screenshots: path, start_png = screenshots.pop() start = Image.open(BytesIO(start_png), formats=('PNG',)) @@ -141,6 +147,7 @@ def main(argv): end = Image.open(BytesIO(driver.get_screenshot_as_png()), formats=('PNG',)) if ImageChops.difference(start.convert('RGB'), end.convert('RGB')).getbbox(): raise Exception(f'screenshots do not match for {path}') + logger.info('exiting successfully') if __name__ == '__main__': |