From 33c4460bed1488c9c313f9646514914852b45f32 Mon Sep 17 00:00:00 2001 From: "Alex Xu (Hello71)" Date: Sun, 8 Mar 2020 20:22:55 -0400 Subject: various fixes and optimizations, add README --- README | 3 + syntax-highlighting-server.py | 187 ++++++++++++++++++++++++------------------ 2 files changed, 111 insertions(+), 79 deletions(-) create mode 100644 README diff --git a/README b/README new file mode 100644 index 0000000..6ba0fbd --- /dev/null +++ b/README @@ -0,0 +1,3 @@ +Dedicated syntax highlighting server for cgit. + +Mitigates the issue of python import time for cgit source-filters. diff --git a/syntax-highlighting-server.py b/syntax-highlighting-server.py index 5aad034..dabd631 100755 --- a/syntax-highlighting-server.py +++ b/syntax-highlighting-server.py @@ -13,16 +13,13 @@ import argparse import logging -import socket -import selectors -import sys from http.server import BaseHTTPRequestHandler, HTTPServer from urllib.parse import parse_qs, unquote from pygments import highlight from pygments.formatters import HtmlFormatter -from pygments.lexers import LEXERS, _load_lexers, guess_lexer, guess_lexer_for_filename +from pygments.lexers import guess_lexer, guess_lexer_for_filename from pygments.lexers.special import TextLexer from pygments.util import ClassNotFound @@ -33,10 +30,10 @@ class HighlightingHTTPServer(HTTPServer): # sudden surge can easily overwhelm that. request_queue_size = 64 - def __init__(self, *args, style='pastie', **kwargs): + def __init__(self, *args, formatter, style_defs, **kwargs): super().__init__(*args, **kwargs) - self.formatter = HtmlFormatter(style=style, nobackground=True, encoding='utf-8') - self.style_defs = ('').encode('utf-8') + self.formatter = formatter + self.style_defs = style_defs class HighlightingHandler(BaseHTTPRequestHandler): # read by BaseHTTPRequestHandler. need this so that curl doesn't delay @@ -75,12 +72,10 @@ class HighlightingHandler(BaseHTTPRequestHandler): self.wfile.write(self.server.style_defs) highlight(data, lexer, self.server.formatter, outfile=self.wfile) -def main(): - logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO) - +def parse_args(): parser = argparse.ArgumentParser(description='syntax highlighting server', formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--host', type=str, default='localhost', + parser.add_argument('--host', type=str, default='127.0.0.1', help=''' The hostname or IP address to listen on. Note that it is insecure to run syntax-highlighting-server on a public network. @@ -105,7 +100,12 @@ def main(): The number of servers to run in reuseport mode. Ignored in other modes. auto means one for each CPU. ''') - args = parser.parse_args() + return parser.parse_args() + +def main(): + logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO) + + args = parse_args() if args.host not in ('localhost', '127.0.0.1', '::1'): logging.warning(''' @@ -113,79 +113,108 @@ def main(): public network. Clients can easily attack http.server or pygments. ''') - logging.info('starting syntax-highlighting on {} port {}'.format(args.host, args.port)) - - # reduces first request latency, costs 5-10 MB RAM - # for forking, this is necessary for any performance at all (otherwise it - # defeats the whole purpose of using a separate process) - for lexer in LEXERS.values(): - _load_lexers(lexer[0]) - __import__('pygments.styles.' + args.style) - - def start_server(MyHTTPServer): - with MyHTTPServer((args.host, args.port), HighlightingHandler, style=args.style) as server: - logging.info('started syntax-highlighting-server') - server.serve_forever() - - if args.listen_mode == 'single': - start_server(HighlightingHTTPServer) + logging.info('starting syntax-highlighting on {} port {}' + .format(args.host, args.port)) + + try: + # preload lexers + guess_lexer('') + # preload formatter + formatter = HtmlFormatter(style=args.style, + nobackground=True, encoding='utf-8') + # pre-compute style defs + style_defs = ('').encode('utf-8') + # used internally by socket + ''.encode('idna') + + def start_server(MyHTTPServer): + with MyHTTPServer((args.host, args.port), HighlightingHandler, + formatter=formatter, style_defs=style_defs) as server: + logging.info('started syntax-highlighting-server') + server.serve_forever() + + if args.listen_mode == 'single': + start_server(HighlightingHTTPServer) + + elif args.listen_mode == 'forking': + # note: Threading isn't useful for performance because of the GIL + from socketserver import ForkingMixIn + class ForkingHTTPServer(ForkingMixIn, HighlightingHTTPServer): + pass + start_server(ForkingHTTPServer) + + elif args.listen_mode == 'reuseport': + import os + import selectors + import socket + import sys + + class ReusePortHTTPServer(HighlightingHTTPServer): + def server_bind(self): + self.socket.setsockopt( + socket.SOL_SOCKET, socket.SO_REUSEPORT, 1) + super().server_bind() + + # check that we can bind to the port + # keep tmp_server around to avoid TOCTOU + tmp_server = ReusePortHTTPServer((args.host, args.port), None, + formatter=None, style_defs=None) + + if args.num_servers != 'auto': + num_servers = int(args.num_servers) + elif hasattr(os, 'sched_getaffinity'): + num_servers = len(os.sched_getaffinity(0)) + else: + num_servers = os.cpu_count() + logging.info('starting {} servers'.format(num_servers)) + + pipe = os.pipe() - elif args.listen_mode == 'forking': - # note: Threading isn't useful for performance because of the GIL - from socketserver import ForkingMixIn - class ForkingHTTPServer(ForkingMixIn, HighlightingHTTPServer): - pass - start_server(ForkingHTTPServer) - - elif args.listen_mode == 'reuseport': - import os - import signal - - if not hasattr(socket, 'SO_REUSEPORT'): - raise Exception('SO_REUSEPORT not available on this platform') - - class ReusePortHTTPServer(HighlightingHTTPServer): - def server_bind(self): - self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT, 1) - super().server_bind() - - if args.num_servers == 'auto': - num_servers = len(os.sched_getaffinity(0)) - logging.info('auto-detected {} CPUs'.format(num_servers)) - else: - num_servers = int(args.num_servers) - logging.info('starting {} servers'.format(num_servers)) - - pipe = os.pipe() - - try: for i in range(num_servers): pid = os.fork() if pid == 0: - os.close(pipe[1]) - with ReusePortHTTPServer((args.host, args.port), HighlightingHandler, style=args.style) as server: - with selectors.DefaultSelector() as selector: - selector.register(server, selectors.EVENT_READ) - selector.register(pipe[0], selectors.EVENT_READ) - while True: - ready = selector.select(None) - for key, events in ready: - if key.fd == pipe[0]: - return - if ready: - server._handle_request_noblock() - server.service_actions() - sys.exit(0) + tmp_server.server_close() + try: + os.close(pipe[1]) + with ReusePortHTTPServer((args.host, args.port), + HighlightingHandler, formatter=formatter, + style_defs=style_defs) as server: + with selectors.DefaultSelector() as selector: + selector.register(server, selectors.EVENT_READ) + selector.register(pipe[0], selectors.EVENT_READ) + while True: + ready = selector.select(None) + for key, events in ready: + if key.fd == pipe[0]: + sys.exit(0) + if ready: + server._handle_request_noblock() + server.service_actions() + except KeyboardInterrupt: + # Ctrl-C on the command line sends SIGINT to the whole + # process group. we could wait for the pipe, but just exit + # now + sys.exit(0) + except Exception: + # try to keep exception message together + # the default virtually guarantees mangled output + import traceback + sys.stderr.write(traceback.format_exc()) + sys.exit(1) os.close(pipe[0]) + tmp_server.server_close() logging.info('started syntax-highlighting-server') - os.wait() - logging.info('worker died, shutting down syntax-highlighting-server') - except KeyboardInterrupt: - if pid != 0: - logging.info('ctrl-c received, shutting down syntax-highlighting-server') - - else: - raise Exception('invalid listen mode: {}'.format(args.listen_mode)) + pid, status = os.wait() + logging.info('worker {} died, shutting down syntax-highlighting-server' + .format(pid)) + + else: + raise Exception('invalid listen mode: {}'.format(args.listen_mode)) + + except KeyboardInterrupt: + logging.info('ctrl-c received, shutting down syntax-highlighting-server') if __name__ == '__main__': main() -- cgit v1.2.3-70-g09d2