summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- README 3
-rwxr-xr-x syntax-highlighting-server.py 187
2 files changed, 111 insertions, 79 deletions
diff --git a/README b/README
new file mode 100644
index 0000000..6ba0fbd
--- /dev/null
+++ b/README
@@ -0,0 +1,3 @@
+Dedicated syntax highlighting server for cgit.
+
+Mitigates the issue of python import time for cgit source-filters.
diff --git a/syntax-highlighting-server.py b/syntax-highlighting-server.py
index 5aad034..dabd631 100755
--- a/syntax-highlighting-server.py
+++ b/syntax-highlighting-server.py
@@ -13,16 +13,13 @@
import argparse
import logging
-import socket
-import selectors
-import sys
from http.server import BaseHTTPRequestHandler, HTTPServer
from urllib.parse import parse_qs, unquote
from pygments import highlight
from pygments.formatters import HtmlFormatter
-from pygments.lexers import LEXERS, _load_lexers, guess_lexer, guess_lexer_for_filename
+from pygments.lexers import guess_lexer, guess_lexer_for_filename
from pygments.lexers.special import TextLexer
from pygments.util import ClassNotFound
@@ -33,10 +30,10 @@ class HighlightingHTTPServer(HTTPServer):
# sudden surge can easily overwhelm that.
request_queue_size = 64
- def __init__(self, *args, style='pastie', **kwargs):
+ def __init__(self, *args, formatter, style_defs, **kwargs):
super().__init__(*args, **kwargs)
- self.formatter = HtmlFormatter(style=style, nobackground=True, encoding='utf-8')
- self.style_defs = ('<style>' + self.formatter.get_style_defs('.highlight') + '</style>').encode('utf-8')
+ self.formatter = formatter
+ self.style_defs = style_defs
class HighlightingHandler(BaseHTTPRequestHandler):
# read by BaseHTTPRequestHandler. need this so that curl doesn't delay
@@ -75,12 +72,10 @@ class HighlightingHandler(BaseHTTPRequestHandler):
self.wfile.write(self.server.style_defs)
highlight(data, lexer, self.server.formatter, outfile=self.wfile)
-def main():
- logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)
-
+def parse_args():
parser = argparse.ArgumentParser(description='syntax highlighting server',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
- parser.add_argument('--host', type=str, default='localhost',
+ parser.add_argument('--host', type=str, default='127.0.0.1',
help='''
The hostname or IP address to listen on. Note that it is
insecure to run syntax-highlighting-server on a public network.
@@ -105,7 +100,12 @@ def main():
The number of servers to run in reuseport mode. Ignored in
other modes. auto means one for each CPU.
''')
- args = parser.parse_args()
+ return parser.parse_args()
+
+def main():
+ logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)
+
+ args = parse_args()
if args.host not in ('localhost', '127.0.0.1', '::1'):
logging.warning('''
@@ -113,79 +113,108 @@ def main():
public network. Clients can easily attack http.server or pygments.
''')
- logging.info('starting syntax-highlighting on {} port {}'.format(args.host, args.port))
-
- # reduces first request latency, costs 5-10 MB RAM
- # for forking, this is necessary for any performance at all (otherwise it
- # defeats the whole purpose of using a separate process)
- for lexer in LEXERS.values():
- _load_lexers(lexer[0])
- __import__('pygments.styles.' + args.style)
-
- def start_server(MyHTTPServer):
- with MyHTTPServer((args.host, args.port), HighlightingHandler, style=args.style) as server:
- logging.info('started syntax-highlighting-server')
- server.serve_forever()
-
- if args.listen_mode == 'single':
- start_server(HighlightingHTTPServer)
+ logging.info('starting syntax-highlighting on {} port {}'
+ .format(args.host, args.port))
+
+ try:
+ # preload lexers
+ guess_lexer('')
+ # preload formatter
+ formatter = HtmlFormatter(style=args.style,
+ nobackground=True, encoding='utf-8')
+ # pre-compute style defs
+ style_defs = ('<style>' +
+ formatter.get_style_defs('.highlight') +
+ '</style>').encode('utf-8')
+ # used internally by socket
+ ''.encode('idna')
+
+ def start_server(MyHTTPServer):
+ with MyHTTPServer((args.host, args.port), HighlightingHandler,
+ formatter=formatter, style_defs=style_defs) as server:
+ logging.info('started syntax-highlighting-server')
+ server.serve_forever()
+
+ if args.listen_mode == 'single':
+ start_server(HighlightingHTTPServer)
+
+ elif args.listen_mode == 'forking':
+ # note: Threading isn't useful for performance because of the GIL
+ from socketserver import ForkingMixIn
+ class ForkingHTTPServer(ForkingMixIn, HighlightingHTTPServer):
+ pass
+ start_server(ForkingHTTPServer)
+
+ elif args.listen_mode == 'reuseport':
+ import os
+ import selectors
+ import socket
+ import sys
+
+ class ReusePortHTTPServer(HighlightingHTTPServer):
+ def server_bind(self):
+ self.socket.setsockopt(
+ socket.SOL_SOCKET, socket.SO_REUSEPORT, 1)
+ super().server_bind()
+
+ # check that we can bind to the port
+ # keep tmp_server around to avoid TOCTOU
+ tmp_server = ReusePortHTTPServer((args.host, args.port), None,
+ formatter=None, style_defs=None)
+
+ if args.num_servers != 'auto':
+ num_servers = int(args.num_servers)
+ elif hasattr(os, 'sched_getaffinity'):
+ num_servers = len(os.sched_getaffinity(0))
+ else:
+ num_servers = os.cpu_count()
+ logging.info('starting {} servers'.format(num_servers))
+
+ pipe = os.pipe()
- elif args.listen_mode == 'forking':
- # note: Threading isn't useful for performance because of the GIL
- from socketserver import ForkingMixIn
- class ForkingHTTPServer(ForkingMixIn, HighlightingHTTPServer):
- pass
- start_server(ForkingHTTPServer)
-
- elif args.listen_mode == 'reuseport':
- import os
- import signal
-
- if not hasattr(socket, 'SO_REUSEPORT'):
- raise Exception('SO_REUSEPORT not available on this platform')
-
- class ReusePortHTTPServer(HighlightingHTTPServer):
- def server_bind(self):
- self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT, 1)
- super().server_bind()
-
- if args.num_servers == 'auto':
- num_servers = len(os.sched_getaffinity(0))
- logging.info('auto-detected {} CPUs'.format(num_servers))
- else:
- num_servers = int(args.num_servers)
- logging.info('starting {} servers'.format(num_servers))
-
- pipe = os.pipe()
-
- try:
for i in range(num_servers):
pid = os.fork()
if pid == 0:
- os.close(pipe[1])
- with ReusePortHTTPServer((args.host, args.port), HighlightingHandler, style=args.style) as server:
- with selectors.DefaultSelector() as selector:
- selector.register(server, selectors.EVENT_READ)
- selector.register(pipe[0], selectors.EVENT_READ)
- while True:
- ready = selector.select(None)
- for key, events in ready:
- if key.fd == pipe[0]:
- return
- if ready:
- server._handle_request_noblock()
- server.service_actions()
- sys.exit(0)
+ tmp_server.server_close()
+ try:
+ os.close(pipe[1])
+ with ReusePortHTTPServer((args.host, args.port),
+ HighlightingHandler, formatter=formatter,
+ style_defs=style_defs) as server:
+ with selectors.DefaultSelector() as selector:
+ selector.register(server, selectors.EVENT_READ)
+ selector.register(pipe[0], selectors.EVENT_READ)
+ while True:
+ ready = selector.select(None)
+ for key, events in ready:
+ if key.fd == pipe[0]:
+ sys.exit(0)
+ if ready:
+ server._handle_request_noblock()
+ server.service_actions()
+ except KeyboardInterrupt:
+ # Ctrl-C on the command line sends SIGINT to the whole
+ # process group. we could wait for the pipe, but just exit
+ # now
+ sys.exit(0)
+ except Exception:
+ # try to keep exception message together
+ # the default virtually guarantees mangled output
+ import traceback
+ sys.stderr.write(traceback.format_exc())
+ sys.exit(1)
os.close(pipe[0])
+ tmp_server.server_close()
logging.info('started syntax-highlighting-server')
- os.wait()
- logging.info('worker died, shutting down syntax-highlighting-server')
- except KeyboardInterrupt:
- if pid != 0:
- logging.info('ctrl-c received, shutting down syntax-highlighting-server')
-
- else:
- raise Exception('invalid listen mode: {}'.format(args.listen_mode))
+ pid, status = os.wait()
+ logging.info('worker {} died, shutting down syntax-highlighting-server'
+ .format(pid))
+
+ else:
+ raise Exception('invalid listen mode: {}'.format(args.listen_mode))
+
+ except KeyboardInterrupt:
+ logging.info('ctrl-c received, shutting down syntax-highlighting-server')
if __name__ == '__main__':
main()