summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile23
-rwxr-xr-xsyntax-highlighting-client.sh22
-rwxr-xr-xsyntax-highlighting-server.py189
-rw-r--r--syntax-highlighting.service.in33
4 files changed, 267 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..ad366da
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,23 @@
+# Installation layout. Override on the command line, e.g. `make prefix=/usr`.
+prefix = /usr/local
+libdir = $(prefix)/lib
+systemdsystemunitdir = $(libdir)/systemd/system
+
+# Tools used by the recipes below.
+INSTALL = install
+RM = rm -f
+RMDIR = rmdir
+SED = sed
+
+# None of these targets produces a file of the same name; without this a stray
+# file called "install" would make `make install` a no-op.
+.PHONY: all install uninstall
+
+# Nothing to build: both programs are scripts.
+all:
+
+# Install the server under $(libdir)/cgit, the client under
+# $(libdir)/cgit/filters, and generate the systemd unit with @libdir@ replaced
+# by $(libdir). Every destination is prefixed with $(DESTDIR).
+install:
+	$(INSTALL) -m 0755 -d $(DESTDIR)$(libdir)/cgit
+	$(INSTALL) -m 0755 syntax-highlighting-server.py $(DESTDIR)$(libdir)/cgit/syntax-highlighting-server.py
+	$(INSTALL) -m 0755 -d $(DESTDIR)$(libdir)/cgit/filters
+	$(INSTALL) -m 0755 syntax-highlighting-client.sh $(DESTDIR)$(libdir)/cgit/filters/syntax-highlighting-client.sh
+	$(INSTALL) -m 0755 -d $(DESTDIR)$(systemdsystemunitdir)
+	$(SED) -e 's:@libdir@:$(libdir):' syntax-highlighting.service.in > $(DESTDIR)$(systemdsystemunitdir)/syntax-highlighting.service
+
+# Remove the three installed files. Directories are left in place.
+uninstall:
+	$(RM) $(DESTDIR)$(libdir)/cgit/syntax-highlighting-server.py
+	$(RM) $(DESTDIR)$(libdir)/cgit/filters/syntax-highlighting-client.sh
+	$(RM) $(DESTDIR)$(systemdsystemunitdir)/syntax-highlighting.service
diff --git a/syntax-highlighting-client.sh b/syntax-highlighting-client.sh
new file mode 100755
index 0000000..76aced1
--- /dev/null
+++ b/syntax-highlighting-client.sh
@@ -0,0 +1,22 @@
+#!/bin/sh
+
+# This script can be used to implement syntax highlighting in the cgit
+# tree-view by referring to this file with the source-filter or repo.source-
+# filter options in cgitrc.
+#
+# This script requires that syntax-highlighting-server.py be started to handle
+# highlighting requests.
+
+# configure this to your syntax-highlighting port
+base_url="http://localhost:4872/"
+
+path=$(curl -Gso /dev/null \
+ -w "%{url_effective}" \
+ --data-urlencode "filename=$1" \
+ /highlight)
+
+exec curl -sSf \
+ -H 'Content-Type: text/plain' \
+ -H 'Expect:' \
+ --data-binary @- \
+ "${base_url%/}$path"
diff --git a/syntax-highlighting-server.py b/syntax-highlighting-server.py
new file mode 100755
index 0000000..e093a9a
--- /dev/null
+++ b/syntax-highlighting-server.py
@@ -0,0 +1,189 @@
+#!/usr/bin/env python3
+
+# syntax-highlighting-server.py: create a simple HTTP server to highlight
+# source for cgit. Improves performance compared to invoking Python on every
+# request.
+#
+# Requirements: Python 3, pygments.
+#
+# Usage: Configure your system to run this at boot. Note that this program is
+# not hardened, and it can be trivially DoSed. Therefore, do not configure it
+# to listen on a public network. Once configured, set your cgit source filter
+# to syntax-highlighting-client.sh.
+
+import argparse
+import logging
+import socket
+import selectors
+import sys
+
+from http.server import BaseHTTPRequestHandler, HTTPServer
+from urllib.parse import parse_qs, unquote
+
+from pygments import highlight
+from pygments.formatters import HtmlFormatter
+from pygments.lexers import LEXERS, _load_lexers, guess_lexer, guess_lexer_for_filename
+from pygments.lexers.special import TextLexer
+from pygments.util import ClassNotFound
+
+class BaseHTTPServer(HTTPServer):
+ # set SO_REUSEADDR
+ allow_reuse_address = True
+ # socketserver defaults to 5. especially in listen mode 'single', a
+ # sudden surge can easily overwhelm that.
+ request_queue_size = 128
+
+formatter = HtmlFormatter(style='pastie', nobackground=True, encoding='utf-8')
+style_defs = ('<style>' + formatter.get_style_defs('.highlight') + '</style>').encode('utf-8')
+
+class HighlightingHandler(BaseHTTPRequestHandler):
+ # read by BaseHTTPRequestHandler. need this so that curl doesn't delay
+ # waiting for 100-continue
+ protocol_version = 'HTTP/1.1'
+
+ def do_POST(self):
+ qs = parse_qs(self.path.split('?', 1)[1])
+ if len(qs['filename']) != 1:
+ raise ValueError('cannot have multiple filenames')
+ filename = unquote(qs['filename'][0])
+
+ data_len = int(self.headers['Content-Length'])
+ # in theory this could be optimized, but pygments will use more peak
+ # memory than this anyways
+ data = self.rfile.read(data_len).decode('utf-8', errors='replace')
+
+ # we don't need Server, Date headers
+ self.log_request(200)
+ self.send_response_only(200, None)
+ self.send_header('Content-Type', 'text/html; charset=utf-8')
+
+ # in theory we could use keep-alive, but cgit will only highlight one
+ # file at a time, and this way we don't need to buffer the output in
+ # order to calculate Content-Length
+ self.send_header('Connection', 'close')
+ self.end_headers()
+
+ try:
+ lexer = guess_lexer_for_filename(filename, data)
+ except ClassNotFound:
+ try:
+ lexer = guess_lexer(data)
+ except ClassNotFound:
+ lexer = TextLexer()
+ self.wfile.write(style_defs)
+ highlight(data, lexer, formatter, outfile=self.wfile)
+
+def main():
+ logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)
+
+ parser = argparse.ArgumentParser(description='syntax highlighting server',
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+ parser.add_argument('--host', type=str, default='localhost',
+ help='''
+ The hostname or IP address to listen on. Note that it is
+ insecure to run syntax-highlighting-server on a public network.
+ ''')
+ parser.add_argument('--port', type=int, default=4872,
+ help='the port to listen on')
+ parser.add_argument('--style', type=str, default='pastie',
+ help='pygments formatting style')
+ parser.add_argument('--listen-mode', type=str, default='single',
+ choices=['single', 'forking', 'reuseport'],
+ help='''
+ single, forking, or reuseport. single uses one
+ process/thread for all requests. forking forks a new
+ process for each request. reuseport forks NUM_SERVERS
+ servers at start, then binds them using SO_REUSEPORT (Linux
+ kernel does a round robin). single is best for low query
+ loads. forking is faster for high loads. reuseport is
+ fastest but uses more idle memory and requires Linux
+ ''')
+ parser.add_argument('--num-servers', type=str, default='auto',
+ help='''
+ The number of servers to run in reuseport mode. Ignored in
+ other modes. auto means one for each CPU.
+ ''')
+ args = parser.parse_args()
+
+ if args.host not in ('localhost', '127.0.0.1', '::1'):
+ logging.warning('''
+ Warning: it is insecure to run syntax-highlighting-server on a
+ public network. Clients can easily attack http.server or pygments.
+ ''')
+
+ logging.info('starting syntax-highlighting on {} port {}'.format(args.host, args.port))
+
+ # reduces first request latency, costs 5-10 MB RAM
+ # for forking, this is necessary for any performance at all (otherwise it
+ # defeats the whole purpose of using a separate process)
+ for lexer in LEXERS.values():
+ _load_lexers(lexer[0])
+ __import__('pygments.styles.' + args.style)
+
+ def start_server(MyHTTPServer):
+ with MyHTTPServer((args.host, args.port), HighlightingHandler) as server:
+ logging.info('started syntax-highlighting-server')
+ server.serve_forever()
+
+ if args.listen_mode == 'single':
+ start_server(BaseHTTPServer)
+
+ elif args.listen_mode == 'forking':
+ # note: Threading isn't useful for performance because of the GIL
+ from socketserver import ForkingMixIn
+ class ForkingHTTPServer(ForkingMixIn, BaseHTTPServer):
+ pass
+ start_server(ForkingHTTPServer)
+
+ elif args.listen_mode == 'reuseport':
+ import os
+ import signal
+
+ if not hasattr(socket, 'SO_REUSEPORT'):
+ raise Exception('SO_REUSEPORT not available on this platform')
+
+ class ReusePortHTTPServer(BaseHTTPServer):
+ def server_bind(self):
+ self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT, 1)
+ super().server_bind()
+
+ if args.num_servers == 'auto':
+ num_servers = len(os.sched_getaffinity(0))
+ logging.info('auto-detected {} CPUs'.format(num_servers))
+ else:
+ num_servers = int(args.num_servers)
+ logging.info('starting {} servers'.format(num_servers))
+
+ pipe = os.pipe()
+
+ try:
+ for i in range(num_servers):
+ pid = os.fork()
+ if pid == 0:
+ os.close(pipe[1])
+ with ReusePortHTTPServer((args.host, args.port), HighlightingHandler) as server:
+ with selectors.DefaultSelector() as selector:
+ selector.register(server, selectors.EVENT_READ)
+ selector.register(pipe[0], selectors.EVENT_READ)
+ while True:
+ ready = selector.select(None)
+ for key, events in ready:
+ if key.fd == pipe[0]:
+ return
+ if ready:
+ server._handle_request_noblock()
+ server.service_actions()
+ sys.exit(0)
+ os.close(pipe[0])
+ logging.info('started syntax-highlighting-server')
+ os.wait()
+ logging.info('worker died, shutting down syntax-highlighting-server')
+ except KeyboardInterrupt:
+ if pid != 0:
+ logging.info('ctrl-c received, shutting down syntax-highlighting-server')
+
+ else:
+ raise Exception('invalid listen mode: {}'.format(args.listen_mode))
+
+# Start the server only when this file is executed as a script.
+if __name__ == '__main__':
+    main()
diff --git a/syntax-highlighting.service.in b/syntax-highlighting.service.in
new file mode 100644
index 0000000..db10fb7
--- /dev/null
+++ b/syntax-highlighting.service.in
@@ -0,0 +1,33 @@
+# systemd unit template for syntax-highlighting-server.py. The @libdir@
+# placeholder is replaced with the real library directory at install time.
+[Unit]
+Description=syntax highlighting server
+
+[Service]
+Type=simple
+User=nobody
+# The server takes its listen address as --host/--port options; it accepts no
+# positional arguments.
+ExecStart=@libdir@/cgit/syntax-highlighting-server.py --host localhost --port 4872
+
+# Sandboxing. Network traffic is limited to loopback addresses.
+IPAddressAllow=localhost
+IPAddressDeny=any
+LockPersonality=yes
+MemoryDenyWriteExecute=yes
+NoNewPrivileges=yes
+PrivateDevices=yes
+# PrivateNetwork=yes is deliberately not set: it would place the listener in
+# its own network namespace with a separate loopback device, unreachable from
+# the cgit filter running on the host. It can be enabled if the client's unit
+# joins this one via JoinsNamespaceOf=.
+PrivateTmp=yes
+ProtectControlGroups=yes
+ProtectHome=yes
+ProtectHostname=yes
+ProtectKernelModules=yes
+ProtectKernelTunables=yes
+ProtectKernelLogs=yes
+ProtectSystem=strict
+RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6
+RestrictNamespaces=yes
+RestrictRealtime=yes
+RestrictSUIDSGID=yes
+SystemCallArchitectures=native
+SystemCallErrorNumber=EPERM
+SystemCallFilter=@system-service
+
+[Install]
+WantedBy=multi-user.target