From 361d396d01f6cdeb49970c0eeb705da442879f05 Mon Sep 17 00:00:00 2001
From: "Alex Xu (Hello71)"
Date: Fri, 6 Mar 2020 18:59:57 -0500
Subject: Initial commit

---
 Makefile                       |  23 +++++
 syntax-highlighting-client.sh  |  22 +++++
 syntax-highlighting-server.py  | 189 +++++++++++++++++++++++++++++++++++++++++
 syntax-highlighting.service.in |  33 +++++++
 4 files changed, 267 insertions(+)
 create mode 100644 Makefile
 create mode 100755 syntax-highlighting-client.sh
 create mode 100755 syntax-highlighting-server.py
 create mode 100644 syntax-highlighting.service.in

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..ad366da
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,23 @@
+prefix = /usr/local
+libdir = $(prefix)/lib
+systemdsystemunitdir = $(libdir)/systemd/system
+
+INSTALL = install
+RM = rm -f
+RMDIR = rmdir
+SED = sed
+
+all:
+
+install:
+	$(INSTALL) -m 0755 -d $(DESTDIR)$(libdir)/cgit
+	$(INSTALL) -m 0755 syntax-highlighting-server.py $(DESTDIR)$(libdir)/cgit/syntax-highlighting-server.py
+	$(INSTALL) -m 0755 -d $(DESTDIR)$(libdir)/cgit/filters
+	$(INSTALL) -m 0755 syntax-highlighting-client.sh $(DESTDIR)$(libdir)/cgit/filters/syntax-highlighting-client.sh
+	$(INSTALL) -m 0755 -d $(DESTDIR)$(systemdsystemunitdir)
+	$(SED) -e 's:@libdir@:$(libdir):' syntax-highlighting.service.in > $(DESTDIR)$(systemdsystemunitdir)/syntax-highlighting.service
+
+uninstall:
+	$(RM) $(DESTDIR)$(libdir)/cgit/syntax-highlighting-server.py
+	$(RM) $(DESTDIR)$(libdir)/cgit/filters/syntax-highlighting-client.sh
+	$(RM) $(DESTDIR)$(systemdsystemunitdir)/syntax-highlighting.service
diff --git a/syntax-highlighting-client.sh b/syntax-highlighting-client.sh
new file mode 100755
index 0000000..76aced1
--- /dev/null
+++ b/syntax-highlighting-client.sh
@@ -0,0 +1,22 @@
+#!/bin/sh
+
+# This script can be used to implement syntax highlighting in the cgit
+# tree-view by referring to this file with the source-filter or repo.source-
+# filter options in cgitrc.
+#
+# This script requires that syntax-highlighting-server.py be started to handle
+# highlighting requests.
+
+# configure this to your syntax-highlighting port
+base_url="http://localhost:4872/"
+
+path=$(curl -Gso /dev/null \
+    -w "%{url_effective}" \
+    --data-urlencode "filename=$1" \
+    /highlight)
+
+exec curl -sSf \
+    -H 'Content-Type: text/plain' \
+    -H 'Expect:' \
+    --data-binary @- \
+    "${base_url%/}$path"
diff --git a/syntax-highlighting-server.py b/syntax-highlighting-server.py
new file mode 100755
index 0000000..e093a9a
--- /dev/null
+++ b/syntax-highlighting-server.py
@@ -0,0 +1,189 @@
+#!/usr/bin/env python3
+
+# syntax-highlighting-server.py: create a simple HTTP server to highlight
+# source for cgit. improves performance compared to invoking python on every
+# request.
+#
+# Requirements: Python 3, pygments.
+#
+# Usage: Configure your system to run this at boot. Note that this program is
+# not hardened, and it can be trivially DoSed. therefore, do not configure it
+# to listen on a public network. Once configured, set your cgit source filter
+# to syntax-highlighting-client.sh.
+
+import argparse
+import logging
+import socket
+import selectors
+import sys
+
+from http.server import BaseHTTPRequestHandler, HTTPServer
+from urllib.parse import parse_qs, unquote
+
+from pygments import highlight
+from pygments.formatters import HtmlFormatter
+from pygments.lexers import LEXERS, _load_lexers, guess_lexer, guess_lexer_for_filename
+from pygments.lexers.special import TextLexer
+from pygments.util import ClassNotFound
+
+class BaseHTTPServer(HTTPServer):
+    # set SO_REUSEADDR
+    allow_reuse_address = True
+    # socketserver defaults to 5. especially in listen mode 'single', a
+    # sudden surge can easily overwhelm that.
+    request_queue_size = 128
+
+formatter = HtmlFormatter(style='pastie', nobackground=True, encoding='utf-8')
+style_defs = ('<style>' + formatter.get_style_defs('.highlight') + '</style>').encode('utf-8')
+
+class HighlightingHandler(BaseHTTPRequestHandler):
+    # read by BaseHTTPRequestHandler. need this so that curl doesn't delay
+    # waiting for 100-continue
+    protocol_version = 'HTTP/1.1'
+
+    def do_POST(self):
+        qs = parse_qs(self.path.split('?', 1)[1])
+        if len(qs['filename']) != 1:
+            raise ValueError('cannot have multiple filenames')
+        filename = qs['filename'][0]  # parse_qs has already percent-decoded the value
+
+        data_len = int(self.headers['Content-Length'])
+        # in theory this could be optimized, but pygments will use more peak
+        # memory than this anyways
+        data = self.rfile.read(data_len).decode('utf-8', errors='replace')
+
+        # we don't need Server, Date headers
+        self.log_request(200)
+        self.send_response_only(200, None)
+        self.send_header('Content-Type', 'text/html; charset=utf-8')
+
+        # in theory we could use keep-alive, but cgit will only highlight one
+        # file at a time, and this way we don't need to buffer the output in
+        # order to calculate Content-Length
+        self.send_header('Connection', 'close')
+        self.end_headers()
+
+        try:
+            lexer = guess_lexer_for_filename(filename, data)
+        except ClassNotFound:
+            try:
+                lexer = guess_lexer(data)
+            except ClassNotFound:
+                lexer = TextLexer()
+        self.wfile.write(style_defs)
+        highlight(data, lexer, formatter, outfile=self.wfile)
+
+def main():
+    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)
+
+    parser = argparse.ArgumentParser(description='syntax highlighting server',
+            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument('--host', type=str, default='localhost',
+            help='''
+            The hostname or IP address to listen on. Note that it is
+            insecure to run syntax-highlighting-server on a public network.
+            ''')
+    parser.add_argument('--port', type=int, default=4872,
+            help='the port to listen on')
+    parser.add_argument('--style', type=str, default='pastie',
+            help='pygments formatting style')
+    parser.add_argument('--listen-mode', type=str, default='single',
+            choices=['single', 'forking', 'reuseport'],
+            help='''
+            single, forking, or reuseport. single uses one
+            process/thread for all requests. forking forks a new
+            process for each request. reuseport forks NUM_SERVERS
+            servers at start, then binds them using SO_REUSEPORT (Linux
+            kernel does a round robin). single is best for low query
+            loads. forking is faster for high loads. reuseport is
+            fastest but uses more idle memory and requires Linux
+            ''')
+    parser.add_argument('--num-servers', type=str, default='auto',
+            help='''
+            The number of servers to run in reuseport mode. Ignored in
+            other modes. auto means one for each CPU.
+            ''')
+    args = parser.parse_args()
+
+    if args.host not in ('localhost', '127.0.0.1', '::1'):
+        logging.warning('''
+        Warning: it is insecure to run syntax-highlighting-server on a
+        public network. Clients can easily attack http.server or pygments.
+        ''')
+
+    logging.info('starting syntax-highlighting on {} port {}'.format(args.host, args.port))
+
+    # reduces first request latency, costs 5-10 MB RAM
+    # for forking, this is necessary for any performance at all (otherwise it
+    # defeats the whole purpose of using a separate process)
+    for lexer in LEXERS.values():
+        _load_lexers(lexer[0])
+    __import__('pygments.styles.' + args.style)
+
+    def start_server(MyHTTPServer):
+        with MyHTTPServer((args.host, args.port), HighlightingHandler) as server:
+            logging.info('started syntax-highlighting-server')
+            server.serve_forever()
+
+    if args.listen_mode == 'single':
+        start_server(BaseHTTPServer)
+
+    elif args.listen_mode == 'forking':
+        # note: Threading isn't useful for performance because of the GIL
+        from socketserver import ForkingMixIn
+        class ForkingHTTPServer(ForkingMixIn, BaseHTTPServer):
+            pass
+        start_server(ForkingHTTPServer)
+
+    elif args.listen_mode == 'reuseport':
+        import os
+        import signal
+
+        if not hasattr(socket, 'SO_REUSEPORT'):
+            raise Exception('SO_REUSEPORT not available on this platform')
+
+        class ReusePortHTTPServer(BaseHTTPServer):
+            def server_bind(self):
+                self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT, 1)
+                super().server_bind()
+
+        if args.num_servers == 'auto':
+            num_servers = len(os.sched_getaffinity(0))
+            logging.info('auto-detected {} CPUs'.format(num_servers))
+        else:
+            num_servers = int(args.num_servers)
+        logging.info('starting {} servers'.format(num_servers))
+
+        pipe = os.pipe()
+
+        try:
+            for i in range(num_servers):
+                pid = os.fork()
+                if pid == 0:
+                    os.close(pipe[1])
+                    with ReusePortHTTPServer((args.host, args.port), HighlightingHandler) as server:
+                        with selectors.DefaultSelector() as selector:
+                            selector.register(server, selectors.EVENT_READ)
+                            selector.register(pipe[0], selectors.EVENT_READ)
+                            while True:
+                                ready = selector.select(None)
+                                for key, events in ready:
+                                    if key.fd == pipe[0]:
+                                        return
+                                if ready:
+                                    server._handle_request_noblock()
+                                server.service_actions()
+                    sys.exit(0)
+            os.close(pipe[0])
+            logging.info('started syntax-highlighting-server')
+            os.wait()
+            logging.info('worker died, shutting down syntax-highlighting-server')
+        except KeyboardInterrupt:
+            if pid != 0:
+                logging.info('ctrl-c received, shutting down syntax-highlighting-server')
+
+    else:
+        raise Exception('invalid listen mode: {}'.format(args.listen_mode))
+
+if __name__ == '__main__':
+    main()
diff --git a/syntax-highlighting.service.in b/syntax-highlighting.service.in
new file mode 100644
index 0000000..db10fb7
--- /dev/null
+++ b/syntax-highlighting.service.in
@@ -0,0 +1,33 @@
+[Unit]
+Description=syntax highlighting server
+
+[Service]
+Type=simple
+User=nobody
+ExecStart=@libdir@/cgit/syntax-highlighting-server.py --host localhost --port 4872
+
+IPAddressAllow=localhost
+IPAddressDeny=any
+LockPersonality=yes
+MemoryDenyWriteExecute=yes
+NoNewPrivileges=yes
+PrivateDevices=yes
+PrivateNetwork=no
+PrivateTmp=yes
+ProtectControlGroups=yes
+ProtectHome=yes
+ProtectHostname=yes
+ProtectKernelModules=yes
+ProtectKernelTunables=yes
+ProtectKernelLogs=yes
+ProtectSystem=strict
+RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6
+RestrictNamespaces=yes
+RestrictRealtime=yes
+RestrictSUIDSGID=yes
+SystemCallArchitectures=native
+SystemCallErrorNumber=EPERM
+SystemCallFilter=@system-service
+
+[Install]
+WantedBy=multi-user.target
-- 
cgit v1.2.3-54-g00ecf