1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
|
#!/usr/bin/env python3
# syntax-highlighting-server.py: create a simple HTTP server to highlight
# source for cgit. Improves performance compared to invoking Python on every
# request.
#
# Requirements: Python 3, pygments.
#
# Usage: Configure your system to run this at boot. Note that this program is
# not hardened, and it can be trivially DoSed. Therefore, do not configure it
# to listen on a public network. Once configured, set your cgit source filter
# to syntax-highlighting-client.sh.
import argparse
import logging
import socket
import selectors
import sys
from http.server import BaseHTTPRequestHandler, HTTPServer
from urllib.parse import parse_qs, unquote
from pygments import highlight
from pygments.formatters import HtmlFormatter
from pygments.lexers import LEXERS, _load_lexers, guess_lexer, guess_lexer_for_filename
from pygments.lexers.special import TextLexer
from pygments.util import ClassNotFound
class HighlightingHTTPServer(HTTPServer):
    """HTTP server that carries the shared pygments formatter.

    The formatter and the encoded ``<style>`` block derived from it are
    built once at construction time and stored on the server instance as
    ``formatter`` and ``style_defs``.

    Args:
        *args: Positional arguments forwarded to ``HTTPServer``.
        style: Name of the pygments style used by the HTML formatter.
        **kwargs: Keyword arguments forwarded to ``HTTPServer``.
    """

    # socketserver defaults to 5. especially in listen mode 'single', a
    # sudden surge can easily overwhelm that.
    request_queue_size = 64

    # set SO_REUSEADDR
    allow_reuse_address = True

    def __init__(self, *args, style='pastie', **kwargs):
        super().__init__(*args, **kwargs)
        html_formatter = HtmlFormatter(
            style=style,
            nobackground=True,
            encoding='utf-8',
        )
        css_rules = html_formatter.get_style_defs('.highlight')
        self.formatter = html_formatter
        # pre-encoded once so each response can write it out directly
        self.style_defs = '<style>{}</style>'.format(css_rules).encode('utf-8')
class HighlightingHandler(BaseHTTPRequestHandler):
    """Request handler that highlights one POSTed source file per request.

    The request is ``POST <path>?filename=NAME`` with the raw source as the
    body. The response body is the server's ``style_defs`` followed by the
    output of pygments' ``highlight`` for the body, written straight to the
    socket. Malformed requests are answered with a 4xx error response.
    """

    # read by BaseHTTPRequestHandler. need this so that curl doesn't delay
    # waiting for 100-continue
    protocol_version = 'HTTP/1.1'

    def do_POST(self):
        """Highlight the request body and stream the result to the client.

        Sends 400 when the ``filename`` query parameter is missing or given
        more than once, 411 when ``Content-Length`` is absent, and 400 when
        ``Content-Length`` is not a non-negative integer. Otherwise sends
        200 and the highlighted HTML.
        """
        # partition() yields '' when the path has no '?', so a request
        # without a query string is reported as a missing filename instead
        # of raising IndexError
        query = self.path.partition('?')[2]
        filenames = parse_qs(query).get('filename', [])
        if not filenames:
            self.send_error(400, 'missing filename parameter')
            return
        if len(filenames) != 1:
            self.send_error(400, 'cannot have multiple filenames')
            return
        # NOTE(review): parse_qs has already percent-decoded the value; the
        # extra unquote is kept from the original code - presumably the
        # client double-encodes the name, confirm against the client script
        filename = unquote(filenames[0])

        raw_length = self.headers['Content-Length']
        if raw_length is None:
            self.send_error(411, 'Content-Length is required')
            return
        try:
            data_len = int(raw_length)
        except ValueError:
            data_len = -1
        if data_len < 0:
            # a negative size would make rfile.read() block until EOF
            self.send_error(400, 'invalid Content-Length')
            return

        # in theory this could be optimized, but pygments will use more peak
        # memory than this anyways
        data = self.rfile.read(data_len).decode('utf-8', errors='replace')

        # we don't need Server, Date headers
        self.log_request(200)
        self.send_response_only(200, None)
        self.send_header('Content-Type', 'text/html; charset=utf-8')
        # in theory we could use keep-alive, but cgit will only highlight one
        # file at a time, and this way we don't need to buffer the output in
        # order to calculate Content-Length
        self.send_header('Connection', 'close')
        self.end_headers()

        # pick a lexer: by filename first, then by content, then plain text
        try:
            lexer = guess_lexer_for_filename(filename, data)
        except ClassNotFound:
            try:
                lexer = guess_lexer(data)
            except ClassNotFound:
                lexer = TextLexer()

        self.wfile.write(self.server.style_defs)
        highlight(data, lexer, self.server.formatter, outfile=self.wfile)
def _requested_num_servers(parser, args):
    """Validate --num-servers; return an int, or None when it is 'auto'.

    Exits through ``parser.error`` (usage message, status 2) when the value
    is neither 'auto' nor a positive integer.
    """
    if args.num_servers == 'auto':
        return None
    try:
        count = int(args.num_servers)
    except ValueError:
        count = 0
    if count < 1:
        parser.error("--num-servers must be 'auto' or a positive integer, "
                     'got {!r}'.format(args.num_servers))
    return count


def _serve_until_parent_exits(server, parent_fd):
    """Handle requests on *server* until *parent_fd* becomes readable.

    *parent_fd* is the read end of a pipe whose write end is held only by
    the parent process; nothing is ever written to it, so it only turns
    readable once the write end is closed.
    """
    with selectors.DefaultSelector() as selector:
        selector.register(server, selectors.EVENT_READ)
        selector.register(parent_fd, selectors.EVENT_READ)
        while True:
            ready = selector.select(None)
            if any(key.fd == parent_fd for key, _events in ready):
                return
            if ready:
                # only the listening socket can be ready at this point
                server._handle_request_noblock()
            server.service_actions()


def _run_reuseport_workers(args, requested_servers):
    """Fork worker servers that all bind the same port via SO_REUSEPORT.

    Args:
        args: Parsed command-line namespace (host, port, style are read).
        requested_servers: Worker count, or None to use one per CPU.

    Raises:
        Exception: If the platform does not expose ``socket.SO_REUSEPORT``.
    """
    import os

    if not hasattr(socket, 'SO_REUSEPORT'):
        raise Exception('SO_REUSEPORT not available on this platform')

    class ReusePortHTTPServer(HighlightingHTTPServer):
        def server_bind(self):
            self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT, 1)
            super().server_bind()

    if requested_servers is None:
        if hasattr(os, 'sched_getaffinity'):
            num_servers = len(os.sched_getaffinity(0))
        else:
            # sched_getaffinity is not available on every platform
            num_servers = os.cpu_count() or 1
        logging.info('auto-detected {} CPUs'.format(num_servers))
    else:
        num_servers = requested_servers
    logging.info('starting {} servers'.format(num_servers))

    read_fd, write_fd = os.pipe()
    # bound before the try block so the KeyboardInterrupt handler can read
    # it even when ctrl-c arrives before the first fork
    pid = None
    try:
        for _ in range(num_servers):
            pid = os.fork()
            if pid == 0:
                # worker: drop the inherited write end so that only the
                # parent keeps the pipe open
                os.close(write_fd)
                with ReusePortHTTPServer((args.host, args.port), HighlightingHandler, style=args.style) as server:
                    _serve_until_parent_exits(server, read_fd)
                return
        os.close(read_fd)
        logging.info('started syntax-highlighting-server')
        # returning after the first worker exits ends the parent, which
        # closes the pipe and thereby stops the remaining workers
        os.wait()
        logging.info('worker died, shutting down syntax-highlighting-server')
    except KeyboardInterrupt:
        # workers (pid == 0) stay quiet; only the parent logs
        if pid != 0:
            logging.info('ctrl-c received, shutting down syntax-highlighting-server')


def main():
    """Parse the command line and run the highlighting server.

    Preloads every pygments lexer and the selected style, then serves in
    the mode chosen with --listen-mode: 'single' (one process), 'forking'
    (one forked process per request) or 'reuseport' (a fixed set of forked
    workers sharing the port).

    Raises:
        SystemExit: On invalid command-line arguments (argparse usage
            error), including an invalid --num-servers in reuseport mode.
    """
    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)
    parser = argparse.ArgumentParser(description='syntax highlighting server',
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--host', type=str, default='localhost',
                        help='''
The hostname or IP address to listen on. Note that it is
insecure to run syntax-highlighting-server on a public network.
''')
    parser.add_argument('--port', type=int, default=4872,
                        help='the port to listen on')
    parser.add_argument('--style', type=str, default='pastie',
                        help='pygments formatting style')
    parser.add_argument('--listen-mode', type=str, default='single',
                        choices=['single', 'forking', 'reuseport'],
                        help='''
single, forking, or reuseport. single uses one
process/thread for all requests. forking forks a new
process for each request. reuseport forks NUM_SERVERS
servers at start, then binds them using SO_REUSEPORT (Linux
kernel does a round robin). single is best for low query
loads. forking is faster for high loads. reuseport is
fastest but uses more idle memory and requires Linux
''')
    parser.add_argument('--num-servers', type=str, default='auto',
                        help='''
The number of servers to run in reuseport mode. Ignored in
other modes. auto means one for each CPU.
''')
    args = parser.parse_args()

    # validated up front so a bad value is a usage error rather than a
    # traceback after the preload; other modes still ignore the option
    requested_servers = None
    if args.listen_mode == 'reuseport':
        requested_servers = _requested_num_servers(parser, args)

    if args.host not in ('localhost', '127.0.0.1', '::1'):
        logging.warning('''
Warning: it is insecure to run syntax-highlighting-server on a
public network. Clients can easily attack http.server or pygments.
''')
    logging.info('starting syntax-highlighting on {} port {}'.format(args.host, args.port))

    # reduces first request latency, costs 5-10 MB RAM
    # for forking, this is necessary for any performance at all (otherwise it
    # defeats the whole purpose of using a separate process)
    for lexer in LEXERS.values():
        _load_lexers(lexer[0])
    __import__('pygments.styles.' + args.style)

    def start_server(MyHTTPServer):
        with MyHTTPServer((args.host, args.port), HighlightingHandler, style=args.style) as server:
            logging.info('started syntax-highlighting-server')
            server.serve_forever()

    if args.listen_mode == 'single':
        start_server(HighlightingHTTPServer)
    elif args.listen_mode == 'forking':
        # note: Threading isn't useful for performance because of the GIL
        from socketserver import ForkingMixIn

        class ForkingHTTPServer(ForkingMixIn, HighlightingHTTPServer):
            pass

        start_server(ForkingHTTPServer)
    elif args.listen_mode == 'reuseport':
        _run_reuseport_workers(args, requested_servers)
    else:
        # argparse 'choices' already rejects other values; kept as a guard
        raise Exception('invalid listen mode: {}'.format(args.listen_mode))
# Start the server only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
|