Page MenuHomePhabricator
Paste P8636

logstash-dump.py
ActivePublic

Authored by ema on Jun 21 2019, 5:17 AM.
Tags
None
Referenced Files
F29608826: raw.txt
Jun 21 2019, 5:17 AM
Subscribers
None
#!/usr/bin/python3
import os
import requests
# Credentials for HTTP basic auth against the logstash endpoint, read from
# the environment so they never have to be written into the script.
ldap_user = os.getenv('LDAP_USER')
ldap_pass = os.getenv('LDAP_PASS')

# Query string sent verbatim as the `q` parameter of the search request.
# NOTE(review): there is no explicit operator between the first two terms
# (`logger_name:...` and `request-Host:...`), so they are combined with the
# server's default operator -- confirm that this is what is intended.
query = 'logger_name:varnishslowlog request-Host:en.wikipedia.org AND layer:backend AND http-status:200 AND @timestamp:[2018-03-10 TO 2018-03-10] AND host:cp1*'

SEARCH_URL = "https://logstash.wikimedia.org/elasticsearch/_search"
# Maximum number of hits requested in a single search.
MAX_HITS = 10000
# Only entries whose computed 'varnish-seconds' exceeds this are printed.
SLOW_THRESHOLD_SECONDS = 0.5


def varnish_seconds(log):
    """Return the fetch time of *log* minus the backend-reported duration.

    The first whitespace-separated token of the 'response-Backend-Timing'
    field has its 'D=' prefix removed and is divided by 1,000,000 to obtain
    seconds; that value is subtracted from the 'time-fetch' field.

    Args:
        log: mapping holding the '_source' fields of one search hit.

    Returns:
        The difference in seconds as a float, or None when the entry has no
        (or an empty) 'response-Backend-Timing' field.

    Raises:
        KeyError: if the timing field is present but 'time-fetch' is not.
        ValueError: if either field cannot be parsed as a number.
    """
    timing = log.get('response-Backend-Timing')
    if not timing:
        # This is not coming from mediawiki
        return None
    mw_seconds = float(timing.split()[0].replace('D=', '')) / 1000000.0
    return float(log['time-fetch']) - mw_seconds


def main():
    """Run the search and print every hit slower than the threshold.

    Each printed line holds the timestamp, URL, host, layer, HTTP status and
    computed 'varnish-seconds' of the hit, followed by its
    'response-X-Cache-Int' field (empty when absent).

    Raises:
        SystemExit: if LDAP_USER or LDAP_PASS is not set.
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    if not ldap_user or not ldap_pass:
        # Fail early with a clear message instead of sending a request that
        # cannot authenticate.
        raise SystemExit('LDAP_USER and LDAP_PASS must be set in the environment')

    print(query)

    # Let requests build and percent-encode the query string rather than
    # splicing the raw query (spaces, brackets, '@') into the URL by hand.
    resp = requests.get(
        SEARCH_URL,
        params={'size': MAX_HITS, 'q': query},
        auth=(ldap_user, ldap_pass),
    )
    # An explicit check: `assert` is stripped under `python -O`.
    resp.raise_for_status()

    data = resp.json()
    for hit in data['hits']['hits']:
        log = hit['_source']
        seconds = varnish_seconds(log)
        if seconds is None:
            continue
        # Stored back into the record so the format string below can use it.
        log['varnish-seconds'] = seconds
        if seconds > SLOW_THRESHOLD_SECONDS:
            print(
                "%(@timestamp)s %(http-url)s %(host)s %(layer)s %(http-status)s %(varnish-seconds)f" % log,
                log.get('response-X-Cache-Int', ''),
            )


if __name__ == '__main__':
    main()