diff --git a/app.py b/app.py
index 5d89a07..2f8bb3a 100644
--- a/app.py
+++ b/app.py
@@ -1,78 +1,88 @@
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
#
# This file is part of precise-tools
# Copyright (C) 2017 Bryan Davis and contributors
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
import collections
import datetime
import traceback
import flask
import precise_tools
app = flask.Flask(__name__)
# Front page view: loads the per-tool job/member data from the cache (or
# rebuilds it from accounting, grid and membership lookups) and renders it
# with the home.html template.
@app.route('/')
def home():
try:
# tools will be a structure something like:
# {
- # 'tool A': {
- # 'job X': {
- # 'count': N,
- # 'last': datetime,
- # },
- # 'job Y': {
- # 'count': N,
- # 'last': datetime,
- # },
- # ...
- # },
- # ...
+ # 'tool A': {
+ # 'jobs': {
+ # 'job X': {
+ # 'count': N,
+ # 'last': datetime,
+ # },
+ # 'job Y': {
+ # 'count': N,
+ # 'last': datetime,
+ # },
+ # ...
+ # },
+ # 'members': [
+ # 'user A',
+ # 'user B',
+ # ...
+ # ]
+ # },
+ # ...
# }
# A 'purge' query argument skips the cached copy so the data is rebuilt.
purge = 'purge' in flask.request.args
tools = None if purge else precise_tools.CACHE.load('maindict')
if tools is None:
- tools = collections.defaultdict(
- lambda: collections.defaultdict(lambda: {
+ tools = collections.defaultdict(lambda: {
+ 'jobs': collections.defaultdict(lambda: {
'count': 1,
- 'last': None
- }))
+ 'last': ''}),
+ 'members': []})
for rec in precise_tools.tools_from_accounting(7):
- tools[rec[0]][rec[1]]['count'] = rec[2]
- tools[rec[0]][rec[1]]['last'] = (
+ tools[rec[0]]['jobs'][rec[1]]['count'] = rec[2]
+ tools[rec[0]]['jobs'][rec[1]]['last'] = (
datetime.datetime.fromtimestamp(
rec[3]).strftime('%Y-%m-%d %H:%M'))
-
for rec in precise_tools.tools_from_grid():
- tools[rec[0]][rec[1]]['last'] = 'Currently running'
+ tools[rec[0]]['jobs'][rec[1]]['last'] = 'Currently running'
+
+ for key, val in precise_tools.tools_members(tools.keys()).items():
+ tools[key]['members'] = list(val)
+
precise_tools.CACHE.save('maindict', tools)
return flask.render_template('home.html', tools=tools)
except Exception:
# Print the traceback before re-raising so the failure is visible in
# the process output as well.
traceback.print_exc()
raise
if __name__ == '__main__':
app.run()
# vim:sw=4:ts=4:sts=4:et:
diff --git a/precise_tools/__init__.py b/precise_tools/__init__.py
index a2f3589..41046f8 100644
--- a/precise_tools/__init__.py
+++ b/precise_tools/__init__.py
@@ -1,114 +1,140 @@
# -*- coding: utf-8 -*-
#
# This file is part of precise-tools
# Copyright (C) 2017 Bryan Davis and contributors
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
import collections
import datetime
import httplib
import json
+import ldap3
+
from . import utils
# Names, in order, for the colon-separated fields of one accounting record;
# tools_from_accounting() zips this list with each split line.
ACCOUNTING_FIELDS = [
'qname', 'hostname', 'group', 'owner', 'job_name', 'job_number',
'account', 'priority', 'submission_time', 'start_time', 'end_time',
'failed', 'exit_status', 'ru_wallclock', 'ru_utime', 'ru_stime',
'ru_maxrss', 'ru_ixrss', 'ru_ismrss', 'ru_idrss', 'ru_isrss', 'ru_minflt',
'ru_majflt', 'ru_nswap', 'ru_inblock', 'ru_oublock', 'ru_msgsnd',
'ru_msgrcv', 'ru_nsignals', 'ru_nvcsw', 'ru_nivcsw', 'project',
'department', 'granted_pe', 'slots', 'task_number', 'cpu', 'mem', 'io',
'category', 'iow', 'pe_taskid', 'maxvemem', 'arid', 'ar_submission_time',
]
CACHE = utils.Cache()
def tools_from_accounting(days):
    """Summarise recent Precise jobs found in the accounting file.

    Reads the tail of ``/data/project/.system/accounting`` and keeps the
    records whose ``end_time`` falls within the last ``days`` days. A record
    whose category contains ``release=precise`` adds its end time to the
    (owner, job name) entry; any other record for the same owner and job
    name discards what was collected for that entry so far.

    :param days: size of the look-back window, in days
    :return: list of ``(tool, job name, count, last end time)`` tuples, one
        per remaining job of every owner that normalize_toolname() accepts
    """
    window = datetime.timedelta(days=days)
    cutoff = int(utils.totimestamp(datetime.datetime.now() - window))
    seen = collections.defaultdict(lambda: collections.defaultdict(list))

    accounting = utils.tail_lines(
        '/data/project/.system/accounting', 400 * 45000 * days)
    for line in accounting:
        job = dict(zip(ACCOUNTING_FIELDS, line.split(':')))
        end_time = int(job['end_time'])
        if end_time < cutoff:
            continue

        tool = job['owner']
        if tool is None:
            continue

        if 'release=precise' in job['category']:
            seen[tool][job['job_name']].append(end_time)
        else:
            # The job also ran somewhere other than Precise: forget it.
            # pop() with a default tolerates entries that were never added.
            seen[tool].pop(job['job_name'], None)

    results = []
    for owner, owner_jobs in seen.iteritems():
        name = normalize_toolname(owner)
        if name is None:
            continue
        results.extend(
            (name, job_name, len(end_times), max(end_times))
            for job_name, end_times in owner_jobs.iteritems())
    return results
def is_precise_host(hostname):
    """Tell whether ``hostname`` looks like a Precise exec node.

    The check is purely name based: the last four characters of the host
    name must begin with ``12`` (presumably the numbering convention used
    for Precise hosts -- confirm against the grid's naming scheme).

    :param hostname: host name as reported by the grid status data
    :return: ``True`` or ``False`` (always a real boolean, never ``None``)
    """
    return hostname[-4:].startswith('12')
def tools_from_grid():
    """Get a list of (tool, job name) tuples for jobs currently running on
    precise exec nodes.

    The data comes from the ``/gridengine-status`` endpoint on
    ``tools.wmflabs.org``. Jobs whose owner is not a tool account (that is,
    normalize_toolname() returns ``None``) are skipped, matching what
    tools_from_accounting() does.

    :return: list of ``(tool, job name)`` tuples; empty when the endpoint
        returns an empty body
    """
    conn = httplib.HTTPConnection('tools.wmflabs.org')
    try:
        conn.request(
            'GET', '/gridengine-status',
            headers={
                'User-Agent': 'https://tools.wmflabs.org/precise-tools/'
            }
        )
        res = conn.getresponse().read()
    finally:
        # Always release the socket, even if the request fails.
        conn.close()

    if not res:
        return []

    grid_info = json.loads(res)['data']['attributes']
    tools = []
    for host, info in grid_info.iteritems():
        if not is_precise_host(host) or not info['jobs']:
            continue
        for job in info['jobs'].values():
            tool_name = normalize_toolname(job['job_owner'])
            # Non-tool accounts such as 'root' normalise to None; emitting
            # them would create a bogus None entry in the caller's dict.
            if tool_name is not None:
                tools.append((tool_name, job['job_name']))
    return tools
def normalize_toolname(name):
    """Strip the ``tools.`` prefix from an account name.

    :param name: account name, e.g. ``tools.example``
    :return: the name without the prefix, or ``None`` for accounts that do
        not carry it (non-tool accounts like 'root' are ignored)
    """
    prefix = 'tools.'
    if not name.startswith(prefix):
        return None
    return name[len(prefix):]
+
+
def tools_members(tools):
    """
    Return a dict that maps each tool to the set of its members.

    For every tool name an LDAP search for ``cn=tools.<name>`` is run under
    ``ou=servicegroups,dc=wikimedia,dc=org`` and the uid of each ``member``
    DN found is collected.

    Ex:
        {'musikbot': set(['musikanimal']),
         'ifttt': set(['slaporte', 'mahmoud', 'madhuvishy', 'ori'])}

    :param tools: iterable of tool names (without the ``tools.`` prefix)
    :return: ``collections.defaultdict(set)`` keyed by tool name; tools
        without any member found have no key
    """
    tool_to_members = collections.defaultdict(set)
    with utils.ldap_conn() as conn:
        for tool in tools:
            # NOTE(review): the tool name is interpolated into the filter
            # unescaped; confirm names can never contain LDAP filter
            # metacharacters such as '*', '(' or ')'.
            conn.search(
                'ou=servicegroups,dc=wikimedia,dc=org',
                '(cn=tools.{})'.format(tool),
                ldap3.SEARCH_SCOPE_WHOLE_SUBTREE,
                attributes=['member', 'cn'],
                time_limit=5
            )
            for resp in conn.response or []:
                # Response items that are not entries (e.g. referrals) may
                # carry no 'attributes' at all; treat them as empty.
                attributes = resp.get('attributes') or {}
                for member in attributes.get('member', []):
                    tool_to_members[tool].add(utils.uid_from_dn(member))
    return tool_to_members
diff --git a/precise_tools/utils.py b/precise_tools/utils.py
index e15eb8b..478b8ea 100644
--- a/precise_tools/utils.py
+++ b/precise_tools/utils.py
@@ -1,73 +1,102 @@
# -*- coding: utf-8 -*-
#
# This file is part of precise-tools
# Copyright (C) 2017 Bryan Davis and contributors
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import division
import datetime
import json
import os
+import yaml
+import ldap3
import redis
class Cache(object):
    """Small JSON cache stored in Redis.

    Every key is namespaced with a prefix read from ``~/redis-prefix.conf``.
    Values are serialised with ``json`` on save and parsed on load.
    """

    def __init__(self, enabled=True):
        """Connect to the ``tools-redis`` host and read the key prefix.

        :param enabled: when false, load() always returns ``None`` and
            save() does nothing
        """
        self.enabled = enabled
        self.conn = redis.Redis(host='tools-redis')
        with open(os.path.expanduser('~/redis-prefix.conf'), 'r') as f:
            self.prefix = f.read()

    def key(self, val):
        """Return ``val`` with the configured prefix prepended."""
        return '%s%s' % (self.prefix, val)

    def load(self, key):
        """Return the cached value for ``key``.

        :return: the decoded value, or ``None`` when the cache is disabled,
            the key is missing, or the stored data is not valid JSON
        """
        if not self.enabled:
            return None
        try:
            # A missing key yields None; '' makes json.loads raise
            # ValueError, which is reported as a cache miss.
            return json.loads(self.conn.get(self.key(key)) or '')
        except ValueError:
            return None

    def save(self, key, data, expiry=3600):
        """Store ``data`` as JSON under ``key``.

        :param expiry: lifetime of the entry in seconds; ``None`` stores it
            without an expiry
        """
        if self.enabled:
            real_key = self.key(key)
            # Pass the expiry on to Redis; without it the entry would live
            # forever and the ``expiry`` argument would be ignored.
            self.conn.set(real_key, json.dumps(data), ex=expiry)
def tail_lines(filename, nbytes):
    """Get lines from last n bytes from the filename as an iterator.

    When the file holds more than ``nbytes`` bytes, reading starts
    ``nbytes`` before the end and the first (possibly partial) line is
    dropped. When the file is not longer than ``nbytes``, every line is
    yielded from the start instead of failing on an out-of-range seek.

    :param filename: path of the file to read
    :param nbytes: how many trailing bytes to consider
    :return: generator yielding lines, including their line endings
    """
    with open(filename, 'r') as f:
        f.seek(0, os.SEEK_END)
        size = f.tell()
        if nbytes < size:
            f.seek(size - nbytes)
            # Ignore first line as it may be only part of a line
            f.readline()
        else:
            # Seeking back past the start would raise; the whole file is
            # wanted and its first line is complete, so keep it.
            f.seek(0)
        # We can't simply `return f` as the returned f will be closed
        # Do all the IO within this function
        for line in f:
            yield line
def totimestamp(dt, epoch=None):
    """Convert a datetime to seconds elapsed since an epoch.

    :param dt: the datetime to convert
    :param epoch: reference datetime; defaults to 1970-01-01 00:00:00
    :return: seconds between ``epoch`` and ``dt``, fractional part included
    """
    # From http://stackoverflow.com/a/8778548/8171
    reference = datetime.datetime(1970, 1, 1) if epoch is None else epoch
    elapsed = dt - reference
    whole_seconds = elapsed.seconds + elapsed.days * 86400
    total_micros = elapsed.microseconds + whole_seconds * 10**6
    return total_micros / 10**6
+
+
def ldap_conn():
    """
    Build a read-only LDAP connection from ``/etc/ldap.yaml``.

    The YAML file supplies the ``servers`` list plus the ``user`` and
    ``password`` to bind with. The servers are placed in a round-robin
    pool and the connection binds automatically on creation.

    Return value can be used as a context manager
    """
    with open('/etc/ldap.yaml') as config_file:
        settings = yaml.safe_load(config_file)

    hosts = [ldap3.Server(name) for name in settings['servers']]
    pool = ldap3.ServerPool(
        hosts,
        ldap3.POOLING_STRATEGY_ROUND_ROBIN,
        active=True,
        exhaust=True)
    return ldap3.Connection(
        pool,
        read_only=True,
        user=settings['user'],
        auto_bind=True,
        password=settings['password'])
+
+
def uid_from_dn(dn):
    """Extract the value of the first component of a distinguished name.

    ``'uid=someone,ou=people,dc=example'`` gives ``'someone'``: the text up
    to the first comma is split on ``=`` and its second piece is returned.

    :param dn: distinguished name string
    :return: value of the leading ``key=value`` component
    :raises IndexError: if the leading component contains no ``=``
    """
    first_component = dn.split(',', 1)[0]
    return first_component.split('=')[1]
diff --git a/requirements.txt b/requirements.txt
index 1a5dc97..294b298 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,3 @@
flask
+ldap3
redis
diff --git a/templates/home.html b/templates/home.html
index cde4fe5..4a8f182 100644
--- a/templates/home.html
+++ b/templates/home.html
@@ -1,35 +1,35 @@
{% extends "layout.html" %}
{% block body %}
Tools running jobs on Precise OGE hosts in last 7 days
See Tools Precise deprecation for information on migrating your tool off of Precise before the 2017-03-06 deadline.
Total tools still running on Precise: {{ tools|count }}
Tool |
Job |
Total seen |
Last seen |
- {% for tool, jobs in tools|dictsort %}
+ {% for tool_name, tool_data in tools|dictsort %}
-
- {{ tool }}
+ |
+ {{ tool_name }}
|
- {% for job, data in jobs|dictsort %}
+ {% for job, data in tool_data.jobs|dictsort %}
{% if not loop.first %}
{% endif %}
{{ job }} |
{{ data.count }} |
{{ data.last }} |
{% if not loop.last %}
{% endif %}
{% endfor %}
{% endfor %}
{% endblock %}