@app.route('/')
def home():
    """Render the overview of tools that ran jobs on Precise hosts.

    The per-tool data is read from the cache under the key ``maindict``.
    When the ``purge`` query argument is present, or the cache yields
    nothing, the data is rebuilt from the accounting records of the last
    7 days, the current grid status and the tool memberships, and then
    written back to the cache.

    The structure handed to the ``home.html`` template looks like::

        {
            'tool A': {
                'jobs': {
                    'job X': {
                        'count': N,
                        'last': 'YYYY-MM-DD HH:MM' or 'Currently running',
                    },
                    ...
                },
                'members': [
                    'user A',
                    'user B',
                    ...
                ]
            },
            ...
        }

    Any exception is printed with its traceback and re-raised so that
    Flask can still produce its error response.
    """
    try:
        purge = 'purge' in flask.request.args
        tools = None if purge else precise_tools.CACHE.load('maindict')
        if tools is None:
            tools = collections.defaultdict(lambda: {
                'jobs': collections.defaultdict(lambda: {
                    'count': 1,
                    'last': ''}),
                'members': []})

            for rec in precise_tools.tools_from_accounting(7):
                tool, job_name, count, last = rec[0], rec[1], rec[2], rec[3]
                if tool is None:
                    continue
                job = tools[tool]['jobs'][job_name]
                job['count'] = count
                job['last'] = datetime.datetime.fromtimestamp(
                    last).strftime('%Y-%m-%d %H:%M')

            for rec in precise_tools.tools_from_grid():
                tool, job_name = rec[0], rec[1]
                # Grid jobs owned by non-tool accounts carry no tool name;
                # indexing with None would create a bogus "None" tool.
                if tool is None:
                    continue
                tools[tool]['jobs'][job_name]['last'] = 'Currently running'

            # Snapshot the tool names first: `tools` is a defaultdict and
            # must not grow while the names are being consumed.
            members = precise_tools.tools_members(list(tools))
            for tool, uids in members.items():
                # Sorted so the rendered (and cached) list has a stable order.
                tools[tool]['members'] = sorted(uids)

            precise_tools.CACHE.save('maindict', tools)
        return flask.render_template('home.html', tools=tools)
    except Exception:
        traceback.print_exc()
        raise


if __name__ == '__main__':
    app.run()
def tools_from_accounting(days):
    """Get a list of (tool, job name, count, last) tuples for jobs running on
    precise exec nodes in the last N days.

    ``count`` is the number of matching accounting records seen for the job
    and ``last`` is the largest ``end_time`` (unix epoch seconds) among them.
    Records whose owner is not a ``tools.*`` account are ignored.
    """
    delta = datetime.timedelta(days=days)
    cutoff = int(utils.totimestamp(datetime.datetime.now() - delta))
    jobs = collections.defaultdict(lambda: collections.defaultdict(list))
    # NOTE(review): 400 * 45000 looks like an estimate of accounting bytes
    # written per day -- confirm before changing it.
    for line in utils.tail_lines(
            '/data/project/.system/accounting', 400 * 45000 * days):
        job = dict(zip(ACCOUNTING_FIELDS, line.split(':')))
        try:
            end_time = int(job['end_time'])
            tool = job['owner']
            job_name = job['job_name']
            category = job['category']
        except (KeyError, ValueError):
            # Truncated or otherwise malformed line: skip it rather than
            # abort the whole scan.
            continue
        if end_time < cutoff:
            continue
        if 'release=precise' in category:
            jobs[tool][job_name].append(end_time)
        else:
            # A later record of the same job without release=precise drops
            # everything collected for it so far.
            jobs[tool].pop(job_name, None)

    tools = []
    # .items() rather than .iteritems() so the code runs on Python 2 and 3.
    for tool_name, tool_jobs in jobs.items():
        tool_name = normalize_toolname(tool_name)
        if tool_name is None:
            continue
        for job_name, job_ends in tool_jobs.items():
            tools.append((
                tool_name,
                job_name,
                len(job_ends),
                max(job_ends)
            ))
    return tools


def is_precise_host(hostname):
    """Return True when the last four characters of hostname start with
    '12', False otherwise."""
    return hostname[-4:].startswith('12')


def tools_from_grid():
    """Get a list of (tool, job name) tuples for jobs running on precise
    exec nodes currently.

    The data comes from the ``/gridengine-status`` endpoint on
    tools.wmflabs.org. Jobs whose owner is not a ``tools.*`` account are
    left out. An empty response body yields an empty list.
    """
    conn = httplib.HTTPConnection('tools.wmflabs.org')
    try:
        conn.request(
            'GET', '/gridengine-status',
            headers={
                'User-Agent': 'https://tools.wmflabs.org/precise-tools/'
            }
        )
        res = conn.getresponse().read()
    finally:
        # Always release the socket, even when the request fails.
        conn.close()
    if not res:
        return []

    grid_info = json.loads(res)['data']['attributes']
    tools = []
    for host, info in grid_info.items():
        if not is_precise_host(host) or not info['jobs']:
            continue
        for job in info['jobs'].values():
            tool = normalize_toolname(job['job_owner'])
            # normalize_toolname gives None for non-tool accounts.
            if tool is not None:
                tools.append((tool, job['job_name']))
    return tools


def normalize_toolname(name):
    """Strip the leading 'tools.' from name.

    Returns None for names without that prefix -- we ignore non-tool
    accounts like 'root'.
    """
    if name.startswith('tools.'):
        return name[6:]
    return None


def tools_members(tools):
    """
    Return a dict that has members of a tool associated with each tool

    The values are sets of uids. Tools for which the search finds no
    members have no key in the result.
    Ex:
    {'musikbot': set(['musikanimal']),
     'ifttt': set(['slaporte', 'mahmoud', 'madhuvishy', 'ori'])}
    """
    tool_to_members = collections.defaultdict(set)
    with utils.ldap_conn() as conn:
        for tool in tools:
            if tool is None:
                # Would otherwise search for the literal cn "tools.None".
                continue
            conn.search(
                'ou=servicegroups,dc=wikimedia,dc=org',
                '(cn=tools.{})'.format(tool),
                ldap3.SEARCH_SCOPE_WHOLE_SUBTREE,
                attributes=['member', 'cn'],
                time_limit=5
            )
            for resp in conn.response or []:
                # Entries without an 'attributes' mapping would make the
                # .get() below raise AttributeError.
                attributes = resp.get('attributes') or {}
                for member in attributes.get('member', []):
                    tool_to_members[tool].add(utils.uid_from_dn(member))
    return tool_to_members
class Cache(object):
    """JSON value cache stored in the redis server at host 'tools-redis'.

    Every key is prefixed with the content of ``~/redis-prefix.conf``.
    """

    def __init__(self, enabled=True):
        """Connect to redis and read the key prefix.

        :param enabled: when false, load() always returns None and save()
            does nothing.
        """
        self.enabled = enabled
        self.conn = redis.Redis(host='tools-redis')
        with open(os.path.expanduser('~/redis-prefix.conf'), 'r') as f:
            # NOTE(review): the content is used verbatim, so a trailing
            # newline in the file becomes part of every key -- confirm
            # that is intended before stripping it.
            self.prefix = f.read()

    def key(self, val):
        """Return val with the cache prefix prepended."""
        return '%s%s' % (self.prefix, val)

    def load(self, key):
        """Return the decoded value stored under key.

        Returns None when the cache is disabled, the key is missing or the
        stored value is not valid JSON.
        """
        if not self.enabled:
            return None
        try:
            # A missing key yields None; '' makes json.loads raise
            # ValueError, which maps to the None result below.
            return json.loads(self.conn.get(self.key(key)) or '')
        except ValueError:
            return None

    def save(self, key, data, expiry=3600):
        """Store data as JSON under key.

        :param expiry: lifetime of the entry in seconds.
        """
        if self.enabled:
            # Pass the expiry on to redis; without it entries never expire.
            self.conn.set(self.key(key), json.dumps(data), ex=expiry)


def tail_lines(filename, nbytes):
    """Get lines from last n bytes from the filename as an iterator.

    When the file is longer than nbytes, the first line read is dropped
    because it may be only part of a line. A file of at most nbytes is
    yielded completely.
    """
    with open(filename, 'r') as f:
        size = os.fstat(f.fileno()).st_size
        if size > nbytes:
            # Absolute seek: an end-relative seek past the start of a short
            # file raises an error.
            f.seek(size - nbytes)
            # Ignore first line as it may be only part of a line
            f.readline()
        # We can't simply `return f` as the returned f will be closed
        # Do all the IO within this function
        for line in f:
            yield line


def totimestamp(dt, epoch=None):
    """Convert a datetime to unix epoch seconds.

    :param dt: naive datetime to convert.
    :param epoch: datetime counted as second 0; defaults to 1970-01-01.
    :returns: seconds between epoch and dt as a float.
    """
    if epoch is None:
        epoch = datetime.datetime(1970, 1, 1)
    # total_seconds() does not depend on `from __future__ import division`.
    return (dt - epoch).total_seconds()


def ldap_conn():
    """
    Return a ldap connection

    The servers, user and password are read from /etc/ldap.yaml.

    Return value can be used as a context manager
    """
    with open('/etc/ldap.yaml') as f:
        config = yaml.safe_load(f)

    servers = ldap3.ServerPool(
        [ldap3.Server(host) for host in config['servers']],
        ldap3.POOLING_STRATEGY_ROUND_ROBIN, active=True, exhaust=True)
    return ldap3.Connection(
        servers,
        read_only=True,
        user=config['user'],
        auto_bind=True,
        password=config['password'])


def uid_from_dn(dn):
    """Return the value of the first component of a distinguished name.

    'uid=foo,ou=people,dc=wikimedia,dc=org' gives 'foo'.
    Raises IndexError when the first component contains no '='.
    """
    first_component = dn.split(',')[0]
    # Split once only, so a value that itself contains '=' stays intact.
    return first_component.split('=', 1)[1]

Tools running jobs on Precise OGE hosts in last 7 days

See Tools Precise deprecation for information on migrating your tool off of Precise before the 2017-03-06 deadline.

Total tools still running on Precise: {{ tools|count }}

- {% for tool, jobs in tools|dictsort %} + {% for tool_name, tool_data in tools|dictsort %} - - {% for job, data in jobs|dictsort %} + {% for job, data in tool_data.jobs|dictsort %} {% if not loop.first %}{% endif %} {% if not loop.last %}{% endif %} {% endfor %} {% endfor %}
Tool Job Total seen Last seen
- {{ tool }} + + {{ tool_name }}
{{ job }} {{ data.count }} {{ data.last }}
{% endblock %}