diff --git a/app.py b/app.py
index 1ba6159..87958dd 100644
--- a/app.py
+++ b/app.py
@@ -1,75 +1,106 @@
 # -*- coding: utf-8 -*-
 #
 # This file is part of Toolviews
 #
 # Copyright (C) 2018 Wikimedia Foundation and contributors
 # All Rights Reserved.
 #
 # This program is free software: you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by the Free
 # Software Foundation, either version 3 of the License, or (at your option)
 # any later version.
 #
 # This program is distributed in the hope that it will be useful, but WITHOUT
 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 # more details.
 #
 # You should have received a copy of the GNU General Public License along
 # with this program. If not, see <http://www.gnu.org/licenses/>.
 
-import datetime
+import collections
 import logging.config
 
 import flask
 import toolforge
 import werkzeug.contrib.fixers
 
 import toolviews
+import toolviews.routing
 
 
 logging.config.dictConfig({
     'version': 1,
     'formatters': {
         'default': {
             'format': '%(asctime)s %(name)-12s %(levelname)-8s: %(message)s',
             'datefmt': '%Y-%m-%dT%H:%M:%SZ',
         },
     },
     'handlers': {
         'wsgi': {
             'class': 'logging.StreamHandler',
             'stream': 'ext://sys.stderr',
             'formatter': 'default',
         },
     },
     'root': {
         'level': 'INFO',
         'handlers': ['wsgi']
     },
 })
+
+
 # Create the Flask application
 app = flask.Flask(__name__)
 # Add the ProxyFix middleware which reads X-Forwarded-* headers
 app.wsgi_app = werkzeug.contrib.fixers.ProxyFix(app.wsgi_app)
 # Always use TLS
 app.before_request(toolforge.redirect_to_https)
+# Register custom route converters
+app.url_map.converters['date'] = toolviews.routing.DateConverter
 
 
 @app.route('/')
 def index():
     """Application landing page."""
     return flask.render_template('index.html')
 
 
-@app.route('/api/v1/day/<date>')
-@app.route('/api/v1/tool/<tool>/day/<date>')
-def get_hits_for_date(date, tool='*'):
-    d = datetime.datetime.strptime(date, '%Y-%m-%d').date()
+@app.route('/api/v1/day/<date:day>')
+@app.route('/api/v1/tool/<tool>/day/<date:day>')
+def get_hits_for_date(day, tool='*'):
+    """Get hits for a tool (or all tools) on a given date."""
     return flask.jsonify({
-        'date': date,
+        'date': day.strftime('%Y-%m-%d'),
+        'tool': tool,
         'results': {
             row['tool']: row['hits']
-            for row in toolviews.get_hits(d, tool)
+            for row in toolviews.hits(day, tool)
         },
     })
+
+
+@app.route('/api/v1/daily/<date:start>/<date:end>')
+@app.route('/api/v1/tool/<tool>/daily/<date:start>/<date:end>')
+def get_hits_for_daterange(start, end, tool='*'):
+    """Get hits for a tool (or all tools) during a date range."""
+    hits = collections.defaultdict(dict)
+    # NOTE(review): request_day becomes a JSON object key; if the driver
+    # returns it as datetime.date it must be converted to str - confirm.
+    for row in toolviews.hits_range(start, end, tool):
+        hits[row['request_day']][row['tool']] = row['hits']
+    return flask.jsonify({
+        'start': start.strftime('%Y-%m-%d'),
+        'end': end.strftime('%Y-%m-%d'),
+        'tool': tool,
+        'results': hits,
+    })
+
+
+@app.route('/api/v1/tools')
+def get_tools():
+    """Get the names of all tools present in the hits table."""
+    return flask.jsonify({
+        'results': [row['tool'] for row in toolviews.list_tools()],
+    })
diff --git a/requirements.txt b/requirements.txt
index 8c405e5..3fa96f9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,6 @@
 flask
 ldap3
 pymysql
 pyyaml
+redis
 toolforge
diff --git a/toolviews/__init__.py b/toolviews/__init__.py
index b37c3ed..fd8b16a 100644
--- a/toolviews/__init__.py
+++ b/toolviews/__init__.py
@@ -1,46 +1,100 @@
 # -*- coding: utf-8 -*-
 #
 # This file is part of Toolviews
 #
 # Copyright (C) 2019 Wikimedia Foundation and contributors
 # All Rights Reserved.
 #
 # This program is free software: you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by the Free
 # Software Foundation, either version 3 of the License, or (at your option)
 # any later version.
 #
 # This program is distributed in the hope that it will be useful, but WITHOUT
 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 # more details.
 #
 # You should have received a copy of the GNU General Public License along
 # with this program. If not, see <http://www.gnu.org/licenses/>.
 
 import pymysql.cursors
 import toolforge
 
 
-def get_hits(date, tool='*'):
-    conn = toolforge.connect(
+from . import cache
+
+
+def dbconn():  # Open a new connection to the toolviews database.
+    return toolforge.connect(
         dbname='s53734__toolviews_p',
         cluster='labsdb',
         host='tools.db.svc.eqiad.wmflabs',
     )
+
+
+def fetch_all(sql, params=None):
+    """Execute an SQL query and return all rows."""
+    conn = dbconn()  # one connection per query, closed in the finally below
     try:
         with conn.cursor(pymysql.cursors.DictCursor) as cur:
-            sql = (
-                "SELECT hits, tool "
-                "FROM daily_raw_views "
-                "WHERE request_day = %s "
-            )
-            params = (date.strftime('%Y-%m-%d'),)
-            if tool != '*':
-                sql += "AND tool = %s "
-                params += (tool,)
-            sql += "ORDER BY hits desc"
             cur.execute(sql, params)
             return cur.fetchall()
     finally:
         conn.close()
+
+
+def hits(date, tool='*'):  # Hit rows for one day; tool='*' means all tools.
+    date_str = date.strftime('%Y-%m-%d')
+    key = "hits:{}:{}".format(tool, date_str)
+    data = cache.CACHE.load(key)
+    if data is None:  # cache miss (or cache disabled): query the database
+        sql = (
+            "SELECT hits, tool "
+            "FROM daily_raw_views "
+            "WHERE request_day = %s "
+        )
+        params = (date_str,)
+        if tool != '*':
+            sql += "AND tool = %s "
+            params += (tool,)
+        sql += "ORDER BY hits desc"
+        data = fetch_all(sql, params)
+        cache.CACHE.save(key, data, 3600)  # expiry value passed to the cache
+    return data
+
+
+def hits_range(start, end, tool='*'):  # Hit rows for start..end inclusive.
+    start_str = start.strftime('%Y-%m-%d')
+    end_str = end.strftime('%Y-%m-%d')
+    key = "hits:{}:{}:{}".format(tool, start_str, end_str)
+    data = cache.CACHE.load(key)
+    if data is None:  # cache miss (or cache disabled): query the database
+        sql = (
+            "SELECT request_day, tool, hits "
+            "FROM daily_raw_views "
+            "WHERE request_day >= %s "
+            "AND request_day <= %s "
+        )
+        params = (start_str, end_str,)
+        if tool != '*':
+            sql += "AND tool = %s "
+            params += (tool,)
+        sql += "ORDER BY request_day desc, hits desc"
+        data = fetch_all(sql, params)
+        cache.CACHE.save(key, data, 3600)  # NOTE(review): rows are JSON-encoded; confirm request_day serializes
+    return data
+
+
+def list_tools():  # Distinct tool names from daily_raw_views, sorted by name.
+    key = "tools:list"
+    data = cache.CACHE.load(key)
+    if data is None:  # cache miss (or cache disabled): query the database
+        sql = (
+            "SELECT DISTINCT(tool) "
+            "FROM daily_raw_views "
+            "ORDER BY tool "
+        )
+        data = fetch_all(sql)
+        cache.CACHE.save(key, data, 3600)
+    return data
diff --git a/toolviews/cache.py b/toolviews/cache.py
new file mode 100644
index 0000000..d1b7c1f
--- /dev/null
+++ b/toolviews/cache.py
@@ -0,0 +1,60 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of Toolviews
+#
+# Copyright (C) 2019 Wikimedia Foundation and contributors
+# All Rights Reserved.
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import hashlib
+import json
+import os
+import pwd
+
+import redis
+
+
+class Cache(object):
+    """Simple redis wrapper."""
+    def __init__(self, enabled=True, seed=''):  # enabled=False turns load/save into no-ops
+        self.enabled = enabled
+        self.conn = redis.StrictRedis(
+            host='tools-redis',
+            decode_responses=True,
+        )
+        u = pwd.getpwuid(os.getuid())  # passwd entry of the current process user
+        self.prefix = hashlib.sha1(  # key namespace derived from seed, user name and home dir
+            '{}{}.{}'.format(seed, u.pw_name, u.pw_dir).encode('utf-8')
+        ).hexdigest()
+
+    def key(self, val):  # Prefix val with this cache's namespace hash.
+        return '{}:{}'.format(self.prefix, val)
+
+    def load(self, key):  # Return the decoded JSON value, or None when absent/disabled.
+        if self.enabled:
+            try:
+                return json.loads(self.conn.get(self.key(key)) or '')  # a missing key becomes '' which fails to parse
+            except ValueError:
+                return None
+        else:
+            return None
+
+    def save(self, key, data, expiry=300):  # Store data JSON-encoded via SETEX with the given expiry.
+        if self.enabled:
+            real_key = self.key(key)
+            self.conn.setex(real_key, expiry, json.dumps(data))
+
+
+CACHE = Cache(seed='20190707')  # module-level shared instance used by toolviews
diff --git a/toolviews/__init__.py b/toolviews/routing.py
similarity index 51%
copy from toolviews/__init__.py
copy to toolviews/routing.py
index b37c3ed..7931451 100644
--- a/toolviews/__init__.py
+++ b/toolviews/routing.py
@@ -1,46 +1,35 @@
 # -*- coding: utf-8 -*-
 #
 # This file is part of Toolviews
 #
 # Copyright (C) 2019 Wikimedia Foundation and contributors
 # All Rights Reserved.
 #
 # This program is free software: you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by the Free
 # Software Foundation, either version 3 of the License, or (at your option)
 # any later version.
 #
 # This program is distributed in the hope that it will be useful, but WITHOUT
 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 # more details.
 #
 # You should have received a copy of the GNU General Public License along
 # with this program. If not, see <http://www.gnu.org/licenses/>.
+import datetime
+import werkzeug.routing
 
-import pymysql.cursors
-import toolforge
 
+class DateConverter(werkzeug.routing.BaseConverter):
+    """ISO8601 date type for url routing."""
+    regex = r'[0-9]{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])'
 
-def get_hits(date, tool='*'):
-    conn = toolforge.connect(
-        dbname='s53734__toolviews_p',
-        cluster='labsdb',
-        host='tools.db.svc.eqiad.wmflabs',
-    )
-    try:
-        with conn.cursor(pymysql.cursors.DictCursor) as cur:
-            sql = (
-                "SELECT hits, tool "
-                "FROM daily_raw_views "
-                "WHERE request_day = %s "
-            )
-            params = (date.strftime('%Y-%m-%d'),)
-            if tool != '*':
-                sql += "AND tool = %s "
-                params += (tool,)
-            sql += "ORDER BY hits desc"
-            cur.execute(sql, params)
-            return cur.fetchall()
-    finally:
-        conn.close()
+    def to_python(self, value):  # URL segment 'YYYY-MM-DD' -> datetime.date
+        try:  # strptime is a classmethod of datetime.datetime, not of the module
+            return datetime.datetime.strptime(value, '%Y-%m-%d').date()
+        except ValueError:  # e.g. 2019-02-31 matches the regex but is no date
+            raise werkzeug.routing.ValidationError()
+
+    def to_url(self, value):  # date-like value -> 'YYYY-MM-DD' for URL building
+        return value.strftime('%Y-%m-%d')