diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..bb026ec
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+# Cached console logs are written to per-job subdirectories of data/
+data/**/*.log
diff --git a/app.py b/app.py
index 0a78b18..e14663e 100644
--- a/app.py
+++ b/app.py
@@ -1,127 +1,155 @@
 """
 Copyright (C) 2018 Kunal Mehta
 
 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU Affero General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.
 
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU Affero General Public License for more details.
 
 You should have received a copy of the GNU Affero General Public License
 along with this program.  If not, see <https://www.gnu.org/licenses/>.
 """
 
 import elasticsearch
 import elasticsearch.client
 import functools
 import json
 import os
 from php import Php
 import re
 import requests
 import sys
 import toolforge
 
+# Directory of this script; data/ and index.json are resolved against it.
+# Not named __dir__: a module-level __dir__ is the PEP 562 hook behind dir().
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
 JENKINS = 'https://integration.wikimedia.org/ci/'
-TRIGGERED_RE = re.compile('Triggered by change:\n ([0-9,]*?)', flags=re.MULTILINE)
+# NOTE(review): the lazy ``*?`` was the last thing in the pattern, so the
+# group always matched ''; ``+`` captures the digits. Confirm the pattern.
+TRIGGERED_RE = re.compile('Triggered by change:\n ([0-9,]+)', flags=re.MULTILINE)
 CREDENTIALS = Php.parse_ini_file(os.path.expanduser('~/.elasticsearch.ini'))
 toolforge.set_user_agent('flaky-ci')
 session = requests.Session()
 INDEX = 'flaky-ci'
 DOC_TYPE = 'jenkins-job'
 client = elasticsearch.Elasticsearch(
     ['tools-elastic-01.tools.eqiad.wmflabs:80'],
     connection_class=elasticsearch.RequestsHttpConnection,
     http_auth=(CREDENTIALS['user'], CREDENTIALS['password'])
 )
 
 
 def jenkins_api(path):
+    """GET ``JENKINS + path + '/api/json'`` and return the decoded JSON."""
     r = session.get(JENKINS + path + '/api/json')
     r.raise_for_status()
     return r.json()
 
 
 def build_log(job_name, number):
+    """Return the consoleText of build ``number`` of ``job_name``."""
     r = session.get(JENKINS + 'job/%s/%s/consoleText' % (job_name, number))
     r.raise_for_status()
     return r.text
 
 
 def recent_builds(job_name):
+    """Yield build_info() documents from the latest build down to the first."""
     data = jenkins_api('job/%s' % job_name)
     latest = data['builds'][0]['number']
     first = data['firstBuild']['number']
     for i in reversed(range(first, latest + 1)):
         yield build_info(job_name, i)
 
 
 def init_indices():
+    """Delete INDEX if it exists, then recreate it from index.json."""
     indices = elasticsearch.client.IndicesClient(client)
     if indices.exists(index=INDEX):
         indices.delete(index=INDEX)
-    with open(os.path.join(os.path.dirname(__file__), 'index.json')) as f:
+    with open(os.path.join(BASE_DIR, 'index.json')) as f:
         index = json.load(f)
     indices.create(
         index=INDEX,
         body=index
     )
 
 
 @functools.lru_cache()
 def build_info(job_name, number):
+    """Return the Elasticsearch document for one Jenkins build.
+
+    The console log is not part of the document; it is fetched once and
+    cached as data/<job_name>/<number>-<result>.log next to this script.
+    """
     data = jenkins_api('job/%s/%s' % (job_name, number))
     document = {}
     if data.get('description'):
         triggered = TRIGGERED_RE.search(data['description'])
         if triggered:
-            document['triggered'] = triggered.group(2)
+            # TRIGGERED_RE has one capture group; group(2) raised IndexError.
+            document['triggered'] = triggered.group(1)
     params = {}
     for param in data['actions'][1]['parameters']:
         params[param['name']] = param['value']
     document['project'] = params['ZUUL_PROJECT']
     document['job'] = job_name
     document['number'] = number
     document['result'] = data['result']
-    document['console'] = build_log(job_name, number)
+    # Console logs are kept on disk instead of in the indexed document:
+    # data/<job>/<number>-<result>.log
+    job_dir = os.path.join(BASE_DIR, 'data', job_name)
+    os.makedirs(job_dir, exist_ok=True)
+    log_fname = os.path.join(job_dir, '%s-%s.log' % (number, data['result']))
+    if not os.path.exists(log_fname):
+        log = build_log(job_name, number)
+        with open(log_fname, 'w', encoding='utf-8') as f:
+            f.write(log)
     document['node'] = data['builtOn']
     # Unix times
     document['timestamp'] = data['timestamp'] / 1000
     document['duration'] = data['duration'] / 1000
     return document
 
 
 def import_to_es(document):
+    """Index ``document`` into INDEX and print the client's response."""
     print('Importing %s #%s' % (document['job'], document['number']))
-    print(len(document['console']))
     print(client.index(index=INDEX, doc_type=DOC_TYPE, body=document))
 
 
 def main():
+    """Recreate the index, then import every build of each listed job."""
     init_indices()
     jobs = ['mwext-mw-selenium-node-composer-jessie']
     for job_name in jobs:
         for document in recent_builds(job_name):
             import_to_es(document)
 
 
 def search():
+    """Print the raw response of a regexp query on the 'console' field."""
+    # NOTE(review): build_info() no longer stores 'console' and index.json
+    # drops its mapping, so this query presumably matches nothing now --
+    # TODO: search the cached data/<job>/*.log files instead.
     q = client.search(index=INDEX, body={
         'query': {
             'regexp': {
                 'console': 'An element could not be located on the page using the given search parameters',
             }
         }
     })
     print(q)
 
 
 if __name__ == '__main__':
     if '--import' in sys.argv:
         main()
     else:
         search()
diff --git a/data/.gitkeep b/data/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/index.json b/index.json
index 88495ea..f3c5408 100644
--- a/index.json
+++ b/index.json
@@ -1,59 +1,54 @@
 {
     "mappings": {
         "jenkins-job": {
             "dynamic_templates": [
                 {
                     "string_fields": {
                         "match": "*",
                         "mapping": {
                             "index": "not_analyzed",
                             "type": "string",
                             "omit_norms": true
                         },
                         "match_mapping_type": "string"
                     }
                 }
             ],
             "_all": {
                 "norms": false,
                 "enabled": true
             },
             "properties": {
                 "number": {
                     "type": "long"
                 },
                 "triggered": {
                     "norms": false,
                     "type": "text"
                 },
                 "job": {
                     "norms": false,
                     "type": "text"
                 },
-                "console": {
-                    "type": "text",
-                    "norms": false,
-                    "analyzer": "keyword"
-                },
                 "project": {
                     "norms": false,
                     "type": "text"
                 },
                 "duration": {
                     "type": "float"
                 },
                 "timestamp": {
                     "type": "float"
                 },
                 "result": {
                     "norms": false,
                     "type": "text"
                 },
                 "node": {
                     "norms": false,
                     "type": "text"
                 }
             }
         }
     }
 }