diff --git a/bin/refresh.php b/bin/refresh.php index 355e2d5..761bdda 100755 --- a/bin/refresh.php +++ b/bin/refresh.php @@ -1,40 +1,57 @@ #!/usr/bin/env php * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . */ namespace Legoktm\CoverMe; +use GuzzleHttp\Exception\RequestException; + require_once __DIR__ . '/../vendor/autoload.php'; $cover = new CoverLogIndex(); echo "cover: core\n"; $cover->getCore( true ); foreach ( $cover->getExtensionList( true ) as $ext ) { echo "cover: $ext\n"; - $cover->getExtension( $ext, true ); + try { + $cover->getExtension( $ext, true ); + } catch ( RequestException $e ) { + // e.g. 404 Not Found if the file is temporarily missing (race conditions, or buggy CI). + // In the frontend, we let this propagate (using the PHP error page as our error page), + // but catch it here to continue pre-caching other extensions + echo "\t" . $e->getMessage() . "\n"; + } } $perfLog = new PerformanceLogIndex(); foreach ( $perfLog->getTypes() as $type ) { echo "xenon: $type\n"; - $fname = $perfLog->fetch( $perfLog->getLatestLog( $type ) ); + try { + $fname = $perfLog->fetch( $perfLog->getLatestLog( $type ) ); + } catch ( RequestException $e ) { + // e.g. 404 Not Found. If an entry point is no longer in use, + // and we haven't removed it yet from our list, it might eventually expire + // and not have any file. + echo "\t" . $e->getMessage() . 
"\n"; + continue; + } $log = new XenonLogs(); $log->parse( $fname ); $log->save( "$fname.cache" ); } echo "Done!\n"; diff --git a/src/CoverLogIndex.php b/src/CoverLogIndex.php index 391d9e2..1b1a63a 100644 --- a/src/CoverLogIndex.php +++ b/src/CoverLogIndex.php @@ -1,108 +1,109 @@ * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . */ namespace Legoktm\CoverMe; use GuzzleHttp\Client; class CoverLogIndex { /** * @var Client */ private $client; private $dir; /** * @var string[] */ private $extensions; public function __construct() { $this->dir = dirname( __DIR__ ) . '/coverlogs'; $this->client = new Client( [ 'headers' => [ 'User-Agent' => 'https://tools.wmflabs.org/coverme/ 0.1' ] ] ); } /** * @param bool $force * * @return string */ public function getCore( $force = false ) { $fname = "{$this->dir}/core.xml"; if ( $force || !file_exists( $fname ) ) { $this->client->get( 'https://doc.wikimedia.org/cover/mediawiki-core/clover.xml.gz', [ 'sink' => "$fname.gz" ] ); if ( file_exists( $fname ) ) { unlink( $fname ); } shell_exec( 'gunzip ' . 
escapeshellarg( "$fname.gz" ) ); } return $fname; } /** * @param bool $force * @return string[] */ public function getExtensionList( $force = false ) { if ( $this->extensions ) { return $this->extensions; } $cache = "{$this->dir}/extensions.json"; if ( $force || !file_exists( $cache ) ) { $resp = $this->client->get( 'https://doc.wikimedia.org/cover-extensions/' ); $matches = []; preg_match_all( '!a class="cover-item" href="\./(.*?)/"!', $resp->getBody(), $matches ); $this->extensions = $matches[1]; file_put_contents( $cache, json_encode( $this->extensions ) ); } else { $this->extensions = json_decode( file_get_contents( $cache ) ); } return $this->extensions; } /** * @param string $ext * @param bool $force * * @return string + * @throws \GuzzleHttp\Exception\RequestException */ public function getExtension( $ext, $force = false ) { $fname = "{$this->dir}/$ext.xml"; if ( $force || !file_exists( $fname ) ) { $this->client->get( "https://doc.wikimedia.org/cover-extensions/$ext/clover.xml", [ 'sink' => $fname ] ); } return $fname; } } diff --git a/src/PerformanceLogIndex.php b/src/PerformanceLogIndex.php index 40e2177..7c9a344 100644 --- a/src/PerformanceLogIndex.php +++ b/src/PerformanceLogIndex.php @@ -1,93 +1,94 @@ * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . 
*/ namespace Legoktm\CoverMe; use GuzzleHttp\Client; class PerformanceLogIndex { /** * @var string */ private $dir; /** * @var Client */ private $client; public function __construct() { $this->dir = dirname( __DIR__ ) . '/xenonlogs/'; $this->client = new Client( [ 'headers' => [ 'User-Agent' => 'https://tools.wmflabs.org/coverme/ 0.1' ] ] ); } /** * @return string[] */ public function getTypes() { return [ 'all', 'index', 'api', 'rest', 'RunSingleJob', 'thumb', 'load', 'touch' ]; } /** * @param string $type * * @return string */ public function getLatestLog( $type = 'all' ) { $resp = $this->client->get( 'https://performance.wikimedia.org/xenon/logs/daily/' ); $matches = []; preg_match( "/href=\"(.*?)\.$type\.log\"/", $resp->getBody(), $matches ); return "https://performance.wikimedia.org/xenon/logs/daily/{$matches[1]}.$type.log"; } /** * @param string $type * * @return string */ public function getLatestLocalLog( $type = 'all' ) { $files = scandir( $this->dir ); sort( $files ); $ending = ".$type.log"; foreach ( $files as $file ) { if ( substr( $file, -strlen( $ending ) ) === $ending ) { return "{$this->dir}/$file"; } } } /** * @param string $url * * @return string + * @throws \GuzzleHttp\Exception\RequestException */ public function fetch( $url ) { $fname = $this->dir . basename( $url ); if ( !file_exists( $fname ) ) { $this->client->get( $url, [ 'sink' => $fname ] ); } return $fname; } }