Page MenuHomePhabricator
Paste P5090

collect docs to benchmark building completion suggester docs
ActivePublic

Authored by EBernhardson on Mar 20 2017, 11:54 PM.
#<?php
// Collects up to 5000 source documents from the enwiki content index and dumps
// them as JSON to /tmp so that completion suggester document building can be
// benchmarked offline against a fixed input set.
//
// NOTE(review): every statement is deliberately kept on a single physical line;
// this looks intended for pasting into a line-by-line REPL (eval.php/shell.php) — confirm.

// Resolve the CirrusSearch configuration and open the 'codfw' connection.
$config = MediaWiki\MediaWikiServices::getInstance()->getConfigFactory()->makeConfig( 'CirrusSearch' );
$conn = CirrusSearch\Connection::getPool( $config, 'codfw' );

// The builder is only used here to learn which source fields it requires.
$scorer = CirrusSearch\BuildDocument\Completion\SuggestScoringMethodFactory::getScoringMethod( 'popqual' );
$builder = new CirrusSearch\BuildDocument\Completion\SuggestBuilder( $scorer, [] );
$sourceIndex = $conn->getIndex( 'enwiki', 'content' );

// Fetch only the fields the suggest builder reports as required.
$query = new \Elastica\Query();
$query->setSource( [ 'includes' => $builder->getRequiredFields() ] );

// Filter: documents of type 'page' that are in NS_MAIN or have a redirect in NS_MAIN.
// NOTE(review): whether the two "should" clauses are mandatory next to a "must"
// clause depends on the Elasticsearch version's bool/filter-context semantics — verify.
$pageAndNs = new \Elastica\Query\BoolQuery();
$pageAndNs->addShould( new Elastica\Query\Term( [ "namespace" => NS_MAIN ] ) );
$pageAndNs->addShould( new Elastica\Query\Term( [ "redirect.namespace" => NS_MAIN ] ) );
$pageAndNs->addMust( new \Elastica\Query\Type( 'page' ) );
$bool = new Elastica\Query\BoolQuery();
$bool->addFilter( $pageAndNs );
$query->setQuery( $bool );
$query->setSize( 5000 );

$search = new \Elastica\Search( $conn->getClient() );
$search->setQuery( $query );
$search->addIndex( $sourceIndex );

// Run the search exactly once and reuse the result set; the previous version
// called search() a second time in the loop, sending the same query twice.
$res = $search->search();
$inputDocs = [];
foreach ( $res as $result ) { $inputDocs[] = [ 'id' => $result->getId(), 'source' => $result->getSource() ]; }

// json_encode() returns false on failure; report it instead of silently writing an empty file.
$json = json_encode( $inputDocs );
if ( $json === false ) { fwrite( STDERR, "json_encode failed: " . json_last_error_msg() . "\n" ); } elseif ( file_put_contents( '/tmp/ebernhardson_bench_compsuggest', $json ) === false ) { fwrite( STDERR, "failed writing /tmp/ebernhardson_bench_compsuggest\n" ); }