Change line 13 to: $res = $dbr->select( 'page', 'page_title', array( 'page_namespace' => NS_CATEGORY, 'page_title' => $existing_cats ) );
User @labster is busy and does not have time to create an upstream task at the moment, so I am filing it on his behalf.
diff --git a/AutoCreateCategoryPages.body.php b/AutoCreateCategoryPages.body.php
index f0646d3..637e3b0 100644
--- a/AutoCreateCategoryPages.body.php
+++ b/AutoCreateCategoryPages.body.php
@@ -2,15 +2,15 @@
 class AutoCreateCategoryPages {
 
 	/**
-	 * Get an array of existing categories, with the name in the key and sort key in the value.
+	 * Get an array of existing categories on this page, with the unprefixed name
 	 *
 	 * @return array
 	 */
-	static function getExistingCategories() {
-		// TODO: cache this. Probably have to add to said cache every time a category page is created,
-		// by us or manually
+	static function getExistingCategories( $page_cats ) {
 		$dbr = wfGetDB( DB_SLAVE );
-		$res = $dbr->select( 'page', 'page_title', array( 'page_namespace' => NS_CATEGORY ) );
+		$res = $dbr->select( 'page', 'page_title',
+			array( 'page_namespace' => NS_CATEGORY, 'page_title' => $page_cats )
+		);
 
 		$categories = array();
 		foreach ( $res as $row ) {
@@ -37,7 +37,7 @@ class AutoCreateCategoryPages {
 		// array keys will cast numeric category names to ints
 		// so we need to cast them back to strings to avoid potentially breaking things!
 		$page_cats = array_map( 'strval', array_keys( $page_cats ) );
-		$existing_cats = self::getExistingCategories();
+		$existing_cats = self::getExistingCategories( $page_cats );
 
 		// Determine which categories on page do not exist
 		$new_cats = array_diff( $page_cats, $existing_cats );
"The logic here is that there is already an index on (page_namespace, page_title) so lookups would be very fast, given the number of categories typically on a page. But as it stands it grabs the entire namespace, which could be tons of database network traffic on a large wiki.
select sum( length(page_title) ) from page where page_namespace = 14;
[sum( length(page_title) )] => 1091582
1 MB of internal DB traffic per page save would not be cool." -@labster