Page MenuHomePhabricator
Paste P3271

External links on fywiki and enwiki
ActivePublic

Authored by PleaseStand on Jun 20 2016, 6:30 AM.
Tags
None
Referenced Files
F4184285: External links on fywiki and enwiki
Jun 20 2016, 6:30 AM
Subscribers
None
See T137984#2390918. **Do not rely on these numbers.**
## fywiki.externallinks
```
MariaDB [fywiki_subset]> CREATE TABLE externallinks_nonwmf (el_from int unsigned NOT NULL, el_to_md5 binary(16) NOT NULL, KEY el_from_to_md5 (el_from, el_to_md5)) ENGINE=ARIA ROW_FORMAT=FIXED DEFAULT CHARSET=binary;
Query OK, 0 rows affected (0.02 sec)
MariaDB [fywiki_subset]> ALTER TABLE externallinks_nonwmf DISABLE KEYS;
Query OK, 0 rows affected (0.00 sec)
MariaDB [fywiki_subset]> INSERT INTO externallinks_nonwmf SELECT el_from, UNHEX(MD5(el_to)) el_to_md5 FROM externallinks WHERE el_index NOT REGEXP '^https?://org\\.(wikipedia|wiktionary|wikisource|wikiquote|wikibooks|wikimedia|wikinews|wikiversity|wikivoyage|wikimediafoundation|mediawiki|wikidata)\\.';
Query OK, 37356 rows affected (1.86 sec)
Records: 37356 Duplicates: 0 Warnings: 0
MariaDB [fywiki_subset]> ALTER TABLE externallinks_nonwmf ENABLE KEYS;
Query OK, 0 rows affected (0.03 sec)
MariaDB [fywiki_subset]> SELECT page_namespace, SUM(linkcount) total_linkcount FROM (SELECT page_namespace, COUNT(DISTINCT el_to_md5) linkcount FROM externallinks_nonwmf JOIN page ON el_from = page_id GROUP BY el_from) tmp GROUP BY page_namespace;
+----------------+-----------------+
| page_namespace | total_linkcount |
+----------------+-----------------+
| 0 | 33477 |
| 1 | 197 |
| 2 | 216 |
| 3 | 404 |
| 4 | 321 |
| 5 | 12 |
| 6 | 2403 |
| 7 | 3 |
| 8 | 7 |
| 9 | 1 |
| 10 | 87 |
| 11 | 3 |
| 15 | 2 |
+----------------+-----------------+
13 rows in set (0.60 sec)
```
| page_namespace | total_linkcount | excluding User: and talk pages | content namespaces only
| 0 | 33477 | 33477 | 33477
| 1 | 197 | 0 | 0
| 2 | 216 | 0 | 0
| 3 | 404 | 0 | 0
| 4 | 321 | 321 | 0
| 5 | 12 | 0 | 0
| 6 | 2403 | 2403 | 0
| 7 | 3 | 0 | 0
| 8 | 7 | 7 | 0
| 9 | 1 | 0 | 0
| 10 | 87 | 87 | 0
| 11 | 3 | 0 | 0
| 15 | 2 | 0 | 0
| | 37133 | 36295 | 33477
## fywiki.iwlinks
```
MariaDB [fywiki_subset]> SELECT page_namespace, COUNT(*) iwlinkcount FROM iwlinks JOIN page ON iwl_from = page_id WHERE iwl_prefix IN
-> ('acronym','advogato','aew','appropedia','aquariumwiki','arborwiki','arxiv','atmwiki','baden','battlestarwiki','bcnbio','beacha','betawiki','bluwiki','blw','botwiki','boxrec','brickwiki','bulba','c2','c2find','cache','cellwiki','centralwikia','chej','choralwiki','citizendium','ckwiss','comixpedia','communityscheme','communitywiki','comune','crazyhacks','creativecommons','creativecommonswiki','creatureswiki','cxej','dcc','dcdatabase','dcma','delicious','devmo','dict','dictionary','disinfopedia','distributedproofreaders','distributedproofreadersca','dmoz','dmozs','doi','doom_wiki','dpd','drae','dreamhost','drumcorpswiki','dwjwiki','ecoreality','ecxei','elibre','emacswiki','encyc','energiewiki','englyphwiki','enkol','eokulturcentro','esolang','ethnologue','ethnologuefamily','evowiki','exotica','eĉei','fanimutationwiki','fedora','finalfantasy','finnix','flickrphoto','flickruser','floralwiki','foldoc','forthfreak','foxwiki','freebio','freebsdman','freeculturewiki','freedomdefined','freefeel','freekiwiki','freenode','freesoft','ganfyd','gardenology','gausswiki','gentoo','genwiki','globalvoices','glossarwiki','glossarywiki','google','googledefine','googlegroups','greatlakeswiki','guildwarswiki','guildwiki','gutenberg','gutenbergwiki','h2wiki','hackerspaces','hammondwiki','hdl','heroeswiki','hrfwiki','hrwiki','hupwiki','iarchive','imdbcharacter','imdbcompany','imdbname','imdbtitle','infosecpedia','infosphere','irc','iso639-3','issn','iuridictum','jaglyphwiki','javanet','javapedia','jefo','jerseydatabase','jira','jspwiki','jstor','kamelo','karlsruhe','kinowiki','kmwiki','komicawiki','kontuwiki','koslarwiki','kpopwiki','libreplanet','linguistlist','linuxwiki','linuxwikide','liswiki','literateprograms','livepedia','localwiki','lojban','lostpedia','lqwiki','luxo','mariowiki','marveldatabase','meatball','memoryalpha','metawikisearch','mineralienatlas','moinmoin','monstropedia','mosapedia','mozcom','mozillawiki','mozillazinekb','musicbrainz','mwod','mwot','nara','nkcells','nosmoke','oeis','olpc','onelook','openfacts','openlibrary','openstreetmap','openwetware','openwiki','opera7wiki','organicdesign','orthodoxwiki','osmwiki','ourmedia','owasp','panawiki','patwiki','personaltelco','phpwiki','phwiki','planetmath','pmeg','pokewiki','pokéwiki','proofwiki','psycle','pythoninfo','pythonwiki','pywiki','quarry','reuterswiki','revo','rfc','rheinneckar','robowiki','rodovid','rowiki','rtfm','s23wiki','scholar','schoolswp','scores','scoutwiki','scramble','seapig','seattlewiki','seattlewireless','securewikidc','semantic-mw','senseislibrary','sharemap','silcode','slashdot','slwiki','sourceforge','squeak','stewardry','strategywiki','swinbrain','swtrain','tabwiki','tclerswiki','technorati','tfwiki','thelemapedia','theopedia','thinkwiki','tibiawiki','tmbw','tmnet','tmwiki','toollabs','tools','translatewiki','tviv','tvtropes','twiki','tyvawiki','uncyclopedia','unihan','unreal','urbandict','usej','usemod','vd','viaf','vikidia','vinismo','vkol','vlos','voipinfo','werelate','wikia','wikiapiary','wikiasite','wikichristian','wikicities','wikicity','wikif1','wikifur','wikihow','wikiindex','wikilemon','wikilivres','wikilivresru','wikimac-de','wikinfo','wikinvest','wikiotics','wikipapers','wikiskripta','wikisophia','wikispot','wikiti','wikitree','wikiwikiweb','wipipedia','wlug','wmar','wmat','wmau','wmch','wmcl','wmcz','wmdc','wmde','wmdeblog','wmes','wmfr','wmhk','wmhu','wmid','wmil','wmin','wmit','wmke','wmph','wmpt','wmsk','wmtw','wmuk','wmve','wmza','wookieepedia','wowwiki','wqy','wurmpedia','zrhwiki','zum','zwiki','ĉej')
-> GROUP BY page_namespace;
+----------------+-------------+
| page_namespace | iwlinkcount |
+----------------+-------------+
| 0 | 15 |
| 2 | 7 |
| 4 | 6 |
| 9 | 1 |
+----------------+-------------+
4 rows in set (0.11 sec)
```
| page_namespace | iwlinkcount | excluding User: and talk pages | content namespaces only
| 0 | 15 | 15 | 15
| 2 | 7 | 0 | 0
| 4 | 6 | 6 | 0
| 9 | 1 | 0 | 0
| | 29 | 21 | 15
## enwiki.externallinks
```
MariaDB [enwiki_subset]> CREATE TABLE externallinks_nonwmf (el_from int unsigned NOT NULL, el_to_md5 binary(16) NOT NULL, KEY el_from_to_md5 (el_from, el_to_md5)) ENGINE=ARIA ROW_FORMAT=FIXED DEFAULT CHARSET=binary;
Query OK, 0 rows affected (0.03 sec)
MariaDB [enwiki_subset]> ALTER TABLE externallinks_nonwmf DISABLE KEYS;
Query OK, 0 rows affected (0.00 sec)
MariaDB [enwiki_subset]> INSERT INTO externallinks_nonwmf SELECT el_from, UNHEX(MD5(el_to)) el_to_md5 FROM externallinks WHERE el_index NOT REGEXP '^https?://org\\.(wikipedia|wiktionary|wikisource|wikiquote|wikibooks|wikimedia|wikinews|wikiversity|wikivoyage|wikimediafoundation|mediawiki|wikidata)\\.';
Query OK, 88837942 rows affected (1 hour 2 min 42.69 sec)
Records: 88837942 Duplicates: 0 Warnings: 0
MariaDB [enwiki_subset]> ALTER TABLE externallinks_nonwmf ENABLE KEYS;
Query OK, 0 rows affected (3 min 56.60 sec)
MariaDB [enwiki_subset]> SELECT page_namespace, SUM(linkcount) total_linkcount FROM (SELECT page_namespace, COUNT(DISTINCT el_to_md5) linkcount FROM externallinks_nonwmf JOIN page ON el_from = page_id GROUP BY el_from) tmp GROUP BY page_namespace;
+----------------+-----------------+
| page_namespace | total_linkcount |
+----------------+-----------------+
| 0 | 39800352 |
| 1 | 4600599 |
| 2 | 6908453 |
| 3 | 5432338 |
| 4 | 15824939 |
| 5 | 717413 |
| 6 | 1020500 |
| 7 | 22497 |
| 8 | 302 |
| 9 | 132893 |
| 10 | 755304 |
| 11 | 41489 |
| 12 | 1219 |
| 13 | 2569 |
| 14 | 96277 |
| 15 | 64055 |
| 100 | 210599 |
| 101 | 6648 |
| 108 | 74 |
| 109 | 16547 |
| 118 | 391282 |
| 119 | 3127 |
| 447 | 2225 |
| 710 | 3 |
| 828 | 3028 |
| 829 | 2634 |
| 2600 | 207 |
+----------------+-----------------+
27 rows in set (3 min 31.45 sec)
```
| page_namespace | total_linkcount | excluding User: and talk pages | content namespaces only
| 0 | 39800352 | 39800352 | 39800352
| 1 | 4600599 | 0 | 0
| 2 | 6908453 | 0 | 0
| 3 | 5432338 | 0 | 0
| 4 | 15824939 | 15824939 | 0
| 5 | 717413 | 0 | 0
| 6 | 1020500 | 1020500 | 0
| 7 | 22497 | 0 | 0
| 8 | 302 | 302 | 0
| 9 | 132893 | 0 | 0
| 10 | 755304 | 755304 | 0
| 11 | 41489 | 0 | 0
| 12 | 1219 | 1219 | 0
| 13 | 2569 | 0 | 0
| 14 | 96277 | 96277 | 0
| 15 | 64055 | 0 | 0
| 100 | 210599 | 210599 | 0
| 101 | 6648 | 0 | 0
| 108 | 74 | 74 | 0
| 109 | 16547 | 0 | 0
| 118 | 391282 | 391282 | 0
| 119 | 3127 | 0 | 0
| 447 | 2225 | 0 | 0
| 710 | 3 | 3 | 0
| 828 | 3028 | 3028 | 0
| 829 | 2634 | 0 | 0
| 2600 | 207 | 207 | 0
| | 76057573 | 58104086 | 39800352
## enwiki.iwlinks
```
MariaDB [enwiki_subset]> SELECT page_namespace, COUNT(*) iwlinkcount FROM iwlinks JOIN page ON iwl_from = page_id WHERE iwl_prefix IN
-> ('acronym','advogato','aew','appropedia','aquariumwiki','arborwiki','arxiv','atmwiki','baden','battlestarwiki','bcnbio','beacha','betawiki','bluwiki','blw','botwiki','boxrec','brickwiki','bulba','c2','c2find','cache','cellwiki','centralwikia','chej','choralwiki','citizendium','ckwiss','comixpedia','communityscheme','communitywiki','comune','crazyhacks','creativecommons','creativecommonswiki','creatureswiki','cxej','dcc','dcdatabase','dcma','delicious','devmo','dict','dictionary','disinfopedia','distributedproofreaders','distributedproofreadersca','dmoz','dmozs','doi','doom_wiki','dpd','drae','dreamhost','drumcorpswiki','dwjwiki','ecoreality','ecxei','elibre','emacswiki','encyc','energiewiki','englyphwiki','enkol','eokulturcentro','esolang','ethnologue','ethnologuefamily','evowiki','exotica','eĉei','fanimutationwiki','fedora','finalfantasy','finnix','flickrphoto','flickruser','floralwiki','foldoc','forthfreak','foxwiki','freebio','freebsdman','freeculturewiki','freedomdefined','freefeel','freekiwiki','freenode','freesoft','ganfyd','gardenology','gausswiki','gentoo','genwiki','globalvoices','glossarwiki','glossarywiki','google','googledefine','googlegroups','greatlakeswiki','guildwarswiki','guildwiki','gutenberg','gutenbergwiki','h2wiki','hackerspaces','hammondwiki','hdl','heroeswiki','hrfwiki','hrwiki','hupwiki','iarchive','imdbcharacter','imdbcompany','imdbname','imdbtitle','infosecpedia','infosphere','irc','iso639-3','issn','iuridictum','jaglyphwiki','javanet','javapedia','jefo','jerseydatabase','jira','jspwiki','jstor','kamelo','karlsruhe','kinowiki','kmwiki','komicawiki','kontuwiki','koslarwiki','kpopwiki','libreplanet','linguistlist','linuxwiki','linuxwikide','liswiki','literateprograms','livepedia','localwiki','lojban','lostpedia','lqwiki','luxo','mariowiki','marveldatabase','meatball','memoryalpha','metawikisearch','mineralienatlas','moinmoin','monstropedia','mosapedia','mozcom','mozillawiki','mozillazinekb','musicbrainz','mwod','mwot','nara','nkcells','nosmoke','oeis','olpc','onelook','openfacts','openlibrary','openstreetmap','openwetware','openwiki','opera7wiki','organicdesign','orthodoxwiki','osmwiki','ourmedia','owasp','panawiki','patwiki','personaltelco','phpwiki','phwiki','planetmath','pmeg','pokewiki','pokéwiki','proofwiki','psycle','pythoninfo','pythonwiki','pywiki','quarry','reuterswiki','revo','rfc','rheinneckar','robowiki','rodovid','rowiki','rtfm','s23wiki','scholar','schoolswp','scores','scoutwiki','scramble','seapig','seattlewiki','seattlewireless','securewikidc','semantic-mw','senseislibrary','sharemap','silcode','slashdot','slwiki','sourceforge','squeak','stewardry','strategywiki','swinbrain','swtrain','tabwiki','tclerswiki','technorati','tfwiki','thelemapedia','theopedia','thinkwiki','tibiawiki','tmbw','tmnet','tmwiki','toollabs','tools','translatewiki','tviv','tvtropes','twiki','tyvawiki','uncyclopedia','unihan','unreal','urbandict','usej','usemod','vd','viaf','vikidia','vinismo','vkol','vlos','voipinfo','werelate','wikia','wikiapiary','wikiasite','wikichristian','wikicities','wikicity','wikif1','wikifur','wikihow','wikiindex','wikilemon','wikilivres','wikilivresru','wikimac-de','wikinfo','wikinvest','wikiotics','wikipapers','wikiskripta','wikisophia','wikispot','wikiti','wikitree','wikiwikiweb','wipipedia','wlug','wmar','wmat','wmau','wmch','wmcl','wmcz','wmdc','wmde','wmdeblog','wmes','wmfr','wmhk','wmhu','wmid','wmil','wmin','wmit','wmke','wmph','wmpt','wmsk','wmtw','wmuk','wmve','wmza','wookieepedia','wowwiki','wqy','wurmpedia','zrhwiki','zum','zwiki','ĉej')
-> GROUP BY page_namespace;
+----------------+-------------+
| page_namespace | iwlinkcount |
+----------------+-------------+
| 0 | 30791 |
| 1 | 11836 |
| 2 | 27192 |
| 3 | 353259 |
| 4 | 51960 |
| 5 | 4034 |
| 6 | 1548 |
| 7 | 49 |
| 8 | 40 |
| 9 | 189 |
| 10 | 831 |
| 11 | 420 |
| 12 | 40 |
| 13 | 14 |
| 14 | 559 |
| 15 | 57 |
| 100 | 229 |
| 101 | 132 |
| 109 | 12 |
| 118 | 153 |
| 119 | 2 |
| 447 | 5 |
| 829 | 2 |
+----------------+-------------+
23 rows in set (54.92 sec)
```
| page_namespace | iwlinkcount | excluding User: and talk pages | content namespaces only
| 0 | 30791 | 30791 | 30791
| 1 | 11836 | 0 | 0
| 2 | 27192 | 0 | 0
| 3 | 353259 | 0 | 0
| 4 | 51960 | 51960 | 0
| 5 | 4034 | 0 | 0
| 6 | 1548 | 1548 | 0
| 7 | 49 | 0 | 0
| 8 | 40 | 40 | 0
| 9 | 189 | 0 | 0
| 10 | 831 | 831 | 0
| 11 | 420 | 0 | 0
| 12 | 40 | 40 | 0
| 13 | 14 | 0 | 0
| 14 | 559 | 559 | 0
| 15 | 57 | 0 | 0
| 100 | 229 | 229 | 0
| 101 | 132 | 0 | 0
| 109 | 12 | 0 | 0
| 118 | 153 | 153 | 0
| 119 | 2 | 0 | 0
| 447 | 5 | 0 | 0
| 829 | 2 | 0 | 0
| | 483354 | 86151 | 30791