Page MenuHomePhabricator
Paste P15093

(An Untitled Masterwork)
ActivePublic

Authored by Urbanecm_WMF on Mar 30 2021, 9:42 PM.
Tags
None
Referenced Files
F34198274: raw-paste-data.txt
Mar 30 2021, 9:42 PM
Subscribers
None
#!/usr/bin/env python3
import sys
from urllib.parse import quote

import requests
# Language code of the Wikipedia edition the script works on.
LANG = 'vi'

# Endpoint that make_request() posts to.
API_URL = 'https://{lang}.wikipedia.org/w/api.php'.format(lang=LANG)

# URL templates with one remaining ``%s`` slot for an article title
# (``%%s`` survives the LANG substitution as a literal ``%s``).
ARTICLE_PATH_BASE = 'https://%s.wikipedia.org/wiki/%%s' % LANG
LINK_REC_API = (
    'https://api.wikimedia.org/service/linkrecommendation/v1/'
    'linkrecommendations/wikipedia/%s/%%s' % LANG
)

# Value sent as the User-Agent header of every request made by this script.
USER_AGENT = (
    "Urbanecm's script to generate a spreadsheet for link recommendation "
    "algorithm review (urbanecm@tools.wmflabs.org)"
)
# One HTTP session shared by every request in this script; it carries the
# identifying User-Agent header on each call.
s = requests.Session()
s.headers['User-Agent'] = USER_AGENT
def make_request(payload):
    """POST *payload* as form data to ``API_URL`` through the shared session.

    Returns whatever ``s.post`` returns; the response is neither checked nor
    decoded here.
    """
    response = s.post(API_URL, data=payload)
    return response
def get_links_api(article_title):
    """Return the link-recommendation API URL for *article_title*.

    The title is percent-encoded as a single path segment before it is
    substituted into ``LINK_REC_API``, so characters such as ``/``, ``?``,
    ``#`` or ``%`` remain part of the title instead of altering the URL's
    path, query string or fragment.
    """
    # safe='' makes quote() encode '/' as well, which it leaves alone by default.
    return LINK_REC_API % quote(article_title, safe='')
def get_recommendations(article_title):
    """Fetch the recommended links for *article_title*.

    Sends a GET request to the URL built by ``get_links_api`` and returns the
    ``links`` entry of the decoded JSON object, or an empty list when that
    key is absent.

    Raises:
        ValueError: if the response body cannot be decoded as JSON. The raw
            body is written to stderr first to help diagnose the failure.
    """
    r = s.get(get_links_api(article_title))
    try:
        data = r.json()
    except ValueError:
        # JSON decoding errors are ValueError subclasses; catching only those
        # keeps KeyboardInterrupt/SystemExit out of this diagnostic path.
        print(r.content, file=sys.stderr)
        raise
    return data.get('links', [])
def random_articles(number=150):
    """Yield titles from one ``list=random`` query sent via ``make_request``.

    The query asks for up to *number* entries (``rnlimit``) with
    ``rnnamespace=0`` and ``rnfilterredir=nonredirects``. Entries without a
    ``title`` are skipped. Being a generator, the request is only sent once
    iteration starts.
    """
    response = make_request({
        "action": "query",
        "format": "json",
        "list": "random",
        "rnnamespace": "0",
        "rnfilterredir": "nonredirects",
        "rnlimit": number,
    })
    entries = response.json().get('query', {}).get('random', [])
    titles = (entry.get('title') for entry in entries)
    yield from (title for title in titles if title is not None)
def get_page_length(page_title):
    """Return the ``size`` of the single revision fetched for *page_title*.

    Queries ``prop=revisions`` with ``rvlimit=1`` and ``rvdir=older``
    (presumably this selects the latest revision, with size in bytes;
    confirm against the MediaWiki API documentation).

    Raises:
        IndexError: if the response contains no pages or the first page has
            no revisions.
    """
    query = {
        "action": "query",
        "format": "json",
        "prop": "revisions",
        "titles": page_title,
        "rvprop": "ids|user|size",
        "rvslots": "main",
        "rvlimit": "1",
        "rvdir": "older",
    }
    pages = make_request(query).json().get('query', {}).get('pages', {})
    # Only one title was requested, so the first page entry is the one we want.
    first_page = list(pages.values())[0]
    first_revision = first_page.get('revisions', [])[0]
    return first_revision.get('size')
def _sheet_string(value):
    """Escape *value* for use inside a double-quoted spreadsheet formula string."""
    # Spreadsheet formulas represent a literal '"' inside a string as '""'.
    return value.replace('"', '""')


# Print one tab-separated spreadsheet row per random article that has at least
# one link recommendation, stopping after 25 rows. Skipped articles are
# reported on stderr so that stdout stays a clean TSV.
article_suggested = 0
for random_article in random_articles():
    recs = get_recommendations(random_article)
    if not recs:
        print('Skipping %s, no links recommended' % random_article, file=sys.stderr)
        continue  # skip article, 0 recs
    links_api = get_links_api(random_article)
    # Percent-encode the title so '?', '#', '%' or '"' in it cannot change the
    # meaning of the article URL; spaces become underscores as in wiki URLs.
    article_url = ARTICLE_PATH_BASE % quote(random_article.replace(' ', '_'), safe='/:')
    print("\t".join([
        LANG,
        '=HYPERLINK("%s", "%s")' % (_sheet_string(article_url), _sheet_string(random_article)),
        str(get_page_length(random_article)),
        '=HYPERLINK("%s", "%s")' % (_sheet_string(links_api), "API"),
        str(len(recs)),
        "FILL IN",
        "FILL IN",
    ]))
    article_suggested += 1
    if article_suggested >= 25:
        break