Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Paste
P15093
(An Untitled Masterwork)
Active
Public
Actions
Authored by
Urbanecm_WMF
on Mar 30 2021, 9:42 PM.
Edit Paste
Archive Paste
View Raw File
Subscribe
Mute Notifications
Award Token
Flag For Later
Tags
None
Referenced Files
F34198274: raw-paste-data.txt
Mar 30 2021, 9:42 PM
2021-03-30 21:42:31 (UTC+0)
Subscribers
None
#!/usr/bin/env python3
import sys
from urllib.parse import quote

import requests
# Language code of the Wikipedia edition the script works on.
LANG = 'vi'

# Action API endpoint of that wiki.
API_URL = 'https://%s.wikipedia.org/w/api.php' % LANG

# The two templates below keep one `%s` slot for the article title, which is
# filled in later. The language is therefore substituted through the
# '@@LANG@@' marker with str.replace, so the `%s` survives untouched.
ARTICLE_PATH_BASE = 'https://@@LANG@@.wikipedia.org/wiki/%s'.replace('@@LANG@@', LANG)
LINK_REC_API = 'https://api.wikimedia.org/service/linkrecommendation/v1/linkrecommendations/wikipedia/@@LANG@@/%s'.replace('@@LANG@@', LANG)

# Identifies this script (and a contact address) to the servers it queries.
USER_AGENT = 'Urbanecm\'s script to generate a spreadsheet for link recommendation algorithm review (urbanecm@tools.wmflabs.org)'

# One shared HTTP session, so every request carries the User-Agent above.
s = requests.Session()
s.headers['User-Agent'] = USER_AGENT
def make_request(payload, timeout=30):
    """POST ``payload`` to the wiki's Action API and return the response.

    Args:
        payload: Mapping of API parameters; sent as the form-encoded body.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` waits indefinitely, so a single stalled
            connection would hang the whole run.

    Returns:
        The response object produced by the shared session ``s``.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    return s.post(API_URL, data=payload, timeout=timeout)
def get_links_api(article_title):
    """Return the link recommendation service URL for ``article_title``.

    The title is percent-encoded as a single path segment (``safe=''``), so
    characters that have a meaning inside a URL -- ``/``, ``?``, ``#``,
    ``"`` -- cannot split the path, start a query string, or break the
    spreadsheet formula this URL is later embedded in.

    Args:
        article_title: Page title as returned by the API (unencoded).

    Returns:
        ``LINK_REC_API`` with its ``%s`` slot filled by the encoded title.
    """
    return LINK_REC_API % quote(article_title, safe='')
def get_recommendations(article_title):
    """Fetch the link recommendations for one article.

    Args:
        article_title: Title of the article to request recommendations for.

    Returns:
        The value stored under ``'links'`` in the JSON response, or an empty
        list when that key is absent.

    Raises:
        ValueError: If the response body is not valid JSON. The raw body is
            written to stderr first so the failure can be diagnosed.
    """
    response = s.get(get_links_api(article_title))
    try:
        data = response.json()
    except ValueError:
        # JSON decoding errors derive from ValueError. Catching only that
        # (instead of a bare ``except``) leaves KeyboardInterrupt and
        # SystemExit alone while still dumping the offending body.
        print(response.content, file=sys.stderr)
        raise
    return data.get('links', [])
def random_articles(number=150):
    """Yield the titles of randomly selected pages.

    Issues one ``list=random`` query restricted to namespace 0 and to
    non-redirects, then yields the ``title`` of every returned record that
    has one. Because this is a generator, the request is only sent once
    iteration starts.

    Args:
        number: Value passed as ``rnlimit``, i.e. how many random pages to
            ask the API for.

    Yields:
        Page titles, in the order the API returned them.
    """
    response = make_request({
        "action": "query",
        "format": "json",
        "list": "random",
        "rnnamespace": "0",
        "rnfilterredir": "nonredirects",
        "rnlimit": number,
    })
    listing = response.json().get('query', {}).get('random', [])
    titles = (entry.get('title') for entry in listing)
    # Records without a title are silently skipped.
    yield from (title for title in titles if title is not None)
def get_page_length(page_title):
    """Return the ``size`` of the single revision the API reports for a page.

    Queries ``prop=revisions`` with ``rvlimit=1`` for ``page_title`` and
    reads the ``size`` field of the revision that comes back.

    Args:
        page_title: Title of the page to look up.

    Returns:
        The revision's ``size`` value, or ``None`` when the response holds
        no page entry, the page entry has no revisions (e.g. the page no
        longer exists), or the revision carries no ``size``.
    """
    response = make_request({
        "action": "query",
        "format": "json",
        "prop": "revisions",
        "titles": page_title,
        "rvprop": "ids|user|size",
        "rvslots": "main",
        "rvlimit": "1",
        "rvdir": "older",
    })
    pages = response.json().get('query', {}).get('pages', {})

    # Only one title is requested, so the first (and only) entry is the page.
    page = next(iter(pages.values()), None)
    if page is None:
        return None

    # Indexing an empty revisions list used to raise IndexError and abort
    # the whole run; report "unknown" instead.
    revisions = page.get('revisions', [])
    if not revisions:
        return None
    return revisions[0].get('size')
def _hyperlink(url, label):
    """Build a spreadsheet ``=HYPERLINK(url, label)`` formula.

    Double quotes inside either argument are doubled, which is how
    spreadsheet formulas escape a quote inside a string literal; otherwise a
    title such as ``"Heroes"`` would terminate the string early and corrupt
    the cell.
    """
    return '=HYPERLINK("%s", "%s")' % (
        url.replace('"', '""'),
        label.replace('"', '""'),
    )


# Print one tab-separated spreadsheet row per random article that has at
# least one link recommendation, and stop once 25 rows have been printed.
# Skipped articles are reported on stderr so stdout stays clean.
article_suggested = 0
for random_article in random_articles():
    recs = get_recommendations(random_article)
    if not recs:
        print('Skipping %s, no links recommended' % random_article, file=sys.stderr)
        continue

    print("\t".join([
        LANG,
        # Percent-encode the title so characters such as '?' or '#' stay part
        # of the page name instead of starting a query string or fragment.
        _hyperlink(ARTICLE_PATH_BASE % quote(random_article), random_article),
        str(get_page_length(random_article)),
        _hyperlink(get_links_api(random_article), "API"),
        str(len(recs)),
        # Two columns left for the human reviewer to complete.
        "FILL IN",
        "FILL IN",
    ]))

    article_suggested += 1
    if article_suggested >= 25:
        break
Event Timeline
Urbanecm_WMF
created this paste.
Mar 30 2021, 9:42 PM
2021-03-30 21:42:31 (UTC+0)
Urbanecm_WMF
mentioned this in
T278864: Add a link: evaluate link recommendation (Mar 30 2021)
.
Mar 31 2021, 12:14 AM
2021-03-31 00:14:15 (UTC+0)
Log In to Comment