Page MenuHomePhabricator

Querying wikidata with pywikibot fails for items with images when user is not registered for commons
Open, HighPublic

Description

I created a new user to make some changes as a bot. That user was registered with wikidata.

I then made a new paws notebook to make some edits. The code that I tried to run was as follows:

import pywikibot
import json
site = pywikibot.Site("wikidata", "wikidata")  # Wikidata site object used by this script
repo = site.data_repository()  # repository handle passed to ItemPage/Claim below

def mark_as_part_of_current_landtag(politican_item_id):  # add a P2937 qualifier to the item's P39 claim whose target is Q17781726
    item = pywikibot.ItemPage(repo, politican_item_id)
    claims = item.get()['claims']['P39']  # item.get() parses every claim of the item; this is the call that raises NoUsername in the report
    # Property ID used for the qualifier, and the item ID used as the qualifier's value.
    legislaturperiode = u'P2937'  # variable name is German for "legislative period"
    landtag_period = u"Q30321703" # 17th NRW

    # Q17781726 is: member of the Landtag of North Rhine-Westphalia
    landtag_membership = next(claim for claim in claims if claim.target.id == 'Q17781726')  # no default given: raises StopIteration if nothing matches
    # Build the qualifier claim, set its target to the legislative-period item,
    # and attach it to the membership claim selected above.
    qualifier = pywikibot.Claim(repo, legislaturperiode)
    target = pywikibot.ItemPage(repo, landtag_period)
    qualifier.setTarget(target)
    landtag_membership.addQualifier(qualifier)
    # NOTE(review): addQualifier may already save the change itself -- confirm whether this extra editEntity call is needed.
    item.editEntity(summary='pyWikibot politican in Wahlperiode')
    
mark_as_part_of_current_landtag("Q1620526")  # loading this item goes through the commonsMedia converter (see traceback below)

Essentially I am trying to add a qualifier to an existing claim. Unfortunately this fails with the following output:

WARNING: API error mwoauth-invalid-authorization-invalid-user: The authorization headers in your request are for a user that does not exist here
---------------------------------------------------------------------------
NoUsername                                Traceback (most recent call last)
<ipython-input-2-9a2c8d299850> in <module>()
     22     item.editEntity(summary='pyWikibot politican in Wahlperiode')
     23 
---> 24 mark_as_part_of_current_landtag("Q1620526")

<ipython-input-2-9a2c8d299850> in mark_as_part_of_current_landtag(politican_item_id)
      6 def mark_as_part_of_current_landtag(politican_item_id):
      7     item = pywikibot.ItemPage(repo, politican_item_id)
----> 8     claims = item.get()['claims']['P39']
      9 
     10     legislaturperiode = u'P2937'

/srv/paws/pwb/pywikibot/page.py in get(self, force, get_redirect, *args, **kwargs)
   4043         @raise NotImplementedError: a value in args or kwargs
   4044         """
-> 4045         data = super(ItemPage, self).get(force, *args, **kwargs)
   4046 
   4047         if self.isRedirectPage() and not get_redirect:

/srv/paws/pwb/pywikibot/page.py in get(self, force, *args, **kwargs)
   3646                 self.claims[pid] = []
   3647                 for claim in self._content['claims'][pid]:
-> 3648                     c = Claim.fromJSON(self.repo, claim)
   3649                     c.on_item = self
   3650                     self.claims[pid].append(c)

/srv/paws/pwb/pywikibot/page.py in fromJSON(cls, site, data)
   4477             # The default covers string, url types
   4478             claim.target = Claim.TARGET_CONVERTER.get(
-> 4479                 claim.type, lambda value, site: value)(value, site)
   4480         if 'rank' in data:  # References/Qualifiers don't have ranks
   4481             claim.rank = data['rank']

/srv/paws/pwb/pywikibot/page.py in <lambda>(value, site)
   4417             PropertyPage(site, 'P' + str(value['numeric-id'])),
   4418         'commonsMedia': lambda value, site:
-> 4419             FilePage(pywikibot.Site('commons', 'commons'), value),
   4420         'globe-coordinate': pywikibot.Coordinate.fromWikibase,
   4421         'time': lambda value, site: pywikibot.WbTime.fromWikibase(value),

/srv/paws/pwb/pywikibot/tools/__init__.py in wrapper(*__args, **__kw)
   1446                              cls, depth)
   1447                     del __kw[old_arg]
-> 1448             return obj(*__args, **__kw)
   1449 
   1450         if not __debug__:

/srv/paws/pwb/pywikibot/page.py in __init__(self, source, title)
   2301         """Constructor."""
   2302         self._file_revisions = {}  # dictionary to cache File history.
-> 2303         super(FilePage, self).__init__(source, title, 6)
   2304         if self.namespace() != 6:
   2305             raise ValueError(u"'%s' is not in the file namespace!" % title)

/srv/paws/pwb/pywikibot/tools/__init__.py in wrapper(*__args, **__kw)
   1446                              cls, depth)
   1447                     del __kw[old_arg]
-> 1448             return obj(*__args, **__kw)
   1449 
   1450         if not __debug__:

/srv/paws/pwb/pywikibot/page.py in __init__(self, source, title, ns)
   2177                 raise ValueError(u'Title must be specified and not empty '
   2178                                  'if source is a Site.')
-> 2179         super(Page, self).__init__(source, title, ns)
   2180 
   2181     @deprecate_arg("get_redirect", None)

/srv/paws/pwb/pywikibot/page.py in __init__(self, source, title, ns)
    161 
    162         if isinstance(source, pywikibot.site.BaseSite):
--> 163             self._link = Link(title, source=source, defaultNamespace=ns)
    164             self._revisions = {}
    165         elif isinstance(source, Page):

/srv/paws/pwb/pywikibot/page.py in __init__(self, text, source, defaultNamespace)
   5084         # See bug T104864, defaultNamespace might have been deleted.
   5085         try:
-> 5086             self._defaultns = self._source.namespaces[defaultNamespace]
   5087         except KeyError:
   5088             self._defaultns = defaultNamespace

/srv/paws/pwb/pywikibot/site.py in namespaces(self)
   1012         """Return dict of valid namespaces on this wiki."""
   1013         if not hasattr(self, '_namespaces'):
-> 1014             self._namespaces = NamespacesDict(self._build_namespaces())
   1015         return self._namespaces
   1016 

/srv/paws/pwb/pywikibot/site.py in _build_namespaces(self)
   2610         # For versions lower than 1.14, APISite needs to override
   2611         # the defaults defined in Namespace.
-> 2612         is_mw114 = MediaWikiVersion(self.version()) >= MediaWikiVersion('1.14')
   2613 
   2614         for nsdata in self.siteinfo.get('namespaces', cache=False).values():

/srv/paws/pwb/pywikibot/site.py in version(self)
   2723         if not version:
   2724             try:
-> 2725                 version = self.siteinfo.get('generator', expiry=1).split(' ')[1]
   2726             except pywikibot.data.api.APIError:
   2727                 # May occur if you are not logged in (no API read permissions).

/srv/paws/pwb/pywikibot/site.py in get(self, key, get_default, cache, expiry)
   1674                 elif not Siteinfo._is_expired(cached[1], expiry):
   1675                     return copy.deepcopy(cached[0])
-> 1676         preloaded = self._get_general(key, expiry)
   1677         if not preloaded:
   1678             preloaded = self._get_siteinfo(key, expiry)[key]

/srv/paws/pwb/pywikibot/site.py in _get_general(self, key, expiry)
   1620                         u"', '".join(props)), _logger)
   1621             props += ['general']
-> 1622             default_info = self._get_siteinfo(props, expiry)
   1623             for prop in props:
   1624                 self._cache[prop] = default_info[prop]

/srv/paws/pwb/pywikibot/site.py in _get_siteinfo(self, prop, expiry)
   1546             # warnings are handled later
   1547             request._warning_handler = warn_handler
-> 1548             data = request.submit()
   1549         except api.APIError as e:
   1550             if e.code == 'siunknown_siprop':

/srv/paws/pwb/pywikibot/data/api.py in submit(self)
   2340         cached_available = self._load_cache()
   2341         if not cached_available:
-> 2342             self._data = super(CachedRequest, self).submit()
   2343             self._write_cache(self._data)
   2344         else:

/srv/paws/pwb/pywikibot/data/api.py in submit(self)
   2173                     continue
   2174                 raise NoUsername('Failed OAuth authentication for %s: %s'
-> 2175                                  % (self.site, info))
   2176             # raise error
   2177             try:

NoUsername: Failed OAuth authentication for commons:commons: The authorization headers in your request are for a user that does not exist here

After I registered the account with Wikimedia Commons as well, the edit worked as expected. To help you debug, I created a new account to verify the bug, which I have not yet registered with Commons. It is called Bug_example_bot.

Maybe I am just misunderstanding this but I do not see the connection why I would need to be registered with commons to make changes with wikidata...

Event Timeline

Chicocvenancio triaged this task as Low priority.Feb 25 2018, 8:05 PM
Chicocvenancio moved this task from Backlog to Waiting information on the PAWS board.

@Chicocvenancio is there any more info that you need from my side?

@Knuthuehne Sorry for moving it around the workboard without more context. I started writing here but never submitted.

In order of importance, the information we need to start working on this is:

  • Does it still happen?
  • Is it exclusive to PAWS? That is, if we try to use Pywikibot from your own computer, does the bug still present itself?

To be honest this seems to me a Pywikibot issue, but I haven't tried to reproduce it yet.

This should be fixed by T94885 AFAICT.

I just registered a new user on Wikidata (username test_123_123) and still get the same error when running the code on paws. So I guess it is still an issue.

To be honest, I kind of gave up on trying to get pywikibot to run locally a while ago due to some failed installation/setup attempts. Using it in PAWS seemed much easier which is why I can't say if this also happens locally...

Ok, @Knuthuehne, thanks for the info. I'll keep this open, but it'll be a while until I have time to look into it. At any rate, it is likely mainly a pywikibot issue, not a PAWS one.

Restricted Application added a subscriber: pywikibot-bugs-list. · View Herald TranscriptMar 9 2018, 2:26 PM
Dvorapa added a subscriber: Dvorapa.EditedMar 9 2018, 4:08 PM

I think on the Pywikibot side this was solved in T183466, so this is just an issue of the Pywikibot instance in PAWS not being updated to a newer version (an easy fix in PAWS, I think).

Please try to update Pywikibot instance in PAWS and confirm whether this bug was solved by update or not.

Chicocvenancio added a comment.EditedMar 9 2018, 4:21 PM

@Dvorapa, at this time Pywikibot is updated at every deploy; I'll look into making it update at every pod start (T189311). However, the current version inside the user:test_123_123 server is 9694afa14408fc9ceb7e68ea6e3416a7c9a218e5 of Feb 23 14:23:46 2018 +0100. Was this fixed after that?

To update pywikibot currently you can run cd /srv/paws/pwb; git pull in a PAWS terminal or !cd /srv/paws/pwb; git pull in a python 3 notebook.

However, I cannot reproduce this at all.

Running http://paws-public.wmflabs.org/paws-public/User:test_123_123/T168222%20(wikidata%20login).ipynb# does not give me any errors.

@Knuthuehne can you try again and point me to the public link of the notebook with the error?

@Chicocvenancio well, it only gets more confusing. If I take your example and change the qualifier for which I try to get the claims back to the one that it was in my original query, it crashes again with the same old error:
http://paws-public.wmflabs.org/paws-public/User:test_123_123/T168222%20(wikidata%20login).ipynb

Chicocvenancio edited projects, added Pywikibot-Wikidata; removed good first bug, PAWS.EditedMar 13 2018, 1:57 PM

I'm throwing this back in Pywikibot's court: querying any item with an image will result in this error.

See this query for items without images that will not raise an exception.

See this other query for items that have an image and will result in an exception.

See this notebook for minimal reproducible code.
It seems pywikibot creates a FilePage object whenever there is an image. See
https://phabricator.wikimedia.org/diffusion/PWBC/browse/master/pywikibot/page.py;master$4692

There is nothing to be done on the PAWS side of things.

Chicocvenancio renamed this task from Editing wikidata with paws/pywikibot fails when user is not registered for commons to Querying wikidata with pywikibot fails for items with images when user is not registered for commons.Mar 13 2018, 2:03 PM
Tdbot added a subscriber: Tdbot.EditedSun, Feb 3, 9:18 AM

I may have reproduced this bug. It happens on Q42 but not on another item without an image. Here is the PAWS output after an item.get():

WARNING: API error mwoauth-invalid-authorization-invalid-user: The authorization headers in your request are for a user that does not exist here
---------------------------------------------------------------------------
NoUsername                                Traceback (most recent call last)
<ipython-input-3-3945c6ebb807> in <module>()
----> 1 item.get()

/srv/paws/pwb/pywikibot/page.py in get(self, force, get_redirect, *args, **kwargs)
   4457         @raise NotImplementedError: a value in args or kwargs
   4458         """
-> 4459         data = super(ItemPage, self).get(force, *args, **kwargs)
   4460 
   4461         if self.isRedirectPage() and not get_redirect:

/srv/paws/pwb/pywikibot/page.py in get(self, force, *args, **kwargs)
   3950                 self.claims[pid] = []
   3951                 for claim in self._content['claims'][pid]:
-> 3952                     c = Claim.fromJSON(self.repo, claim)
   3953                     c.on_item = self
   3954                     self.claims[pid].append(c)

/srv/paws/pwb/pywikibot/page.py in fromJSON(cls, site, data)
   4894             if claim.type in cls.types or claim.type == 'wikibase-property':
   4895                 claim.target = cls.TARGET_CONVERTER.get(
-> 4896                     claim.type, lambda value, site: value)(value, site)
   4897             else:
   4898                 pywikibot.warning(

/srv/paws/pwb/pywikibot/page.py in <lambda>(value, site)
   4825             PropertyPage(site, 'P' + str(value['numeric-id'])),
   4826         'commonsMedia': lambda value, site:
-> 4827             FilePage(pywikibot.Site('commons', 'commons'), value),  # T90492
   4828         'globe-coordinate': pywikibot.Coordinate.fromWikibase,
   4829         'geo-shape': pywikibot.WbGeoShape.fromWikibase,

/srv/paws/pwb/pywikibot/tools/__init__.py in wrapper(*__args, **__kw)
   1735                              cls, depth)
   1736                     del __kw[old_arg]
-> 1737             return obj(*__args, **__kw)
   1738 
   1739         if not __debug__:

/srv/paws/pwb/pywikibot/page.py in __init__(self, source, title)
   2424         """Initializer."""
   2425         self._file_revisions = {}  # dictionary to cache File history.
-> 2426         super(FilePage, self).__init__(source, title, 6)
   2427         if self.namespace() != 6:
   2428             raise ValueError("'%s' is not in the file namespace!" % title)

/srv/paws/pwb/pywikibot/tools/__init__.py in wrapper(*__args, **__kw)
   1735                              cls, depth)
   1736                     del __kw[old_arg]
-> 1737             return obj(*__args, **__kw)
   1738 
   1739         if not __debug__:

/srv/paws/pwb/pywikibot/page.py in __init__(self, source, title, ns)
   2273                 raise ValueError('Title must be specified and not empty '
   2274                                  'if source is a Site.')
-> 2275         super(Page, self).__init__(source, title, ns)
   2276 
   2277     @property

/srv/paws/pwb/pywikibot/page.py in __init__(self, source, title, ns)
    194 
    195         if isinstance(source, pywikibot.site.BaseSite):
--> 196             self._link = Link(title, source=source, default_namespace=ns)
    197             self._revisions = {}
    198         elif isinstance(source, Page):

/srv/paws/pwb/pywikibot/tools/__init__.py in wrapper(*__args, **__kw)
   1735                              cls, depth)
   1736                     del __kw[old_arg]
-> 1737             return obj(*__args, **__kw)
   1738 
   1739         if not __debug__:

/srv/paws/pwb/pywikibot/page.py in __init__(self, text, source, default_namespace)
   5516         # See bug T104864, default_namespace might have been deleted.
   5517         try:
-> 5518             self._defaultns = self._source.namespaces[default_namespace]
   5519         except KeyError:
   5520             self._defaultns = default_namespace

/srv/paws/pwb/pywikibot/site.py in namespaces(self)
   1041         """Return dict of valid namespaces on this wiki."""
   1042         if not hasattr(self, '_namespaces'):
-> 1043             self._namespaces = NamespacesDict(self._build_namespaces())
   1044         return self._namespaces
   1045 

/srv/paws/pwb/pywikibot/site.py in _build_namespaces(self)
   2665         # For versions lower than 1.14, APISite needs to override
   2666         # the defaults defined in Namespace.
-> 2667         is_mw114 = self.mw_version >= '1.14'
   2668 
   2669         for nsdata in self.siteinfo.get('namespaces', cache=False).values():

/srv/paws/pwb/pywikibot/site.py in mw_version(self)
   2799         mw_ver, cache_time = getattr(self, '_mw_version_time', (None, None))
   2800         if mw_ver is None or time.time() - cache_time > 60 * 60 * 24:
-> 2801             mw_ver = MediaWikiVersion(self.version())
   2802             setattr(self, '_mw_version_time', (mw_ver, time.time()))
   2803         return mw_ver

/srv/paws/pwb/pywikibot/site.py in version(self)
   2782             try:
   2783                 version = self.siteinfo.get('generator',
-> 2784                                             expiry=1).split(' ')[1]
   2785             except pywikibot.data.api.APIError:
   2786                 # May occur if you are not logged in (no API read permissions).

/srv/paws/pwb/pywikibot/site.py in get(self, key, get_default, cache, expiry)
   1691                 elif not Siteinfo._is_expired(cached[1], expiry):
   1692                     return copy.deepcopy(cached[0])
-> 1693         preloaded = self._get_general(key, expiry)
   1694         if not preloaded:
   1695             preloaded = self._get_siteinfo(key, expiry)[key]

/srv/paws/pwb/pywikibot/site.py in _get_general(self, key, expiry)
   1637                     .format("', '".join(props)), _logger)
   1638             props += ['general']
-> 1639             default_info = self._get_siteinfo(props, expiry)
   1640             for prop in props:
   1641                 self._cache[prop] = default_info[prop]

/srv/paws/pwb/pywikibot/site.py in _get_siteinfo(self, prop, expiry)
   1560         request._warning_handler = warn_handler
   1561         try:
-> 1562             data = request.submit()
   1563         except api.APIError as e:
   1564             if e.code == 'siunknown_siprop':

/srv/paws/pwb/pywikibot/data/api.py in submit(self)
   2422         cached_available = self._load_cache()
   2423         if not cached_available:
-> 2424             self._data = super(CachedRequest, self).submit()
   2425             self._write_cache(self._data)
   2426         else:

/srv/paws/pwb/pywikibot/data/api.py in submit(self)
   2249                     continue
   2250                 raise NoUsername('Failed OAuth authentication for %s: %s'
-> 2251                                  % (self.site, info))
   2252             if code == 'cirrussearch-too-busy-error':  # T170647
   2253                 self.wait()

NoUsername: Failed OAuth authentication for commons:commons: The authorization headers in your request are for a user that does not exist here
Tdbot added a comment.Sun, Feb 3, 9:21 AM

OK, I have now logged into Commons and the error message no longer appears. Definitely the same bug.

Dvorapa added a comment.EditedSun, Feb 3, 8:47 PM

It seems autologin doesn't work properly for the Wikidata class.