Friday, 18 December 2020

How to authenticate to Wikimedia Commons Query Service using OAuth in Python?

I am trying to use the Wikimedia Commons Query Service[1] programmatically using Python, but am having trouble authenticating via OAuth 1.

Below is a self contained Python example which does not work as expected. The expected behaviour is that a result set is returned, but instead a HTML response of the login page is returned. You can get the dependencies with pip install --user sparqlwrapper oauthlib certifi. The script should then be given the path to a text file containing the pasted output given after applying for an owner only token[2]. e.g.

Consumer token
    deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef
Consumer secret
    deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef
Access token
    deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef
Access secret
    deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef

[1] https://wcqs-beta.wmflabs.org/ ; https://diff.wikimedia.org/2020/10/29/sparql-in-the-shadow-of-structured-data-on-commons/

[2] https://www.mediawiki.org/wiki/OAuth/Owner-only_consumers

import sys
from SPARQLWrapper import JSON, SPARQLWrapper
import certifi
from SPARQLWrapper import Wrapper
from functools import partial
from oauthlib.oauth1 import Client
 
 
ENDPOINT = "https://wcqs-beta.wmflabs.org/sparql"
QUERY = """
SELECT ?file WHERE {
  ?file wdt:P180 wd:Q42 .
}
"""
 
 
def monkeypatch_sparqlwrapper():
    # Deal with old system certificates
    if not hasattr(Wrapper.urlopener, "monkeypatched"):
        Wrapper.urlopener = partial(Wrapper.urlopener, cafile=certifi.where())
        setattr(Wrapper.urlopener, "monkeypatched", True)
 
 
def oauth_client(auth_file):
    # Read credential from file
    creds = []
    for idx, line in enumerate(auth_file):
        if idx % 2 == 0:
            continue
        creds.append(line.strip())
    return Client(*creds)
 
 
class OAuth1SPARQLWrapper(SPARQLWrapper):
    # OAuth sign SPARQL requests

    def __init__(self, *args, **kwargs):
        self.client = kwargs.pop("client")
        super().__init__(*args, **kwargs)
 
    def _createRequest(self):
        request = super()._createRequest()
        uri = request.get_full_url()
        method = request.get_method()
        body = request.data
        headers = request.headers
        new_uri, new_headers, new_body = self.client.sign(uri, method, body, headers)
        request.full_url = new_uri
        request.headers = new_headers
        request.data = new_body
        print("Sending request")
        print("Url", request.full_url)
        print("Headers", request.headers)
        print("Data", request.data)
        return request
 
 
monkeypatch_sparqlwrapper()
client = oauth_client(open(sys.argv[1]))
sparql = OAuth1SPARQLWrapper(ENDPOINT, client=client)
sparql.setQuery(QUERY)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()
 
print("Results")
print(results)


from How to authenticate to Wikimedia Commons Query Service using OAuth in Python?

No comments:

Post a Comment