I am trying to use the Wikimedia Commons Query Service[1] programmatically using Python, but am having trouble authenticating via OAuth 1.
Below is a self-contained Python example which does not work as expected. The expected behaviour is that a result set is returned, but instead an HTML response of the login page is returned. You can get the dependencies with: pip install --user sparqlwrapper oauthlib certifi
The script should then be given the path to a text file containing the pasted output given after applying for an owner-only token[2], e.g.:
Consumer token
deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef
Consumer secret
deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef
Access token
deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef
Access secret
deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef
[1] https://wcqs-beta.wmflabs.org/ ; https://diff.wikimedia.org/2020/10/29/sparql-in-the-shadow-of-structured-data-on-commons/
[2] https://www.mediawiki.org/wiki/OAuth/Owner-only_consumers
import sys
from SPARQLWrapper import JSON, SPARQLWrapper
import certifi
from SPARQLWrapper import Wrapper
from functools import partial
from oauthlib.oauth1 import Client
# SPARQL endpoint of the Wikimedia Commons Query Service beta instance; this is
# the URL handed to OAuth1SPARQLWrapper below.
ENDPOINT = "https://wcqs-beta.wmflabs.org/sparql"
# Example query sent to ENDPOINT: selects every ?file that has a wdt:P180
# statement whose value is wd:Q42.
# NOTE(review): no PREFIX declarations are included, so this presumably relies
# on the endpoint predefining the wdt:/wd: prefixes — confirm.
QUERY = """
SELECT ?file WHERE {
?file wdt:P180 wd:Q42 .
}
"""
def monkeypatch_sparqlwrapper():
    """Make SPARQLWrapper verify TLS against certifi's CA bundle.

    Works around outdated system certificates by replacing
    ``Wrapper.urlopener`` with a partial that always supplies an SSL context
    built from ``certifi.where()``. The replacement is tagged with a
    ``monkeypatched`` attribute so that calling this function again is a no-op.
    """
    # Local import keeps this fix self-contained (no change to file imports).
    import ssl

    if hasattr(Wrapper.urlopener, "monkeypatched"):
        return

    # The ``cafile=`` argument of urlopen() was deprecated in Python 3.6 and
    # removed in 3.13; passing an explicit SSL context is the supported way
    # to select a CA bundle.
    context = ssl.create_default_context(cafile=certifi.where())
    patched = partial(Wrapper.urlopener, context=context)
    patched.monkeypatched = True
    Wrapper.urlopener = patched
def oauth_client(auth_file):
    """Build an OAuth 1 ``Client`` from a pasted owner-only credentials file.

    The file is expected to hold four label/value pairs on alternating lines
    (label first, value second), in the order: consumer token, consumer
    secret, access token, access secret. Blank lines are ignored so that
    spacing in the pasted text does not shift which lines count as values.

    Args:
        auth_file: An iterable of text lines, e.g. an open text file.

    Returns:
        A ``Client`` constructed positionally from the four values.

    Raises:
        ValueError: If the file does not contain exactly four label/value
            pairs.
    """
    # Drop blank lines first; otherwise a single empty line would flip the
    # label/value parity and labels would be used as credentials.
    entries = [stripped for stripped in (line.strip() for line in auth_file) if stripped]
    if len(entries) != 8:
        raise ValueError(
            "expected 4 label/value pairs (8 non-blank lines) in the "
            f"credentials file, found {len(entries)} non-blank lines"
        )
    # Every second entry (index 1, 3, 5, 7) is a credential value.
    creds = entries[1::2]
    return Client(*creds)
class OAuth1SPARQLWrapper(SPARQLWrapper):
    """SPARQLWrapper subclass that OAuth-signs every request it creates.

    Accepts the same arguments as ``SPARQLWrapper`` plus a required ``client``
    keyword argument: an object whose ``sign(uri, method, body, headers)``
    method returns a ``(uri, headers, body)`` tuple.
    """

    def __init__(self, *args, **kwargs):
        """Store the signing client and forward everything else to the base.

        Raises:
            KeyError: If the ``client`` keyword argument is missing.
        """
        # Remove ``client`` before delegating: the base class does not
        # receive this keyword.
        self.client = kwargs.pop("client")
        super().__init__(*args, **kwargs)

    def _createRequest(self):
        """Create the request via the base class, sign it, and return it.

        The URL, headers and body returned by the signing client replace the
        ones on the request. The signed request is also printed for debugging.
        """
        request = super()._createRequest()
        signed_url, signed_headers, signed_body = self.client.sign(
            request.get_full_url(),
            request.get_method(),
            request.data,
            request.headers,
        )
        # urllib only accepts bytes-like request bodies; if the signer handed
        # the body back as text, encode it so POST requests can be sent.
        if isinstance(signed_body, str):
            signed_body = signed_body.encode("utf-8")

        request.full_url = signed_url
        request.headers = signed_headers
        request.data = signed_body

        print("Sending request")
        print("Url", request.full_url)
        print("Headers", request.headers)
        print("Data", request.data)
        return request
# Script entry: patch TLS handling, load credentials from the file named by
# the first command-line argument, run QUERY against ENDPOINT, print results.
monkeypatch_sparqlwrapper()
# Use a context manager so the credentials file is closed as soon as the
# client has been built, instead of leaking the open handle.
with open(sys.argv[1]) as auth_file:
    client = oauth_client(auth_file)
sparql = OAuth1SPARQLWrapper(ENDPOINT, client=client)
sparql.setQuery(QUERY)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()
print("Results")
print(results)
from How to authenticate to Wikimedia Commons Query Service using OAuth in Python?
No comments:
Post a Comment