# SPDX-License-Identifier: AGPL-3.0-or-later
"""This module implements the Wikidata engine.  Some implementations are shared
from the :ref:`wikipedia engine`.

"""
# pylint: disable=missing-class-docstring

from hashlib import md5
from urllib.parse import urlencode, unquote
from json import loads

from dateutil.parser import isoparse
from babel.dates import format_datetime, format_date, format_time, get_datetime_format

from searx.data import WIKIDATA_UNITS
from searx.network import post, get
from searx.utils import searxng_useragent, get_string_replaces_function
from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom
from searx.engines.wikipedia import (
    fetch_wikimedia_traits,
    get_wiki_params,
)
from searx.enginelib.traits import EngineTraits

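# Note: ``logger`` and ``traits`` used below are not imported here; as with the
# other engine modules, the SearXNG engine loader injects them into this module
# at setup time.
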
# about
about = {
    "website": 'https://wikidata.org/',
    "wikidata_id": 'Q2013',
    "official_api_documentation": 'https://query.wikidata.org/',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}

display_type = ["infobox"]
"""A list of display types composed from ``infobox`` and ``list``.  ``list``
adds a hit to the result list, ``infobox`` shows a hit in the info box.  The
list can contain one or both values."""
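
# For example, to show a hit in the info box *and* add it to the result list,
# set both values (a configuration sketch, not the default):
#
#   display_type = ["infobox", "list"]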


# SPARQL
SPARQL_ENDPOINT_URL = 'https://query.wikidata.org/sparql'
SPARQL_EXPLAIN_URL = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql?explain'
WIKIDATA_PROPERTIES = {
    'P434': 'MusicBrainz',
    'P435': 'MusicBrainz',
    'P436': 'MusicBrainz',
    'P966': 'MusicBrainz',
    'P345': 'IMDb',
    'P2397': 'YouTube',
    'P1651': 'YouTube',
    'P2002': 'Twitter',
    'P2013': 'Facebook',
    'P2003': 'Instagram',
    'P4033': 'Mastodon',
    'P11947': 'Lemmy',
    'P12622': 'PeerTube',
}

# SERVICE wikibase:mwapi : https://www.mediawiki.org/wiki/Wikidata_Query_Service/User_Manual/MWAPI
# SERVICE wikibase:label : https://en.wikibooks.org/wiki/SPARQL/SERVICE_-_Label#Manual_Label_SERVICE
# https://en.wikibooks.org/wiki/SPARQL/WIKIDATA_Precision,_Units_and_Coordinates
# https://www.mediawiki.org/wiki/Wikibase/Indexing/RDF_Dump_Format#Data_model
# optimization:
# * https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/query_optimization
# * https://github.com/blazegraph/database/wiki/QueryHints
QUERY_TEMPLATE = """
SELECT ?item ?itemLabel ?itemDescription ?lat ?long %SELECT%
WHERE
{
  SERVICE wikibase:mwapi {
    bd:serviceParam wikibase:endpoint "www.wikidata.org";
    wikibase:api "EntitySearch";
    wikibase:limit 1;
    mwapi:search "%QUERY%";
    mwapi:language "%LANGUAGE%".
    ?item wikibase:apiOutputItem mwapi:item.
  }
  hint:Prior hint:runFirst "true".

  %WHERE%

  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "%LANGUAGE%,en".
    ?item rdfs:label ?itemLabel .
    ?item schema:description ?itemDescription .
    %WIKIBASE_LABELS%
  }

}
GROUP BY ?item ?itemLabel ?itemDescription ?lat ?long %GROUP_BY%
"""

# Get the calendar names and the property names
QUERY_PROPERTY_NAMES = """
SELECT ?item ?name
WHERE {
    {
      SELECT ?item
      WHERE { ?item wdt:P279* wd:Q12132 }
    } UNION {
      VALUES ?item { %ATTRIBUTES% }
    }
    OPTIONAL { ?item rdfs:label ?name. }
}
"""

# see the property "dummy value" of https://www.wikidata.org/wiki/Q2013 (Wikidata)
# hard coded here to avoid an additional SPARQL request when the server starts
DUMMY_ENTITY_URLS = set(
    "http://www.wikidata.org/entity/" + wid for wid in ("Q4115189", "Q13406268", "Q15397819", "Q17339402")
)


# https://www.w3.org/TR/sparql11-query/#rSTRING_LITERAL1
# https://lists.w3.org/Archives/Public/public-rdf-dawg/2011OctDec/0175.html
sparql_string_escape = get_string_replaces_function(
    # fmt: off
    {
        '\t': '\\\t',
        '\n': '\\\n',
        '\r': '\\\r',
        '\b': '\\\b',
        '\f': '\\\f',
        '\"': '\\\"',
        '\'': '\\\'',
        '\\': '\\\\'
    }
    # fmt: on
)
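
# A minimal illustration, assuming (as the call sites suggest) that
# get_string_replaces_function returns a str -> str callable which prefixes
# each listed character with a backslash:
#
#   sparql_string_escape('say "hi"')  # -> 'say \\"hi\\"'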

replace_http_by_https = get_string_replaces_function({'http:': 'https:'})


def get_headers():
    # user agent: https://www.mediawiki.org/wiki/Wikidata_Query_Service/User_Manual#Query_limits
    return {'Accept': 'application/sparql-results+json', 'User-Agent': searxng_useragent()}


def get_label_for_entity(entity_id, language):
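    """Return a human readable label for ``entity_id``: try the plain id, then
    the (id, language), (id, language base) and (id, 'en') keys of
    WIKIDATA_PROPERTIES, and fall back to the raw entity id."""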
    name = WIKIDATA_PROPERTIES.get(entity_id)
    if name is None:
        name = WIKIDATA_PROPERTIES.get((entity_id, language))
    if name is None:
        name = WIKIDATA_PROPERTIES.get((entity_id, language.split('-')[0]))
    if name is None:
        name = WIKIDATA_PROPERTIES.get((entity_id, 'en'))
    if name is None:
        name = entity_id
    return name


def send_wikidata_query(query, method='GET', **kwargs):
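    """Send a SPARQL query to SPARQL_ENDPOINT_URL and return the decoded JSON
    response.  GET requests can be served from Wikidata's caches, POST requests
    always hit the SPARQL endpoint directly."""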
    if method == 'GET':
        # query will be cached by wikidata
        http_response = get(SPARQL_ENDPOINT_URL + '?' + urlencode({'query': query}), headers=get_headers(), **kwargs)
    else:
        # query won't be cached by wikidata
        http_response = post(SPARQL_ENDPOINT_URL, data={'query': query}, headers=get_headers(), **kwargs)
    if http_response.status_code != 200:
        logger.debug('SPARQL endpoint error %s', http_response.content.decode())
    logger.debug('request time %s', str(http_response.elapsed))
    http_response.raise_for_status()
    return loads(http_response.content.decode())
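
# A hedged usage sketch (the SPARQL string is an illustrative example, not a
# query this engine sends):
#
#   bindings = send_wikidata_query(
#       'SELECT ?item WHERE { ?item wdt:P31 wd:Q5 } LIMIT 1'
#   )['results']['bindings']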


def request(query, params):
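    """Build a POST request for the SPARQL endpoint: resolve the locale,
    compile QUERY_TEMPLATE via get_query() and remember the attribute list in
    ``params`` for later use in response()."""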

    eng_tag, _wiki_netloc = get_wiki_params(params['searxng_locale'], traits)
    query, attributes = get_query(query, eng_tag)
    logger.debug("request --> language %s // len(attributes): %s", eng_tag, len(attributes))

    params['method'] = 'POST'
    params['url'] = SPARQL_ENDPOINT_URL
    params['data'] = {'query': query}
    params['headers'] = get_headers()
    params['language'] = eng_tag
    params['attributes'] = attributes

    return params


def response(resp):
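    """Parse the SPARQL JSON response: deduplicate the returned entities, skip
    the well-known dummy entities and map each remaining binding to SearXNG
    results via get_results()."""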

    results = []
    jsonresponse = loads(resp.content.decode())

    language = resp.search_params['language']
    attributes = resp.search_params['attributes']
    logger.debug("response --> language %s // len(attributes): %s", language, len(attributes))

    seen_entities = set()
    for result in jsonresponse.get('results', {}).get('bindings', []):
        attribute_result = {key: value['value'] for key, value in result.items()}
        entity_url = attribute_result['item']
        if entity_url not in seen_entities and entity_url not in DUMMY_ENTITY_URLS:
            seen_entities.add(entity_url)
            results += get_results(attribute_result, attributes, language)
        else:
            logger.debug('The SPARQL request returns duplicate entities: %s', str(attribute_result))

    return results


_IMG_SRC_DEFAULT_URL_PREFIX = "https://commons.wikimedia.org/wiki/Special:FilePath/"
_IMG_SRC_NEW_URL_PREFIX = "https://upload.wikimedia.org/wikipedia/commons/thumb/"


def get_thumbnail(img_src):
    """Get a thumbnail image URL from Wikimedia Commons.

    Images from commons.wikimedia.org are (HTTP) redirected to
    upload.wikimedia.org.  The redirected URL can be calculated by this
    function.

    - https://stackoverflow.com/a/33691240

    """
    logger.debug('get_thumbnail(): %s', img_src)
    if img_src is not None and _IMG_SRC_DEFAULT_URL_PREFIX in img_src.split()[0]:
        img_src_name = unquote(img_src.replace(_IMG_SRC_DEFAULT_URL_PREFIX, "").split("?", 1)[0].replace("%20", "_"))
        img_src_name_first = img_src_name
        img_src_name_second = img_src_name

        if ".svg" in img_src_name.split()[0]:
            img_src_name_second = img_src_name + ".png"

        img_src_size = img_src.replace(_IMG_SRC_DEFAULT_URL_PREFIX, "").split("?", 1)[1]
        img_src_size = img_src_size[img_src_size.index("=") + 1 : img_src_size.index("&")]
        img_src_name_md5 = md5(img_src_name.encode("utf-8")).hexdigest()
        img_src = (
            _IMG_SRC_NEW_URL_PREFIX
            + img_src_name_md5[0]
            + "/"
            + img_src_name_md5[0:2]
            + "/"
            + img_src_name_first
            + "/"
            + img_src_size
            + "px-"
            + img_src_name_second
        )
        logger.debug('get_thumbnail() redirected: %s', img_src)

    return img_src
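
# A hedged example of the rewrite performed by get_thumbnail() (the file name
# is illustrative; <m>/<mm> stand for the first one and two hex digits of
# md5("Example.jpg")):
#
#   https://commons.wikimedia.org/wiki/Special:FilePath/Example.jpg?width=500&...
#   --> https://upload.wikimedia.org/wikipedia/commons/thumb/<m>/<mm>/Example.jpg/500px-Example.jpg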


def get_results(attribute_result, attributes, language):
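    """Map one SPARQL binding (``attribute_result``) to SearXNG results: an
    infobox and/or plain hits for the result list, depending on the
    display_type setting."""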
    # pylint: disable=too-many-branches
    results = []
    infobox_title = attribute_result.get('itemLabel')
    infobox_id = attribute_result['item']
    infobox_id_lang = None
    infobox_urls = []
    infobox_attributes = []
    infobox_content = attribute_result.get('itemDescription', [])
    img_src = None
    img_src_priority = 0

    for attribute in attributes:
        value = attribute.get_str(attribute_result, language)
        if value is not None and value != '':
            attribute_type = type(attribute)

            if attribute_type in (WDURLAttribute, WDArticle):
                # the get_select() method uses group_concat(distinct ...;separator=", "),
                # so split the value here
                for url in value.split(', '):
                    infobox_urls.append({'title': attribute.get_label(language), 'url': url, **attribute.kwargs})
                    # "normal" results (not infobox) include official website and Wikipedia links.
                    if "list" in display_type and (attribute.kwargs.get('official') or attribute_type == WDArticle):
                        results.append({'title': infobox_title, 'url': url, "content": infobox_content})

                    # update the infobox_id with the wikipedia URL
                    # first the local wikipedia URL, and as fallback the english wikipedia URL
                    if attribute_type == WDArticle and (
                        (attribute.language == 'en' and infobox_id_lang is None) or attribute.language != 'en'
                    ):
                        infobox_id_lang = attribute.language
                        infobox_id = url
            elif attribute_type == WDImageAttribute:
                # this attribute is an image.
                # replace the current image only if this attribute's priority is
                # higher (the infobox contains only one image).
                if attribute.priority > img_src_priority:
                    img_src = get_thumbnail(value)
                    img_src_priority = attribute.priority
            elif attribute_type == WDGeoAttribute:
                # geocoordinate link
                # use the area to get the OSM zoom
                # Note: ignore the unit (must be km² otherwise the calculation is wrong)
                # Should use normalized value p:P2046/psn:P2046/wikibase:quantityAmount
                area = attribute_result.get('P2046')
                osm_zoom = area_to_osm_zoom(area) if area else 19
                url = attribute.get_geo_url(attribute_result, osm_zoom=osm_zoom)
                if url:
                    infobox_urls.append({'title': attribute.get_label(language), 'url': url, 'entity': attribute.name})
            else:
                infobox_attributes.append(
                    {'label': attribute.get_label(language), 'value': value, 'entity': attribute.name}
                )

    if infobox_id:
        infobox_id = replace_http_by_https(infobox_id)

    # add the wikidata URL at the end
    infobox_urls.append({'title': 'Wikidata', 'url': attribute_result['item']})

    if (
        "list" in display_type
        and img_src is None
        and len(infobox_attributes) == 0
        and len(infobox_urls) == 1
        and len(infobox_content) == 0
    ):
        results.append({'url': infobox_urls[0]['url'], 'title': infobox_title, 'content': infobox_content})
    elif "infobox" in display_type:
        results.append(
            {
                'infobox': infobox_title,
                'id': infobox_id,
                'content': infobox_content,
                'img_src': img_src,
                'urls': infobox_urls,
                'attributes': infobox_attributes,
            }
        )
    return results


def get_query(query, language):
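    """Fill the placeholders of QUERY_TEMPLATE (%QUERY%, %SELECT%, %WHERE%,
    %WIKIBASE_LABELS%, %GROUP_BY%, %LANGUAGE%) from the attribute list for
    ``language`` and return the SPARQL query together with that list."""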
    attributes = get_attributes(language)
    select = [a.get_select() for a in attributes]
    where = list(filter(lambda s: len(s) > 0, [a.get_where() for a in attributes]))
    wikibase_label = list(filter(lambda s: len(s) > 0, [a.get_wikibase_label() for a in attributes]))
    group_by = list(filter(lambda s: len(s) > 0, [a.get_group_by() for a in attributes]))
    query = (
        QUERY_TEMPLATE.replace('%QUERY%', sparql_string_escape(query))
        .replace('%SELECT%', ' '.join(select))
        .replace('%WHERE%', '\n  '.join(where))
        .replace('%WIKIBASE_LABELS%', '\n      '.join(wikibase_label))
        .replace('%GROUP_BY%', ' '.join(group_by))
        .replace('%LANGUAGE%', language)
    )
    return query, attributes


def get_attributes(language):
    # pylint: disable=too-many-statements
    attributes = []

    def add_value(name):
        attributes.append(WDAttribute(name))

    def add_amount(name):
        attributes.append(WDAmountAttribute(name))

    def add_label(name):
        attributes.append(WDLabelAttribute(name))

    def add_url(name, url_id=None, url_path_prefix=None, **kwargs):
        attributes.append(WDURLAttribute(name, url_id, url_path_prefix, kwargs))

    def add_image(name, url_id=None, priority=1):
        attributes.append(WDImageAttribute(name, url_id, priority))

    def add_date(name):
        attributes.append(WDDateAttribute(name))

    # Dates
    for p in [
        'P571',  # inception date
        'P576',  # dissolution date
        'P580',  # start date
        'P582',  # end date
        'P569',  # date of birth
        'P570',  # date of death
        'P619',  # date of spacecraft launch
        'P620',  # date of spacecraft landing
    ]:
        add_date(p)

    for p in [
        'P27',  # country of citizenship
        'P495',  # country of origin
        'P17',  # country
        'P159',  # headquarters location
    ]:
        add_label(p)

    # Places
    for p in [
        'P36',  # capital
        'P35',  # head of state
        'P6',  # head of government
        'P122',  # basic form of government
        'P37',  # official language
    ]:
        add_label(p)

    add_value('P1082')  # population
    add_amount('P2046')  # area
    add_amount('P281')  # postal code
    add_label('P38')  # currency
    add_amount('P2048')  # height (building)

    # Media
    for p in [
        'P400',  # platform (videogames, computing)
        'P50',  # author
        'P170',  # creator
        'P57',  # director
        'P175',  # performer
        'P178',  # developer
        'P162',  # producer
        'P176',  # manufacturer
        'P58',  # screenwriter
        'P272',  # production company
        'P264',  # record label
        'P123',  # publisher
        'P449',  # original network
        'P750',  # distributed by
        'P86',  # composer
    ]:
        add_label(p)

    add_date('P577')  # publication date
    add_label('P136')  # genre (music, film, artistic...)
    add_label('P364')  # original language
    add_value('P212')  # ISBN-13
    add_value('P957')  # ISBN-10
    add_label('P275')  # copyright license
    add_label('P277')  # programming language
    add_value('P348')  # version
    add_label('P840')  # narrative location

    # Languages
    add_value('P1098')  # number of speakers
    add_label('P282')  # writing system
    add_label('P1018')  # language regulatory body
    add_value('P218')  # language code (ISO 639-1)

    # Other
    add_label('P169')  # CEO
    add_label('P112')  # founded by
    add_label('P1454')  # legal form (company, organization)
    add_label('P137')  # operator (service, facility, ...)
    add_label('P1029')  # crew members
    add_label('P225')  # taxon name
    add_value('P274')  # chemical formula
    add_label('P1346')  # winner (sports, contests, ...)
    add_value('P1120')  # number of deaths
    add_value('P498')  # currency code (ISO 4217)

    # URL
    add_url('P856', official=True)  # official website
    attributes.append(WDArticle(language))  # wikipedia (user language)
    if not language.startswith('en'):
        attributes.append(WDArticle('en'))  # wikipedia (english)

    add_url('P1324')  # source code repository
    add_url('P1581')  # blog
    add_url('P434', url_id='musicbrainz_artist')
    add_url('P435', url_id='musicbrainz_work')
    add_url('P436', url_id='musicbrainz_release_group')
    add_url('P966', url_id='musicbrainz_label')
    add_url('P345', url_id='imdb_id')
    add_url('P2397', url_id='youtube_channel')
    add_url('P1651', url_id='youtube_video')
    add_url('P2002', url_id='twitter_profile')
    add_url('P2013', url_id='facebook_profile')
    add_url('P2003', url_id='instagram_profile')

    # Fediverse
    add_url('P4033', url_path_prefix='/@')  # Mastodon user
    add_url('P11947', url_path_prefix='/c/')  # Lemmy community
    add_url('P12622', url_path_prefix='/c/')  # PeerTube channel

    # Map
    attributes.append(WDGeoAttribute('P625'))

    # Image
    add_image('P15', priority=1, url_id='wikimedia_image')  # route map
    add_image('P242', priority=2, url_id='wikimedia_image')  # locator map
    add_image('P154', priority=3, url_id='wikimedia_image')  # logo
    add_image('P18', priority=4, url_id='wikimedia_image')  # image
    add_image('P41', priority=5, url_id='wikimedia_image')  # flag
    add_image('P2716', priority=6, url_id='wikimedia_image')  # collage
    add_image('P2910', priority=7, url_id='wikimedia_image')  # icon

    return attributes


class WDAttribute:

    __slots__ = ('name',)

    def __init__(self, name):
        self.name = name

    def get_select(self):
        return '(group_concat(distinct ?{name};separator=", ") as ?{name}s)'.replace('{name}', self.name)

    def get_label(self, language):
        return get_label_for_entity(self.name, language)

    def get_where(self):
        return "OPTIONAL { ?item wdt:{name} ?{name} . }".replace('{name}', self.name)

    def get_wikibase_label(self):
        return ""

    def get_group_by(self):
        return ""

    def get_str(self, result, language):  # pylint: disable=unused-argument
        return result.get(self.name + 's')

    def __repr__(self):
        return '<' + str(type(self).__name__) + ':' + self.name + '>'


class WDAmountAttribute(WDAttribute):
    def get_select(self):
        return '?{name} ?{name}Unit'.replace('{name}', self.name)

    def get_where(self):
        return """  OPTIONAL { ?item p:{name} ?{name}Node .
    ?{name}Node rdf:type wikibase:BestRank ; ps:{name} ?{name} .
    OPTIONAL { ?{name}Node psv:{name}/wikibase:quantityUnit ?{name}Unit. } }""".replace(
            '{name}', self.name
        )

    def get_group_by(self):
        return self.get_select()

    def get_str(self, result, language):
        value = result.get(self.name)
        unit = result.get(self.name + "Unit")
        if unit is not None:
            unit = unit.replace('http://www.wikidata.org/entity/', '')
            return value + " " + get_label_for_entity(unit, language)
        return value


class WDArticle(WDAttribute):

    __slots__ = 'language', 'kwargs'

    def __init__(self, language, kwargs=None):
        super().__init__('wikipedia')
        self.language = language
        self.kwargs = kwargs or {}

    def get_label(self, language):
        # language parameter is ignored
        return "Wikipedia ({language})".replace('{language}', self.language)

    def get_select(self):
        return "?article{language} ?articleName{language}".replace('{language}', self.language)

    def get_where(self):
        return """OPTIONAL { ?article{language} schema:about ?item ;
             schema:inLanguage "{language}" ;
             schema:isPartOf <https://{language}.wikipedia.org/> ;
             schema:name ?articleName{language} . }""".replace(
            '{language}', self.language
        )

    def get_group_by(self):
        return self.get_select()

    def get_str(self, result, language):
        key = 'article{language}'.replace('{language}', self.language)
        return result.get(key)


class WDLabelAttribute(WDAttribute):
    def get_select(self):
        return '(group_concat(distinct ?{name}Label;separator=", ") as ?{name}Labels)'.replace('{name}', self.name)

    def get_where(self):
        return "OPTIONAL { ?item wdt:{name} ?{name} . }".replace('{name}', self.name)

    def get_wikibase_label(self):
        return "?{name} rdfs:label ?{name}Label .".replace('{name}', self.name)

    def get_str(self, result, language):
        return result.get(self.name + 'Labels')


class WDURLAttribute(WDAttribute):

    HTTP_WIKIMEDIA_IMAGE = 'http://commons.wikimedia.org/wiki/Special:FilePath/'

    __slots__ = 'url_id', 'url_path_prefix', 'kwargs'

    def __init__(self, name, url_id=None, url_path_prefix=None, kwargs=None):
        """
        :param url_id: ID matching one key in ``external_urls.json`` for
            converting IDs to full URLs.

        :param url_path_prefix: Path prefix if the values are of format
            ``account@domain``.  If provided, values are rewritten to
            ``https://<domain><url_path_prefix><account>``.  For example::

                WDURLAttribute('P4033', url_path_prefix='/@')

            Adds property `P4033 <https://www.wikidata.org/wiki/Property:P4033>`_
            to the wikidata query.  This field might return for example
            ``libreoffice@fosstodon.org`` and the URL built from this is then:

            - account: ``libreoffice``
            - domain: ``fosstodon.org``
            - result url: https://fosstodon.org/@libreoffice
        """

        super().__init__(name)
        self.url_id = url_id
        self.url_path_prefix = url_path_prefix
        self.kwargs = kwargs

    def get_str(self, result, language):
        value = result.get(self.name + 's')
        if not value:
            return None

        value = value.split(',')[0]
        if self.url_id:
            url_id = self.url_id
            if value.startswith(WDURLAttribute.HTTP_WIKIMEDIA_IMAGE):
                value = value[len(WDURLAttribute.HTTP_WIKIMEDIA_IMAGE) :]
                url_id = 'wikimedia_image'
            return get_external_url(url_id, value)

        if self.url_path_prefix:
            [account, domain] = [x.strip("@ ") for x in value.rsplit('@', 1)]
            return f"https://{domain}{self.url_path_prefix}{account}"

        return value


class WDGeoAttribute(WDAttribute):
    def get_label(self, language):
        return "OpenStreetMap"

    def get_select(self):
        return "?{name}Lat ?{name}Long".replace('{name}', self.name)

    def get_where(self):
        return """OPTIONAL { ?item p:{name}/psv:{name} [
    wikibase:geoLatitude ?{name}Lat ;
    wikibase:geoLongitude ?{name}Long ] }""".replace(
            '{name}', self.name
        )

    def get_group_by(self):
        return self.get_select()

    def get_str(self, result, language):
        latitude = result.get(self.name + 'Lat')
        longitude = result.get(self.name + 'Long')
        if latitude and longitude:
            return latitude + ' ' + longitude
        return None

    def get_geo_url(self, result, osm_zoom=19):
        latitude = result.get(self.name + 'Lat')
        longitude = result.get(self.name + 'Long')
        if latitude and longitude:
            return get_earth_coordinates_url(latitude, longitude, osm_zoom)
        return None


class WDImageAttribute(WDURLAttribute):

    __slots__ = ('priority',)

    def __init__(self, name, url_id=None, priority=100):
        super().__init__(name, url_id)
        self.priority = priority


class WDDateAttribute(WDAttribute):
    def get_select(self):
        return '?{name} ?{name}timePrecision ?{name}timeZone ?{name}timeCalendar'.replace('{name}', self.name)

    def get_where(self):
        # To remove duplicates, add
        #   FILTER NOT EXISTS { ?item p:{name}/psv:{name}/wikibase:timeValue ?{name}bis FILTER (?{name}bis < ?{name}) }
        # but this filter is too slow, so the response function ignores
        # duplicate results (see the seen_entities variable)
        return """OPTIONAL { ?item p:{name}/psv:{name} [
    wikibase:timeValue ?{name} ;
    wikibase:timePrecision ?{name}timePrecision ;
    wikibase:timeTimezone ?{name}timeZone ;
    wikibase:timeCalendarModel ?{name}timeCalendar ] . }
    hint:Prior hint:rangeSafe true;""".replace(
            '{name}', self.name
        )

    def get_group_by(self):
        return self.get_select()

    def format_8(self, value, locale):  # pylint: disable=unused-argument
        # precision: less than a year
        return value

    def format_9(self, value, locale):
        year = int(value)
        # precision: year
        if year < 1584:
            if year < 0:
                return str(year - 1)
            return str(year)
        timestamp = isoparse(value)
        return format_date(timestamp, format='yyyy', locale=locale)

    def format_10(self, value, locale):
        # precision: month
        timestamp = isoparse(value)
        return format_date(timestamp, format='MMMM y', locale=locale)

    def format_11(self, value, locale):
        # precision: day
        timestamp = isoparse(value)
        return format_date(timestamp, format='full', locale=locale)

    def format_13(self, value, locale):
        timestamp = isoparse(value)
        # precision: minute
        return (
            get_datetime_format(format, locale=locale)
            .replace("'", "")
            .replace('{0}', format_time(timestamp, 'full', tzinfo=None, locale=locale))
            .replace('{1}', format_date(timestamp, 'short', locale=locale))
        )

    def format_14(self, value, locale):
        # precision: second.
        return format_datetime(isoparse(value), format='full', locale=locale)

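    # Map Wikidata's timePrecision values (0 = billion years ... 14 = second)
    # to a format_* method; a second field >= 1 tells get_str() to truncate
    # the ISO value to its (signed) year before formatting.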
    DATE_FORMAT = {
        '0': ('format_8', 1000000000),
        '1': ('format_8', 100000000),
        '2': ('format_8', 10000000),
        '3': ('format_8', 1000000),
        '4': ('format_8', 100000),
        '5': ('format_8', 10000),
        '6': ('format_8', 1000),
        '7': ('format_8', 100),
        '8': ('format_8', 10),
        '9': ('format_9', 1),  # year
        '10': ('format_10', 1),  # month
        '11': ('format_11', 0),  # day
        '12': ('format_13', 0),  # hour (not supported by babel, display minute)
        '13': ('format_13', 0),  # minute
        '14': ('format_14', 0),  # second
    }

    def get_str(self, result, language):
        value = result.get(self.name)
        if value == '' or value is None:
            return None
        precision = result.get(self.name + 'timePrecision')
        date_format = WDDateAttribute.DATE_FORMAT.get(precision)
        if date_format is not None:
            format_method = getattr(self, date_format[0])
            precision = date_format[1]
            try:
                if precision >= 1:
                    t = value.split('-')
                    if value.startswith('-'):
                        value = '-' + t[1]
                    else:
                        value = t[0]
                return format_method(value, language)
            except Exception:  # pylint: disable=broad-except
                return value
        return value


def debug_explain_wikidata_query(query, method='GET'):
    if method == 'GET':
        http_response = get(SPARQL_EXPLAIN_URL + '&' + urlencode({'query': query}), headers=get_headers())
    else:
        http_response = post(SPARQL_EXPLAIN_URL, data={'query': query}, headers=get_headers())
    http_response.raise_for_status()
    return http_response.content


def init(engine_settings=None):  # pylint: disable=unused-argument
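    """Fill WIKIDATA_PROPERTIES once at engine setup: unit symbols from
    WIKIDATA_UNITS plus property labels fetched with a single SPARQL request."""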
    # WIKIDATA_PROPERTIES : add unit symbols
    for k, v in WIKIDATA_UNITS.items():
        WIKIDATA_PROPERTIES[k] = v['symbol']

    # WIKIDATA_PROPERTIES : add property labels
    wikidata_property_names = []
    for attribute in get_attributes('en'):
        if type(attribute) in (WDAttribute, WDAmountAttribute, WDURLAttribute, WDDateAttribute, WDLabelAttribute):
            if attribute.name not in WIKIDATA_PROPERTIES:
                wikidata_property_names.append("wd:" + attribute.name)
    query = QUERY_PROPERTY_NAMES.replace('%ATTRIBUTES%', " ".join(wikidata_property_names))
    jsonresponse = send_wikidata_query(query, timeout=20)
    for result in jsonresponse.get('results', {}).get('bindings', []):
        name = result['name']['value']
        lang = result['name']['xml:lang']
        entity_id = result['item']['value'].replace('http://www.wikidata.org/entity/', '')
        WIKIDATA_PROPERTIES[(entity_id, lang)] = name.capitalize()


def fetch_traits(engine_traits: EngineTraits):
    """Uses languages evaluated from :py:obj:`wikipedia.fetch_wikimedia_traits
    <searx.engines.wikipedia.fetch_wikimedia_traits>` and removes

    - ``traits.custom['wiki_netloc']``: wikidata does not have per-language
      net-locations

    - ``traits.custom['WIKIPEDIA_LANGUAGES']``: the language list of the
      wikipedia engine is not used by the wikidata engine

    """

    fetch_wikimedia_traits(engine_traits)
    engine_traits.custom['wiki_netloc'] = {}
    engine_traits.custom['WIKIPEDIA_LANGUAGES'] = []