.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
wikidata.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2"""This module implements the Wikidata engine. Some implementations are shared
3from :ref:`wikipedia engine`.
4
5"""
6# pylint: disable=missing-class-docstring
7
8from typing import TYPE_CHECKING
9from hashlib import md5
10from urllib.parse import urlencode, unquote
11from json import loads
12
13from dateutil.parser import isoparse
14from babel.dates import format_datetime, format_date, format_time, get_datetime_format
15
16from searx.data import WIKIDATA_UNITS
17from searx.network import post, get
18from searx.utils import searx_useragent, get_string_replaces_function
19from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom
20from searx.engines.wikipedia import (
21 fetch_wikimedia_traits,
22 get_wiki_params,
23)
24from searx.enginelib.traits import EngineTraits
25
26if TYPE_CHECKING:
27 import logging
28
29 logger: logging.Logger
30
31traits: EngineTraits
32
33# about
about = dict(
    website='https://wikidata.org/',
    wikidata_id='Q2013',
    official_api_documentation='https://query.wikidata.org/',
    use_official_api=True,
    require_api_key=False,
    results='JSON',
)

display_type = ["infobox"]
"""Display types to produce, any combination of ``infobox`` and ``list``.
``infobox`` shows a hit in the info box, ``list`` adds a hit to the result
list.  Either one or both values can be set."""
47
48
49# SPARQL
SPARQL_ENDPOINT_URL = 'https://query.wikidata.org/sparql'
SPARQL_EXPLAIN_URL = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql?explain'

# Fixed display names, keyed by Wikidata property ID.  Properties that share
# a name are grouped; the insertion order is the same as a flat literal.
WIKIDATA_PROPERTIES = {
    **dict.fromkeys(('P434', 'P435', 'P436', 'P966'), 'MusicBrainz'),
    'P345': 'IMDb',
    **dict.fromkeys(('P2397', 'P1651'), 'YouTube'),
    'P2002': 'Twitter',
    'P2013': 'Facebook',
    'P2003': 'Instagram',
    'P4033': 'Mastodon',
    'P11947': 'Lemmy',
    'P12622': 'PeerTube',
}
67
# SERVICE wikibase:mwapi : https://www.mediawiki.org/wiki/Wikidata_Query_Service/User_Manual/MWAPI
# SERVICE wikibase:label: https://en.wikibooks.org/wiki/SPARQL/SERVICE_-_Label#Manual_Label_SERVICE
# https://en.wikibooks.org/wiki/SPARQL/WIKIDATA_Precision,_Units_and_Coordinates
# https://www.mediawiki.org/wiki/Wikibase/Indexing/RDF_Dump_Format#Data_model
# optimization:
# * https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/query_optimization
# * https://github.com/blazegraph/database/wiki/QueryHints
#
# Template of the search query.  get_query() fills in the placeholders:
# %QUERY% (escaped user query), %LANGUAGE% (language tag) and %SELECT%,
# %WHERE%, %WIKIBASE_LABELS%, %GROUP_BY% (fragments collected from the
# attribute objects).
QUERY_TEMPLATE = """
SELECT ?item ?itemLabel ?itemDescription ?lat ?long %SELECT%
WHERE
{
 SERVICE wikibase:mwapi {
 bd:serviceParam wikibase:endpoint "www.wikidata.org";
 wikibase:api "EntitySearch";
 wikibase:limit 1;
 mwapi:search "%QUERY%";
 mwapi:language "%LANGUAGE%".
 ?item wikibase:apiOutputItem mwapi:item.
 }
 hint:Prior hint:runFirst "true".

 %WHERE%

 SERVICE wikibase:label {
 bd:serviceParam wikibase:language "%LANGUAGE%,en".
 ?item rdfs:label ?itemLabel .
 ?item schema:description ?itemDescription .
 %WIKIBASE_LABELS%
 }

}
GROUP BY ?item ?itemLabel ?itemDescription ?lat ?long %GROUP_BY%
"""

# Get the calendar names and the property names.
# init() replaces %ATTRIBUTES% with a space separated list of "wd:<ID>" terms.
QUERY_PROPERTY_NAMES = """
SELECT ?item ?name
WHERE {
 {
 SELECT ?item
 WHERE { ?item wdt:P279* wd:Q12132 }
 } UNION {
 VALUES ?item { %ATTRIBUTES% }
 }
 OPTIONAL { ?item rdfs:label ?name. }
}
"""
115
# See the property "dummy value" of https://www.wikidata.org/wiki/Q2013 (Wikidata).
# Hard coded here to avoid an additional SPARQL request when the server starts.
DUMMY_ENTITY_URLS = {
    f"http://www.wikidata.org/entity/{wid}" for wid in ("Q4115189", "Q13406268", "Q15397819", "Q17339402")
}
121
122
# https://www.w3.org/TR/sparql11-query/#rSTRING_LITERAL1
# https://lists.w3.org/Archives/Public/public-rdf-dawg/2011OctDec/0175.html
#
# SPARQL only accepts the escape sequences \t \n \r \b \f \" \' and \\ inside
# a string literal.  A backslash followed by the *raw* control character is
# not a valid escape, so every control character maps to its letter form.
_SPARQL_ESCAPE_TABLE = str.maketrans(
    {
        '\t': '\\t',
        '\n': '\\n',
        '\r': '\\r',
        '\b': '\\b',
        '\f': '\\f',
        '"': '\\"',
        "'": "\\'",
        '\\': '\\\\',
    }
)


def sparql_string_escape(text):
    """Escape ``text`` so it can be placed inside a quoted SPARQL string literal.

    :param text: raw string, for example the user's search query.
    :returns: ``text`` with tab, newline, carriage return, backspace, form
        feed, both quote characters and the backslash replaced by their
        SPARQL escape sequences.  Every character is visited once, so an
        escape that was just inserted is never escaped again.
    """
    return text.translate(_SPARQL_ESCAPE_TABLE)
139
# Rewrites every "http:" in a string to "https:"; get_results() applies it to the infobox id.
replace_http_by_https = get_string_replaces_function({'http:': 'https:'})
141
142
144 # user agent: https://www.mediawiki.org/wiki/Wikidata_Query_Service/User_Manual#Query_limits
145 return {'Accept': 'application/sparql-results+json', 'User-Agent': searx_useragent()}
146
147
def get_label_for_entity(entity_id, language):
    """Return the display name of a Wikidata entity.

    ``WIKIDATA_PROPERTIES`` is searched with these keys, in order: the bare
    ``entity_id``, ``(entity_id, language)``, ``(entity_id, <language
    without region>)`` and ``(entity_id, 'en')``.  The first hit wins; when
    nothing matches, ``entity_id`` itself is returned.
    """

    def candidate_keys():
        # Generated lazily: ``language`` is only touched after the lookup by
        # bare ID has missed.
        yield entity_id
        yield (entity_id, language)
        yield (entity_id, language.split('-')[0])
        yield (entity_id, 'en')

    for key in candidate_keys():
        label = WIKIDATA_PROPERTIES.get(key)
        if label is not None:
            return label
    return entity_id
159
160
def send_wikidata_query(query, method='GET'):
    """Send a SPARQL ``query`` to the Wikidata endpoint and return the decoded JSON.

    :param query: SPARQL query text.
    :param method: ``'GET'`` puts the query into the URL, any other value
        sends it as POST form data.
    :returns: the response body parsed with ``json.loads``.
    :raises: whatever ``raise_for_status()`` raises for an error response.
    """
    if method != 'GET':
        # query won't be cached by wikidata
        http_response = post(SPARQL_ENDPOINT_URL, data={'query': query}, headers=get_headers())
    else:
        # query will be cached by wikidata
        url = SPARQL_ENDPOINT_URL + '?' + urlencode({'query': query})
        http_response = get(url, headers=get_headers())

    failed = http_response.status_code != 200
    if failed:
        logger.debug('SPARQL endpoint error %s', http_response.content.decode())
    logger.debug('request time %s', str(http_response.elapsed))
    http_response.raise_for_status()

    body = http_response.content.decode()
    return loads(body)
173
174
def request(query, params):
    """Fill ``params`` with the POST request for the SPARQL endpoint.

    The language tag and the attribute list used to build the query are also
    stored in ``params`` (keys ``language`` and ``attributes``); ``response``
    reads them back.  Returns the same ``params`` object.
    """
    eng_tag, _wiki_netloc = get_wiki_params(params['searxng_locale'], traits)
    sparql, attributes = get_query(query, eng_tag)
    logger.debug("request --> language %s // len(attributes): %s", eng_tag, len(attributes))

    params.update(
        {
            'method': 'POST',
            'url': SPARQL_ENDPOINT_URL,
            'data': {'query': sparql},
            'headers': get_headers(),
            'language': eng_tag,
            'attributes': attributes,
        }
    )
    return params
189
190
def response(resp):
    """Convert the SPARQL JSON answer into a list of results.

    Each binding is flattened to ``{variable: value}``.  Only the first
    binding of an entity is passed to ``get_results``; bindings of an entity
    already seen, or of one of the ``DUMMY_ENTITY_URLS``, are only logged.
    """
    language = resp.search_params['language']
    attributes = resp.search_params['attributes']
    payload = loads(resp.content.decode())
    logger.debug("request --> language %s // len(attributes): %s", language, len(attributes))

    collected = []
    seen_entities = set()
    bindings = payload.get('results', {}).get('bindings', [])
    for binding in bindings:
        attribute_result = {name: cell['value'] for name, cell in binding.items()}
        entity_url = attribute_result['item']
        if entity_url in seen_entities or entity_url in DUMMY_ENTITY_URLS:
            logger.debug('The SPARQL request returns duplicate entities: %s', str(attribute_result))
            continue
        seen_entities.add(entity_url)
        collected.extend(get_results(attribute_result, attributes, language))

    return collected
211
212
_IMG_SRC_DEFAULT_URL_PREFIX = "https://commons.wikimedia.org/wiki/Special:FilePath/"
_IMG_SRC_NEW_URL_PREFIX = "https://upload.wikimedia.org/wikipedia/commons/thumb/"


def get_thumbnail(img_src):
    """Get Thumbnail image from wikimedia commons

    Images from commons.wikimedia.org are (HTTP) redirected to
    upload.wikimedia.org.  The redirected URL can be calculated by this
    function.

    - https://stackoverflow.com/a/33691240

    :param img_src: image URL, expected in the form
        ``<Special:FilePath prefix><file name>?<param>=<size>[&...]``.
    :returns: the computed ``upload.wikimedia.org`` thumbnail URL.
        ``img_src`` is returned unchanged when it is ``None`` or empty, when
        it is not a ``Special:FilePath`` URL, or when it carries no size
        parameter to build the thumbnail name from.
    """
    logger.debug('get_thumbnail(): %s', img_src)
    if not img_src:
        return img_src
    tokens = img_src.split()
    if not tokens or _IMG_SRC_DEFAULT_URL_PREFIX not in tokens[0]:
        return img_src

    file_part, _, query_string = img_src.replace(_IMG_SRC_DEFAULT_URL_PREFIX, "").partition("?")

    # The size is the value of the first query parameter ("width=500&..."
    # gives "500").  Without it no thumbnail URL can be built.
    img_src_size = query_string.split("&", 1)[0].partition("=")[2]
    img_src_name = unquote(file_part.replace("%20", "_"))
    name_tokens = img_src_name.split()
    if not img_src_size or not name_tokens:
        return img_src

    # Thumbnails of SVG files are rendered as PNG: ".png" is appended to the
    # thumbnail file name only, not to the directory part.
    thumb_name = img_src_name + ".png" if ".svg" in name_tokens[0] else img_src_name

    # The first one / two hex digits of the MD5 of the file name are the
    # directory levels of the upload path.
    digest = md5(img_src_name.encode("utf-8")).hexdigest()
    img_src = "".join(
        (
            _IMG_SRC_NEW_URL_PREFIX,
            digest[0],
            "/",
            digest[0:2],
            "/",
            img_src_name,
            "/",
            img_src_size,
            "px-",
            thumb_name,
        )
    )
    logger.debug('get_thumbnail() redirected: %s', img_src)
    return img_src
254
255
def get_results(attribute_result, attributes, language):
    """Build the result items (infobox and / or list entries) for one entity.

    :param attribute_result: flat ``{variable: value}`` dict of one SPARQL
        binding; the key ``item`` (entity URL) is required.
    :param attributes: attribute objects that were used to build the query.
    :param language: language tag passed to the attributes for labels and
        value formatting.
    :returns: list of result dicts; which kinds are produced depends on the
        module level ``display_type``.
    """
    # pylint: disable=too-many-branches
    results = []
    infobox_title = attribute_result.get('itemLabel')
    infobox_id = attribute_result['item']
    infobox_id_lang = None
    infobox_urls = []
    infobox_attributes = []
    infobox_content = attribute_result.get('itemDescription', [])
    img_src = None
    img_src_priority = 0

    for attribute in attributes:
        value = attribute.get_str(attribute_result, language)
        if value is not None and value != '':
            # exact type comparison: WDImageAttribute must not be handled by
            # the WDURLAttribute branch
            attribute_type = type(attribute)

            if attribute_type in (WDURLAttribute, WDArticle):
                # get_select() method : there is group_concat(distinct ...;separator=", ")
                # split the value here
                for url in value.split(', '):
                    infobox_urls.append({'title': attribute.get_label(language), 'url': url, **attribute.kwargs})
                    # "normal" results (not infobox) include official website and Wikipedia links.
                    if "list" in display_type and (attribute.kwargs.get('official') or attribute_type == WDArticle):
                        results.append({'title': infobox_title, 'url': url, "content": infobox_content})

                    # update the infobox_id with the wikipedia URL
                    # first the local wikipedia URL, and as fallback the english wikipedia URL
                    if attribute_type == WDArticle and (
                        (attribute.language == 'en' and infobox_id_lang is None) or attribute.language != 'en'
                    ):
                        infobox_id_lang = attribute.language
                        infobox_id = url
            elif attribute_type == WDImageAttribute:
                # this attribute is an image.
                # replace the current image only if the priority of this
                # attribute is higher (the infobox contains only one image).
                if attribute.priority > img_src_priority:
                    img_src = get_thumbnail(value)
                    img_src_priority = attribute.priority
            elif attribute_type == WDGeoAttribute:
                # geocoordinate link
                # use the area to get the OSM zoom
                # Note: ignore the unit (must be km² otherwise the calculation is wrong)
                # Should use normalized value p:P2046/psn:P2046/wikibase:quantityAmount
                area = attribute_result.get('P2046')
                osm_zoom = area_to_osm_zoom(area) if area else 19
                url = attribute.get_geo_url(attribute_result, osm_zoom=osm_zoom)
                if url:
                    infobox_urls.append({'title': attribute.get_label(language), 'url': url, 'entity': attribute.name})
            else:
                infobox_attributes.append(
                    {'label': attribute.get_label(language), 'value': value, 'entity': attribute.name}
                )

    if infobox_id:
        infobox_id = replace_http_by_https(infobox_id)

    # add the wikidata URL at the end
    infobox_urls.append({'title': 'Wikidata', 'url': attribute_result['item']})

    # An entity with nothing but its Wikidata link (no image, no attribute,
    # no description) becomes a plain list result when "list" is enabled;
    # otherwise an infobox is emitted when "infobox" is enabled.
    if (
        "list" in display_type
        and img_src is None
        and len(infobox_attributes) == 0
        and len(infobox_urls) == 1
        and len(infobox_content) == 0
    ):
        results.append({'url': infobox_urls[0]['url'], 'title': infobox_title, 'content': infobox_content})
    elif "infobox" in display_type:
        results.append(
            {
                'infobox': infobox_title,
                'id': infobox_id,
                'content': infobox_content,
                'img_src': img_src,
                'urls': infobox_urls,
                'attributes': infobox_attributes,
            }
        )
    return results
337
338
def get_query(query, language):
    """Build the SPARQL search query for ``query`` in ``language``.

    :param query: the user's search terms (escaped here).
    :param language: language tag inserted for ``%LANGUAGE%``.
    :returns: tuple ``(sparql_query, attributes)``; ``attributes`` is the
        list from ``get_attributes(language)`` the query was built from.
    """
    attributes = get_attributes(language)
    select = [a.get_select() for a in attributes]
    where = [clause for clause in (a.get_where() for a in attributes) if len(clause) > 0]
    wikibase_label = [label for label in (a.get_wikibase_label() for a in attributes) if len(label) > 0]
    group_by = [group for group in (a.get_group_by() for a in attributes) if len(group) > 0]

    # The user's text is inserted LAST.  Inserted first, a search term such
    # as "%WHERE%" or "%LANGUAGE%" would be expanded by the replacements
    # that follow and could break out of the quoted string literal.
    query = (
        QUERY_TEMPLATE.replace('%SELECT%', ' '.join(select))
        .replace('%WHERE%', '\n '.join(where))
        .replace('%WIKIBASE_LABELS%', '\n '.join(wikibase_label))
        .replace('%GROUP_BY%', ' '.join(group_by))
        .replace('%LANGUAGE%', language)
        .replace('%QUERY%', sparql_string_escape(query))
    )
    return query, attributes
354
355
def get_attributes(language):
    """Return the ordered list of attribute objects queried for an entity.

    :param language: language tag of the local Wikipedia article; an English
        article attribute is added as well unless the tag starts with ``en``.
    :returns: a new list of ``WD*Attribute`` / ``WDArticle`` objects.
    """
    # pylint: disable=too-many-statements
    attributes = []

    def add_values(*names):
        attributes.extend(WDAttribute(name) for name in names)

    def add_amounts(*names):
        attributes.extend(WDAmountAttribute(name) for name in names)

    def add_labels(*names):
        attributes.extend(WDLabelAttribute(name) for name in names)

    def add_dates(*names):
        attributes.extend(WDDateAttribute(name) for name in names)

    def add_url(name, url_id=None, url_path_prefix=None, **kwargs):
        attributes.append(WDURLAttribute(name, url_id, url_path_prefix, kwargs))

    # Dates
    add_dates(
        'P571',  # inception date
        'P576',  # dissolution date
        'P580',  # start date
        'P582',  # end date
        'P569',  # date of birth
        'P570',  # date of death
        'P619',  # date of spacecraft launch
        'P620',  # date of spacecraft landing
    )

    add_labels(
        'P27',  # country of citizenship
        'P495',  # country of origin
        'P17',  # country
        'P159',  # headquarters location
    )

    # Places
    add_labels(
        'P36',  # capital
        'P35',  # head of state
        'P6',  # head of government
        'P122',  # basic form of government
        'P37',  # official language
    )
    add_values('P1082')  # population
    add_amounts(
        'P2046',  # area
        'P281',  # postal code
    )
    add_labels('P38')  # currency
    add_amounts('P2048')  # height (building)

    # Media
    add_labels(
        'P400',  # platform (videogames, computing)
        'P50',  # author
        'P170',  # creator
        'P57',  # director
        'P175',  # performer
        'P178',  # developer
        'P162',  # producer
        'P176',  # manufacturer
        'P58',  # screenwriter
        'P272',  # production company
        'P264',  # record label
        'P123',  # publisher
        'P449',  # original network
        'P750',  # distributed by
        'P86',  # composer
    )
    add_dates('P577')  # publication date
    add_labels(
        'P136',  # genre (music, film, artistic...)
        'P364',  # original language
    )
    add_values(
        'P212',  # ISBN-13
        'P957',  # ISBN-10
    )
    add_labels(
        'P275',  # copyright license
        'P277',  # programming language
    )
    add_values('P348')  # version
    add_labels('P840')  # narrative location

    # Languages
    add_values('P1098')  # number of speakers
    add_labels(
        'P282',  # writing system
        'P1018',  # language regulatory body
    )
    add_values('P218')  # language code (ISO 639-1)

    # Other
    add_labels(
        'P169',  # ceo
        'P112',  # founded by
        'P1454',  # legal form (company, organization)
        'P137',  # operator (service, facility, ...)
        'P1029',  # crew members (tripulation)
        'P225',  # taxon name
    )
    add_values('P274')  # chemical formula
    add_labels('P1346')  # winner (sports, contests, ...)
    add_values(
        'P1120',  # number of deaths
        'P498',  # currency code (ISO 4217)
    )

    # URL
    add_url('P856', official=True)  # official website
    attributes.append(WDArticle(language))  # wikipedia (user language)
    if not language.startswith('en'):
        attributes.append(WDArticle('en'))  # wikipedia (english)

    add_url('P1324')  # source code repository
    add_url('P1581')  # blog
    for name, url_id in (
        ('P434', 'musicbrainz_artist'),
        ('P435', 'musicbrainz_work'),
        ('P436', 'musicbrainz_release_group'),
        ('P966', 'musicbrainz_label'),
        ('P345', 'imdb_id'),
        ('P2397', 'youtube_channel'),
        ('P1651', 'youtube_video'),
        ('P2002', 'twitter_profile'),
        ('P2013', 'facebook_profile'),
        ('P2003', 'instagram_profile'),
    ):
        add_url(name, url_id=url_id)

    # Fediverse
    for name, path_prefix in (
        ('P4033', '/@'),  # Mastodon user
        ('P11947', '/c/'),  # Lemmy community
        ('P12622', '/c/'),  # PeerTube channel
    ):
        add_url(name, url_path_prefix=path_prefix)

    # Map
    attributes.append(WDGeoAttribute('P625'))

    # Image: listed by ascending priority (1 .. 7)
    image_properties = (
        'P15',  # route map
        'P242',  # locator map
        'P154',  # logo
        'P18',  # image
        'P41',  # flag
        'P2716',  # collage
        'P2910',  # icon
    )
    for priority, name in enumerate(image_properties, start=1):
        attributes.append(WDImageAttribute(name, 'wikimedia_image', priority))

    return attributes
500
501
503 __slots__ = ('name',)
504
    def __init__(self, name):
        """Store ``name``; the other methods use it as property ID and SPARQL variable name."""
        self.name = name
507
508 def get_select(self):
509 return '(group_concat(distinct ?{name};separator=", ") as ?{name}s)'.replace('{name}', self.name)
510
    def get_label(self, language):
        """Return the display label of this attribute, looked up by ``get_label_for_entity``."""
        return get_label_for_entity(self.name, language)
513
514 def get_where(self):
515 return "OPTIONAL { ?item wdt:{name} ?{name} . }".replace('{name}', self.name)
516
518 return ""
519
    def get_group_by(self):
        """Return the GROUP BY fragment; empty here (get_query() drops empty fragments)."""
        return ""
522
523 def get_str(self, result, language): # pylint: disable=unused-argument
524 return result.get(self.name + 's')
525
526 def __repr__(self):
527 return '<' + str(type(self).__name__) + ':' + self.name + '>'
528
529
531 def get_select(self):
532 return '?{name} ?{name}Unit'.replace('{name}', self.name)
533
534 def get_where(self):
535 return """ OPTIONAL { ?item p:{name} ?{name}Node .
536 ?{name}Node rdf:type wikibase:BestRank ; ps:{name} ?{name} .
537 OPTIONAL { ?{name}Node psv:{name}/wikibase:quantityUnit ?{name}Unit. } }""".replace(
538 '{name}', self.name
539 )
540
    def get_group_by(self):
        """Group by the same variables that ``get_select`` selects."""
        return self.get_select()
543
544 def get_str(self, result, language):
545 value = result.get(self.name)
546 unit = result.get(self.name + "Unit")
547 if unit is not None:
548 unit = unit.replace('http://www.wikidata.org/entity/', '')
549 return value + " " + get_label_for_entity(unit, language)
550 return value
551
552
554
555 __slots__ = 'language', 'kwargs'
556
    def __init__(self, language, kwargs=None):
        """
        :param language: language code of the Wikipedia edition.
        :param kwargs: optional dict; an empty dict is stored when omitted.
        """
        # the attribute name is fixed to 'wikipedia'
        super().__init__('wikipedia')
        self.language = language
        self.kwargs = kwargs or {}
561
562 def get_label(self, language):
563 # language parameter is ignored
564 return "Wikipedia ({language})".replace('{language}', self.language)
565
566 def get_select(self):
567 return "?article{language} ?articleName{language}".replace('{language}', self.language)
568
569 def get_where(self):
570 return """OPTIONAL { ?article{language} schema:about ?item ;
571 schema:inLanguage "{language}" ;
572 schema:isPartOf <https://{language}.wikipedia.org/> ;
573 schema:name ?articleName{language} . }""".replace(
574 '{language}', self.language
575 )
576
    def get_group_by(self):
        """Group by the same variables that ``get_select`` selects."""
        return self.get_select()
579
580 def get_str(self, result, language):
581 key = 'article{language}'.replace('{language}', self.language)
582 return result.get(key)
583
584
586 def get_select(self):
587 return '(group_concat(distinct ?{name}Label;separator=", ") as ?{name}Labels)'.replace('{name}', self.name)
588
589 def get_where(self):
590 return "OPTIONAL { ?item wdt:{name} ?{name} . }".replace('{name}', self.name)
591
593 return "?{name} rdfs:label ?{name}Label .".replace('{name}', self.name)
594
595 def get_str(self, result, language):
596 return result.get(self.name + 'Labels')
597
598
600
601 HTTP_WIKIMEDIA_IMAGE = 'http://commons.wikimedia.org/wiki/Special:FilePath/'
602
603 __slots__ = 'url_id', 'url_path_prefix', 'kwargs'
604
605 def __init__(self, name, url_id=None, url_path_prefix=None, kwargs=None):
606 """
607 :param url_id: ID matching one key in ``external_urls.json`` for
608 converting IDs to full URLs.
609
610 :param url_path_prefix: Path prefix if the values are of format
611 ``account@domain``. If provided, value are rewritten to
612 ``https://<domain><url_path_prefix><account>``. For example::
613
614 WDURLAttribute('P4033', url_path_prefix='/@')
615
616 Adds Property `P4033 <https://www.wikidata.org/wiki/Property:P4033>`_
617 to the wikidata query. This field might return for example
618 ``libreoffice@fosstodon.org`` and the URL built from this is then:
619
620 - account: ``libreoffice``
621 - domain: ``fosstodon.org``
622 - result url: https://fosstodon.org/@libreoffice
623 """
624
625 super().__init__(name)
626 self.url_id = url_id
627 self.url_path_prefix = url_path_prefix
628 self.kwargs = kwargs
629
630 def get_str(self, result, language):
631 value = result.get(self.name + 's')
632 if not value:
633 return None
634
635 value = value.split(',')[0]
636 if self.url_id:
637 url_id = self.url_id
638 if value.startswith(WDURLAttribute.HTTP_WIKIMEDIA_IMAGE):
639 value = value[len(WDURLAttribute.HTTP_WIKIMEDIA_IMAGE) :]
640 url_id = 'wikimedia_image'
641 return get_external_url(url_id, value)
642
643 if self.url_path_prefix:
644 [account, domain] = value.split('@', 1)
645 return f"https://{domain}{self.url_path_prefix}{account}"
646
647 return value
648
649
    def get_label(self, language):
        """Return the fixed label ``OpenStreetMap``; ``language`` is ignored."""
        return "OpenStreetMap"
653
654 def get_select(self):
655 return "?{name}Lat ?{name}Long".replace('{name}', self.name)
656
657 def get_where(self):
658 return """OPTIONAL { ?item p:{name}/psv:{name} [
659 wikibase:geoLatitude ?{name}Lat ;
660 wikibase:geoLongitude ?{name}Long ] }""".replace(
661 '{name}', self.name
662 )
663
    def get_group_by(self):
        """Group by the same variables that ``get_select`` selects."""
        return self.get_select()
666
667 def get_str(self, result, language):
668 latitude = result.get(self.name + 'Lat')
669 longitude = result.get(self.name + 'Long')
670 if latitude and longitude:
671 return latitude + ' ' + longitude
672 return None
673
674 def get_geo_url(self, result, osm_zoom=19):
675 latitude = result.get(self.name + 'Lat')
676 longitude = result.get(self.name + 'Long')
677 if latitude and longitude:
678 return get_earth_coordinates_url(latitude, longitude, osm_zoom)
679 return None
680
681
683
684 __slots__ = ('priority',)
685
    def __init__(self, name, url_id=None, priority=100):
        """
        :param name: passed on to the parent initializer.
        :param url_id: passed on to the parent initializer.
        :param priority: rank of this image; ``get_results`` keeps the image
            of the attribute with the highest priority.
        """
        super().__init__(name, url_id)
        self.priority = priority
689
690
692 def get_select(self):
693 return '?{name} ?{name}timePrecision ?{name}timeZone ?{name}timeCalendar'.replace('{name}', self.name)
694
695 def get_where(self):
696 # To remove duplicate, add
697 # FILTER NOT EXISTS { ?item p:{name}/psv:{name}/wikibase:timeValue ?{name}bis FILTER (?{name}bis < ?{name}) }
698 # this filter is too slow, so the response function ignore duplicate results
699 # (see the seen_entities variable)
700 return """OPTIONAL { ?item p:{name}/psv:{name} [
701 wikibase:timeValue ?{name} ;
702 wikibase:timePrecision ?{name}timePrecision ;
703 wikibase:timeTimezone ?{name}timeZone ;
704 wikibase:timeCalendarModel ?{name}timeCalendar ] . }
705 hint:Prior hint:rangeSafe true;""".replace(
706 '{name}', self.name
707 )
708
    def get_group_by(self):
        """Group by the same variables that ``get_select`` selects."""
        return self.get_select()
711
    def format_8(self, value, locale):  # pylint: disable=unused-argument
        """Return ``value`` unchanged (``DATE_FORMAT`` uses this for the precisions '0' to '8')."""
        # precision: less than a year
        return value
715
716 def format_9(self, value, locale):
717 year = int(value)
718 # precision: year
719 if year < 1584:
720 if year < 0:
721 return str(year - 1)
722 return str(year)
723 timestamp = isoparse(value)
724 return format_date(timestamp, format='yyyy', locale=locale)
725
726 def format_10(self, value, locale):
727 # precision: month
728 timestamp = isoparse(value)
729 return format_date(timestamp, format='MMMM y', locale=locale)
730
731 def format_11(self, value, locale):
732 # precision: day
733 timestamp = isoparse(value)
734 return format_date(timestamp, format='full', locale=locale)
735
736 def format_13(self, value, locale):
737 timestamp = isoparse(value)
738 # precision: minute
739 return (
740 get_datetime_format(format, locale=locale)
741 .replace("'", "")
742 .replace('{0}', format_time(timestamp, 'full', tzinfo=None, locale=locale))
743 .replace('{1}', format_date(timestamp, 'short', locale=locale))
744 )
745
746 def format_14(self, value, locale):
747 # precision: second.
748 return format_datetime(isoparse(value), format='full', locale=locale)
749
    # Maps the precision value of a binding (a string) to a tuple
    # ``(name of the formatting method, number)``.  get_str() only tests the
    # number for ``>= 1``: in that case the value is cut down to its year
    # before the formatting method is called.
    DATE_FORMAT = {
        '0': ('format_8', 1000000000),
        '1': ('format_8', 100000000),
        '2': ('format_8', 10000000),
        '3': ('format_8', 1000000),
        '4': ('format_8', 100000),
        '5': ('format_8', 10000),
        '6': ('format_8', 1000),
        '7': ('format_8', 100),
        '8': ('format_8', 10),
        '9': ('format_9', 1),  # year
        '10': ('format_10', 1),  # month
        '11': ('format_11', 0),  # day
        '12': ('format_13', 0),  # hour (not supported by babel, display minute)
        '13': ('format_13', 0),  # minute
        '14': ('format_14', 0),  # second
    }
767
768 def get_str(self, result, language):
769 value = result.get(self.name)
770 if value == '' or value is None:
771 return None
772 precision = result.get(self.name + 'timePrecision')
773 date_format = WDDateAttribute.DATE_FORMAT.get(precision)
774 if date_format is not None:
775 format_method = getattr(self, date_format[0])
776 precision = date_format[1]
777 try:
778 if precision >= 1:
779 t = value.split('-')
780 if value.startswith('-'):
781 value = '-' + t[1]
782 else:
783 value = t[0]
784 return format_method(value, language)
785 except Exception: # pylint: disable=broad-except
786 return value
787 return value
788
789
def debug_explain_wikidata_query(query, method='GET'):
    """Send ``query`` to ``SPARQL_EXPLAIN_URL`` and return the raw response body.

    :param method: ``'GET'`` appends the query to the URL, any other value
        sends it as POST form data.
    :raises: whatever ``raise_for_status()`` raises for an error response.
    """
    if method != 'GET':
        http_response = post(SPARQL_EXPLAIN_URL, data={'query': query}, headers=get_headers())
    else:
        url = SPARQL_EXPLAIN_URL + '&' + urlencode({'query': query})
        http_response = get(url, headers=get_headers())
    http_response.raise_for_status()
    return http_response.content
797
798
def init(engine_settings=None):  # pylint: disable=unused-argument
    """Fill ``WIKIDATA_PROPERTIES`` with unit symbols and property labels.

    Unit symbols come from ``WIKIDATA_UNITS``.  Property labels are fetched
    with one SPARQL request and stored under the key ``(entity_id, lang)``.
    """
    # WIKIDATA_PROPERTIES : add unit symbols
    for k, v in WIKIDATA_UNITS.items():
        WIKIDATA_PROPERTIES[k] = v['symbol']

    # WIKIDATA_PROPERTIES : add property labels
    # Exact type match on purpose: subclasses that are not listed (for
    # example WDImageAttribute) are left out.
    labelled_types = (WDAttribute, WDAmountAttribute, WDURLAttribute, WDDateAttribute, WDLabelAttribute)
    wikidata_property_names = [
        "wd:" + attribute.name
        for attribute in get_attributes('en')
        if type(attribute) in labelled_types and attribute.name not in WIKIDATA_PROPERTIES
    ]
    query = QUERY_PROPERTY_NAMES.replace('%ATTRIBUTES%', " ".join(wikidata_property_names))
    jsonresponse = send_wikidata_query(query)
    for result in jsonresponse.get('results', {}).get('bindings', []):
        # The label is OPTIONAL in the query: an item without a label gives
        # a binding without 'name', which must not abort the initialization.
        name_binding = result.get('name')
        if not name_binding or 'xml:lang' not in name_binding:
            continue
        name = name_binding['value']
        lang = name_binding['xml:lang']
        entity_id = result['item']['value'].replace('http://www.wikidata.org/entity/', '')
        WIKIDATA_PROPERTIES[(entity_id, lang)] = name.capitalize()
817
818
def fetch_traits(engine_traits: EngineTraits):
    """Uses languages evaluated from :py:obj:`wikipedia.fetch_wikimedia_traits
    <searx.engines.wikipedia.fetch_wikimedia_traits>` and afterwards resets
    two custom entries to empty values:

    - ``traits.custom['wiki_netloc']``: set to ``{}``

    - ``traits.custom['WIKIPEDIA_LANGUAGES']``: set to ``[]``

    """

    fetch_wikimedia_traits(engine_traits)
    custom = engine_traits.custom
    custom['wiki_netloc'], custom['WIKIPEDIA_LANGUAGES'] = {}, []
get_str(self, result, language)
Definition wikidata.py:544
__init__(self, language, kwargs=None)
Definition wikidata.py:557
get_str(self, result, language)
Definition wikidata.py:580
get_str(self, result, language)
Definition wikidata.py:523
get_str(self, result, language)
Definition wikidata.py:768
get_geo_url(self, result, osm_zoom=19)
Definition wikidata.py:674
get_str(self, result, language)
Definition wikidata.py:667
__init__(self, name, url_id=None, priority=100)
Definition wikidata.py:686
get_str(self, result, language)
Definition wikidata.py:595
__init__(self, name, url_id=None, url_path_prefix=None, kwargs=None)
Definition wikidata.py:605
get_str(self, result, language)
Definition wikidata.py:630
request(query, params)
Definition wikidata.py:175
get_results(attribute_result, attributes, language)
Definition wikidata.py:256
get_query(query, language)
Definition wikidata.py:339
debug_explain_wikidata_query(query, method='GET')
Definition wikidata.py:790
send_wikidata_query(query, method='GET')
Definition wikidata.py:161
get_attributes(language)
Definition wikidata.py:356
fetch_traits(EngineTraits engine_traits)
Definition wikidata.py:819
init(engine_settings=None)
Definition wikidata.py:799
get_label_for_entity(entity_id, language)
Definition wikidata.py:148