.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
wikidata_units.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2"""Unit conversion on the basis of `SPARQL/WIKIDATA Precision, Units and
3Coordinates`_
4
5.. _SPARQL/WIKIDATA Precision, Units and Coordinates:
6 https://en.wikibooks.org/wiki/SPARQL/WIKIDATA_Precision,_Units_and_Coordinates#Quantities
7"""
8
9__all__ = ["convert_from_si", "convert_to_si", "symbol_to_si"]
10
11import collections
12
13from searx import data
14from searx.engines import wikidata
15
16
class Beaufort:
    """Conversion between Beaufort_ numbers and wind speed in m/s.

    The mapping of the Beaufort_ scale contains values from 0 to 16 (55.6 m/s);
    wind speeds greater than 200 km/h (55.6 m/s) are given as 17 Bft.  That's
    why a value of 17 Bft cannot be converted to SI.

    .. hint::

       Negative values, values greater than 16 Bft (55.6 m/s) and values that
       are not a finite number will throw a :py:obj:`ValueError`.

    .. _Beaufort: https://en.wikipedia.org/wiki/Beaufort_scale
    """

    # The list index is the Beaufort number, the entry is a wind speed in m/s.
    # :py:obj:`from_si` picks the first entry that is not smaller than the
    # given speed, :py:obj:`to_si` returns the entry itself.
    # NOTE(review): the entries for 11 Bft (32.6) and 12 Bft (32.7) are only
    # 0.1 m/s apart -- confirm the value stored for 12 Bft.
    # fmt: off
    scale: list[float] = [
        0.2, 1.5, 3.3, 5.4, 7.9,
        10.7, 13.8, 17.1, 20.7, 24.4,
        28.4, 32.6, 32.7, 41.1, 45.8,
        50.8, 55.6
    ]
    # fmt: on

    @classmethod
    def from_si(cls, value) -> float:
        """Return the Beaufort number of the wind speed ``value`` (in m/s).

        :raises ValueError: if ``value`` is negative, greater than the last
            entry of :py:obj:`scale` (55.6 m/s) or NaN.
        """
        # A negated chained comparison (instead of ``<`` / ``>`` tests) also
        # rejects NaN, which compares false against every bound.
        if not 0 <= value <= cls.scale[-1]:
            raise ValueError(f"invalid value {value} / the Beaufort scales from 0 to 16 (55.6 m/s)")
        # The range check above guarantees that at least the last entry matches.
        return next(bft for bft, mps in enumerate(cls.scale) if mps >= value)

    @classmethod
    def to_si(cls, value) -> float:
        """Return the wind speed (in m/s) stored for the Beaufort number
        ``value``; ``value`` is rounded to the nearest integer first.

        :raises ValueError: if the rounded value is not in the range 0..16 or
            if ``value`` is NaN or infinite.
        """
        try:
            idx = round(value)
        except (ValueError, OverflowError) as exc:
            # round() raises ValueError for NaN and OverflowError for +/-inf;
            # report both as the documented ValueError
            raise ValueError(f"invalid value {value} / the Beaufort scales from 0 to 16 (55.6 m/s)") from exc
        if not 0 <= idx < len(cls.scale):
            raise ValueError(f"invalid value {value} / the Beaufort scales from 0 to 16 (55.6 m/s)")
        return cls.scale[idx]
55
56
# Hand-maintained unit records.  symbol_to_si() appends them after the records
# built from data.WIKIDATA_UNITS.  "to_si" / "from_si" are callables here
# because these conversions are not a plain multiplication.
ADDITIONAL_UNITS = [
    {
        "si_name": "Q11579",  # Wikidata item ID of the SI unit (Kelvin)
        "symbol": "°C",
        "to_si": lambda val: val + 273.15,
        "from_si": lambda val: val - 273.15,
    },
    {
        "si_name": "Q11579",  # Wikidata item ID of the SI unit (Kelvin)
        "symbol": "°F",
        "to_si": lambda val: (val + 459.67) * 5 / 9,
        "from_si": lambda val: (val * 9 / 5) - 459.67,
    },
    {
        "si_name": "Q182429",  # presumably metre per second -- TODO confirm
        "symbol": "Bft",
        # table lookup on the Beaufort scale instead of an arithmetic formula
        "to_si": Beaufort.to_si,
        "from_si": Beaufort.from_si,
    },
]
77"""Additional items to convert from a measure unit to a SI unit (vice versa).
78
79.. code:: python
80
81 {
82 "si_name": "Q11579", # Wikidata item ID of the SI unit (Kelvin)
83 "symbol": "°C", # symbol of the measure unit
84 "to_si": lambda val: val + 273.15, # convert measure value (val) to SI unit
85 "from_si": lambda val: val - 273.15, # convert SI value (val) to measure unit
86 },
87 {
88 "si_name": "Q11573",
89 "symbol": "mi",
90 "to_si": 1609.344, # convert measure value (val) to SI unit
91 "from_si": 1 / 1609.344 # convert SI value (val) to measure unit
92 },
93
94The values of ``to_si`` and ``from_si`` can be of :py:obj:`float` (a multiplier)
95or a callable_ (val in / converted value returned).
96
97.. _callable: https://docs.python.org/3/glossary.html#term-callable
98"""
99
100
# Maps a known unit symbol to a tuple of alias symbols.  symbol_to_si() emits
# one additional record per alias, placed after all non-alias records.
ALIAS_SYMBOLS = {
    '°C': ('C',),
    '°F': ('F',),
    'mi': ('L',),
    'Bft': ('bft',),
}
107"""Alias symbols for known unit of measure symbols / by example::
108
109 '°C': ('C', ...), # list of alias symbols for °C (Q69362731)
110 '°F': ('F', ...), # list of alias symbols for °F (Q99490479)
111 'mi': ('L',), # list of alias symbols for mi (Q253276)
112"""
113
114
# Module-level caches, both empty until first use: SYMBOL_TO_SI is filled by
# symbol_to_si(), UNITS_BY_SI_NAME is filled by units_by_si_name().
SYMBOL_TO_SI = []
UNITS_BY_SI_NAME: dict = {}
117
118
def convert_from_si(si_name: str, symbol: str, value: float | int) -> float:
    """Convert ``value``, given in the SI unit ``si_name``, into the measure
    unit ``symbol``.

    The converter registered for the unit is either a number (a multiplier) or
    a callable that gets the value and returns the converted value.

    :raises KeyError: if ``si_name`` or ``symbol`` is not in the catalog
    """
    converter = units_by_si_name(si_name)[symbol][pos_from_si]
    if not isinstance(converter, (float, int)):
        return converter(float(value))
    return float(value) * converter
126
127
def convert_to_si(si_name: str, symbol: str, value: float | int) -> float:
    """Convert ``value``, given in the measure unit ``symbol``, into the SI
    unit ``si_name``.

    The converter registered for the unit is either a number (a multiplier) or
    a callable that gets the value and returns the converted value.

    :raises KeyError: if ``si_name`` or ``symbol`` is not in the catalog
    """
    converter = units_by_si_name(si_name)[symbol][pos_to_si]
    if not isinstance(converter, (float, int)):
        return converter(float(value))
    return float(value) * converter
135
136
def units_by_si_name(si_name):
    """Return the unit records that belong to the SI unit ``si_name``.

    The result is a dict that maps the symbol found at position ``pos_symbol``
    of a record to the record itself.  The catalog of all SI units is built
    from :py:obj:`symbol_to_si` on first use and cached in
    ``UNITS_BY_SI_NAME``.

    :raises KeyError: if no record is registered for ``si_name``
    """
    catalog = UNITS_BY_SI_NAME
    if not catalog:
        # build the catalog: group the records by their SI unit; a later
        # record replaces an earlier one with the same symbol
        for record in symbol_to_si():
            catalog.setdefault(record[pos_si_name], {})[record[pos_symbol]] = record
    return catalog[si_name]
156
157
# Field positions inside the tuples generated by symbol_to_si().
#
# The name ``pos_symbol`` used to be assigned twice (0 and then 4), the second
# assignment won.  Position 0 now has its own name, the effective value of
# ``pos_symbol`` (4) is unchanged.
pos_alias_symbol = 0  # (alias) symbol
pos_si_name = 1  # si_name
pos_from_si = 2  # from_si
pos_to_si = 3  # to_si
pos_symbol = 4  # standardized symbol
163
164
def symbol_to_si():
    """Generates a list of tuples, each tuple is a measure unit and the fields
    in the tuple are:

    0. Symbol of the measure unit (e.g. 'mi' for measure unit 'miles' Q253276)
       or, for an alias item, the alias symbol

    1. SI name of the measure unit (e.g. Q11573 for SI unit 'metre')

    2. ``from_si``: factor (or callable) to get the measure value from the SI
       value (e.g. SI 1609.344m multiplied by 1 / 1609.344 is equal to 1mi)

    3. ``to_si``: factor (or callable) to get the SI value from the measure
       value (e.g. 1mi multiplied by 1609.344 is equal to SI 1609.344m)

    4. Standardized symbol of the measure unit; for an alias item this is the
       symbol the alias stands for

    The returned list is ordered, the first items are created from
    ``WIKIDATA_UNITS``, the second group of items is built from
    :py:obj:`ADDITIONAL_UNITS`, followed by the items created from
    :py:obj:`ALIAS_SYMBOLS`.

    If you search this list for a symbol, then a match with a symbol from
    Wikidata has the highest weighting (first hit in the list), followed by the
    symbols from the :py:obj:`ADDITIONAL_UNITS` and the lowest weighting is
    given to the symbols resulting from the aliases :py:obj:`ALIAS_SYMBOLS`.

    The list is built on the first call and cached in ``SYMBOL_TO_SI``.
    """

    global SYMBOL_TO_SI  # pylint: disable=global-statement
    if SYMBOL_TO_SI:
        return SYMBOL_TO_SI

    # filter out units which can't be normalized to a SI unit and filter out
    # units without a symbol / arcsecond does not have a symbol
    # https://www.wikidata.org/wiki/Q829073
    units = [
        (
            item['symbol'],
            item['si_name'],
            1 / item['to_si_factor'],  # from_si
            item['to_si_factor'],  # to_si
            item['symbol'],
        )
        for item in data.WIKIDATA_UNITS.values()
        if item['to_si_factor'] and item['symbol']
    ]

    units.extend(
        (
            item['symbol'],
            item['si_name'],
            item['from_si'],
            item['to_si'],
            item['symbol'],
        )
        for item in ADDITIONAL_UNITS
    )

    alias_items = [
        (
            alias,
            unit[1],
            unit[2],  # from_si
            unit[3],  # to_si
            unit[0],  # origin unit
        )
        for unit in units
        for alias in ALIAS_SYMBOLS.get(unit[0], ())
    ]

    # Publish the cache only when the list is complete: an exception raised
    # while building must not leave a partially filled (truthy) cache behind.
    SYMBOL_TO_SI = units + alias_items
    return SYMBOL_TO_SI
235
236
237# the response contains duplicate ?item with the different ?symbol
238# "ORDER BY ?item DESC(?rank) ?symbol" provides a deterministic result
239# even if a ?item has different ?symbol of the same rank.
240# A deterministic result
241# see:
242# * https://www.wikidata.org/wiki/Help:Ranking
243# * https://www.mediawiki.org/wiki/Wikibase/Indexing/RDF_Dump_Format ("Statement representation" section)
244# * https://w.wiki/32BT
245# * https://en.wikibooks.org/wiki/SPARQL/WIKIDATA_Precision,_Units_and_Coordinates#Quantities
246# see the result for https://www.wikidata.org/wiki/Q11582
247# there are multiple symbols the same rank
248
249SARQL_REQUEST = """
250SELECT DISTINCT ?item ?symbol ?tosi ?tosiUnit
251WHERE
252{
253 ?item wdt:P31/wdt:P279 wd:Q47574 .
254 ?item p:P5061 ?symbolP .
255 ?symbolP ps:P5061 ?symbol ;
256 wikibase:rank ?rank .
257 OPTIONAL {
258 ?item p:P2370 ?tosistmt .
259 ?tosistmt psv:P2370 ?tosinode .
260 ?tosinode wikibase:quantityAmount ?tosi .
261 ?tosinode wikibase:quantityUnit ?tosiUnit .
262 }
263 FILTER(LANG(?symbol) = "en").
264}
265ORDER BY ?item DESC(?rank) ?symbol
266"""
267
268
def fetch_units():
    """Fetch units from Wikidata.  Function is used to update persistence of
    :py:obj:`searx.data.WIKIDATA_UNITS`.

    Sends ``SARQL_REQUEST`` to Wikidata and returns an ordered mapping from
    the Wikidata item ID of a unit to a dict with the keys ``symbol``,
    ``si_name`` and ``to_si_factor``; the latter two are ``None`` when the
    response has no value for them.
    """
    # NOTE(review): the ``def`` line is missing in the reviewed listing, the
    # function name is reconstructed -- confirm against the repository.

    results = collections.OrderedDict()
    response = wikidata.send_wikidata_query(SARQL_REQUEST)
    for unit in response['results']['bindings']:

        # the item ID is the last path segment of the item URI
        name = unit['item']['value'].rsplit('/', 1)[1]
        if name in results:
            # ignore duplicate: always use the first one
            continue

        si_name = unit.get('tosiUnit', {}).get('value', '')
        to_si_factor = unit.get('tosi', {}).get('value', '')
        results[name] = {
            'symbol': unit['symbol']['value'],
            'si_name': si_name.rsplit('/', 1)[1] if si_name else None,
            'to_si_factor': float(to_si_factor) if to_si_factor else None,
        }
    return results
::1337x
Definition 1337x.py:1
float convert_from_si(str si_name, str symbol, float|int value)
float convert_to_si(str si_name, str symbol, float|int value)