SearXNG Developer Documentation
public_domain_image_archive.py
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Public domain image archive"""

from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl
from json import dumps

from searx.network import get
from searx.utils import extr
from searx.exceptions import SearxEngineAccessDeniedException, SearxEngineException

THUMBNAIL_SUFFIX = "?fit=max&h=360&w=360"
"""
Example thumbnail urls (from requests & html):
- https://the-public-domain-review.imgix.net
  /shop/nov-2023-prints-00043.jpg
  ?fit=max&h=360&w=360
- https://the-public-domain-review.imgix.net
  /collections/the-history-of-four-footed-beasts-and-serpents-1658/
  8616383182_5740fa7851_o.jpg
  ?fit=max&h=360&w=360

Example full image urls (from html):
- https://the-public-domain-review.imgix.net/shop/
  nov-2023-prints-00043.jpg
  ?fit=clip&w=970&h=800&auto=format,compress
- https://the-public-domain-review.imgix.net/collections/
  the-history-of-four-footed-beasts-and-serpents-1658/8616383182_5740fa7851_o.jpg
  ?fit=clip&w=310&h=800&auto=format,compress

The thumbnail url from the request is cleaned to obtain the full image link.
THUMBNAIL_SUFFIX is then appended to the cleaned url to build the thumbnail link,
mirroring the parameters of the original thumbnail.
"""

# about
about = {
    "website": 'https://pdimagearchive.org',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'JSON',
}

base_url = 'https://oqi2j6v4iz-dsn.algolia.net'
pdia_config_url = 'https://pdimagearchive.org/_astro/config.BiNvrvzG.js'
categories = ['images']
page_size = 20
paging = True


__CACHED_API_KEY = None

def _clean_url(url):
    parsed = urlparse(url)
    query = [(k, v) for (k, v) in parse_qsl(parsed.query) if k not in ['ixid', 's']]

    return urlunparse((parsed.scheme, parsed.netloc, parsed.path, parsed.params, urlencode(query), parsed.fragment))

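# A minimal sketch of what _clean_url() does, using a made-up URL: the `ixid`
# and `s` query parameters are dropped, everything else is kept as-is.
#
#   _clean_url("https://example.imgix.net/a.jpg?fit=max&ixid=xyz&s=123")
#   -> "https://example.imgix.net/a.jpg?fit=max"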

def _get_algolia_api_key():
    global __CACHED_API_KEY  # pylint:disable=global-statement

    if __CACHED_API_KEY:
        return __CACHED_API_KEY

    resp = get(pdia_config_url)
    if resp.status_code != 200:
        raise LookupError("Failed to obtain Algolia API key for PDImageArchive")

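    # The search-only key is read out of the site's bundled JS config: extr()
    # returns the text between the first 'r="' marker and the following '"'.
    # (Assumption, for illustration only: the fetched script contains something
    # like ...r="<algolia-search-key>"... at that position.)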
    api_key = extr(resp.text, 'r="', '"', default=None)

    if api_key is None:
        raise LookupError("Couldn't obtain Algolia API key for PDImageArchive")

    __CACHED_API_KEY = api_key
    return api_key


def _clear_cached_api_key():
    global __CACHED_API_KEY  # pylint:disable=global-statement

    __CACHED_API_KEY = None


def request(query, params):
    api_key = _get_algolia_api_key()

    args = {
        'x-algolia-api-key': api_key,
        'x-algolia-application-id': 'OQI2J6V4IZ',
    }
    params['url'] = f"{base_url}/1/indexes/*/queries?{urlencode(args)}"
    params["method"] = "POST"

    request_params = {
        "page": params["pageno"] - 1,
        "query": query,
        "highlightPostTag": "__ais-highlight__",
        "highlightPreTag": "__ais-highlight__",
    }
    data = {
        "requests": [
            {"indexName": "prod_all-images", "params": urlencode(request_params)},
        ]
    }
    params["data"] = dumps(data)

    # http errors are handled manually to be able to reset the api key
    params['raise_for_httperror'] = False
    return params


def response(resp):
    results = []
    json_data = resp.json()

    if resp.status_code == 403:
        _clear_cached_api_key()
        raise SearxEngineAccessDeniedException()

    if resp.status_code != 200:
        raise SearxEngineException()

    if 'results' not in json_data:
        return []

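    # Assumed shape of the Algolia answer, inferred from the field accesses below
    # (illustrative only, not an exhaustive schema):
    #   {"results": [{"hits": [{"objectID": "...", "title": "...", "artist": "...",
    #                           "displayYear": "...", "thumbnail": "https://...",
    #                           "themes": "...", "encompassingWork": "..."}, ...]}]}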
    for result in json_data['results'][0]['hits']:
        content = []

        if "themes" in result:
            content.append("Themes: " + result['themes'])

        if "encompassingWork" in result:
            content.append("Encompassing work: " + result['encompassingWork'])
        content = "\n".join(content)

        base_image_url = result['thumbnail'].split("?")[0]

        results.append(
            {
                'template': 'images.html',
                'url': _clean_url(f"{about['website']}/images/{result['objectID']}"),
                'img_src': _clean_url(base_image_url),
                'thumbnail_src': _clean_url(base_image_url + THUMBNAIL_SUFFIX),
                'title': f"{result['title'].strip()} by {result['artist']} {result.get('displayYear', '')}",
                'content': content,
            }
        )

    return results