.oO SearXNG Developer Documentation Oo.
public_domain_image_archive.py
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Public domain image archive"""

from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl
from json import dumps

from searx.network import get
from searx.utils import extr
from searx.exceptions import SearxEngineAccessDeniedException, SearxEngineException
THUMBNAIL_SUFFIX = "?fit=max&h=360&w=360"
"""
Example thumbnail urls (from requests & html):
- https://the-public-domain-review.imgix.net
  /shop/nov-2023-prints-00043.jpg
  ?fit=max&h=360&w=360
- https://the-public-domain-review.imgix.net
  /collections/the-history-of-four-footed-beasts-and-serpents-1658/
  8616383182_5740fa7851_o.jpg
  ?fit=max&h=360&w=360

Example full image urls (from html):
- https://the-public-domain-review.imgix.net/shop/
  nov-2023-prints-00043.jpg
  ?fit=clip&w=970&h=800&auto=format,compress
- https://the-public-domain-review.imgix.net/collections/
  the-history-of-four-footed-beasts-and-serpents-1658/8616383182_5740fa7851_o.jpg
  ?fit=clip&w=310&h=800&auto=format,compress

The thumbnail url from the request is cleaned to obtain the full image link.
The cleaned thumbnail url then has THUMBNAIL_SUFFIX appended to it, based on the
original thumbnail parameters.
"""

# about
about = {
    "website": 'https://pdimagearchive.org',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'JSON',
}

base_url = 'https://oqi2j6v4iz-dsn.algolia.net'
pdia_base_url = 'https://pdimagearchive.org'
pdia_search_url = pdia_base_url + '/search/?q='
pdia_config_start = "/_astro/InfiniteSearch."
pdia_config_end = ".js"
categories = ['images']
page_size = 20
paging = True


__CACHED_API_KEY = None


def _clean_url(url):
    parsed = urlparse(url)
    query = [(k, v) for (k, v) in parse_qsl(parsed.query) if k not in ['ixid', 's']]

    return urlunparse((parsed.scheme, parsed.netloc, parsed.path, parsed.params, urlencode(query), parsed.fragment))
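
# A minimal sketch of what _clean_url() removes (example url invented for
# illustration): the imgix tracking/signature parameters "ixid" and "s" are
# dropped, every other query parameter is preserved.
#
#   >>> _clean_url("https://the-public-domain-review.imgix.net/shop/x.jpg?ixid=123&s=abc&fit=max")
#   'https://the-public-domain-review.imgix.net/shop/x.jpg?fit=max'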


def _get_algolia_api_key():
    global __CACHED_API_KEY  # pylint:disable=global-statement

    if __CACHED_API_KEY:
        return __CACHED_API_KEY

    resp = get(pdia_search_url)
    if resp.status_code != 200:
        raise LookupError("Failed to fetch config location (and as such the API key) for PDImageArchive")
    pdia_config_filepart = extr(resp.text, pdia_config_start, pdia_config_end)
    pdia_config_url = pdia_base_url + pdia_config_start + pdia_config_filepart + pdia_config_end

    resp = get(pdia_config_url)
    if resp.status_code != 200:
        raise LookupError("Failed to obtain Algolia API key for PDImageArchive")

    api_key = extr(resp.text, 'const r="', '"', default=None)

    if api_key is None:
        raise LookupError("Couldn't obtain Algolia API key for PDImageArchive")

    __CACHED_API_KEY = api_key
    return api_key
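
# Sketch of the scraping flow above (the markers are taken from this file, the
# surrounding page layout is an assumption about the current site): the search
# page is expected to reference a JS bundle along the lines of
#
#   <script src="/_astro/InfiniteSearch.<hash>.js" ...>
#
# and that bundle is expected to embed the key as
#
#   const r="<api key>"
#
# If pdimagearchive.org changes its build output, the key lookup fails with a
# LookupError.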


def _clear_cached_api_key():
    global __CACHED_API_KEY  # pylint:disable=global-statement

    __CACHED_API_KEY = None


def request(query, params):
    api_key = _get_algolia_api_key()

    args = {
        'x-algolia-api-key': api_key,
        'x-algolia-application-id': 'OQI2J6V4IZ',
    }
    params['url'] = f"{base_url}/1/indexes/*/queries?{urlencode(args)}"
    params["method"] = "POST"

    request_params = {
        "page": params["pageno"] - 1,
        "query": query,
        "highlightPostTag": "__ais-highlight__",
        "highlightPreTag": "__ais-highlight__",
    }
    data = {
        "requests": [
            {"indexName": "prod_all-images", "params": urlencode(request_params)},
        ]
    }
    params["data"] = dumps(data)

    # http errors are handled manually to be able to reset the api key
    params['raise_for_httperror'] = False
    return params
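
# Illustrative example (assumed query "cats" on the first result page): the
# body POSTed above is
#
#   {"requests": [{"indexName": "prod_all-images",
#                  "params": "page=0&query=cats&highlightPostTag=__ais-highlight__&highlightPreTag=__ais-highlight__"}]}
#
# Algolia pages are zero-based, hence params["pageno"] - 1.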


def response(resp):
    results = []
    json_data = resp.json()

    if resp.status_code == 403:
        # the scraped api key was rejected and has most likely expired; drop
        # the cache so the next request fetches a fresh one
        _clear_cached_api_key()
        raise SearxEngineAccessDeniedException()

    if resp.status_code != 200:
        raise SearxEngineException()

    if 'results' not in json_data:
        return []

    for result in json_data['results'][0]['hits']:
        content = []

        if "themes" in result:
            content.append("Themes: " + result['themes'])

        if "encompassingWork" in result:
            content.append("Encompassing work: " + result['encompassingWork'])
        content = "\n".join(content)

        base_image_url = result['thumbnail'].split("?")[0]

        results.append(
            {
                'template': 'images.html',
                'url': _clean_url(f"{about['website']}/images/{result['objectID']}"),
                'img_src': _clean_url(base_image_url),
                'thumbnail_src': _clean_url(base_image_url + THUMBNAIL_SUFFIX),
                'title': f"{result['title'].strip()} by {result['artist']} {result.get('displayYear', '')}",
                'content': content,
            }
        )

    return results
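
# Sketch of the mapping above for a hypothetical hit (all field values are
# invented for illustration):
#
#   {"objectID": "abc123", "title": "A Print ", "artist": "Jane Doe",
#    "displayYear": "1658",
#    "thumbnail": "https://the-public-domain-review.imgix.net/shop/x.jpg?fit=max&h=360&w=360"}
#
# becomes an image result whose url is
# https://pdimagearchive.org/images/abc123, whose img_src is the thumbnail
# stripped of its query string, and whose title is "A Print by Jane Doe 1658".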