public_domain_image_archive.py
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Public domain image archive"""

from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl
from json import dumps

from searx.network import get
from searx.utils import extr
from searx.exceptions import SearxEngineAccessDeniedException, SearxEngineException

THUMBNAIL_SUFFIX = "?fit=max&h=360&w=360"
"""
Example thumbnail urls (from requests & html):
- https://the-public-domain-review.imgix.net
  /shop/nov-2023-prints-00043.jpg
  ?fit=max&h=360&w=360
- https://the-public-domain-review.imgix.net
  /collections/the-history-of-four-footed-beasts-and-serpents-1658/
  8616383182_5740fa7851_o.jpg
  ?fit=max&h=360&w=360

Example full image urls (from html):
- https://the-public-domain-review.imgix.net/shop/
  nov-2023-prints-00043.jpg
  ?fit=clip&w=970&h=800&auto=format,compress
- https://the-public-domain-review.imgix.net/collections/
  the-history-of-four-footed-beasts-and-serpents-1658/8616383182_5740fa7851_o.jpg
  ?fit=clip&w=310&h=800&auto=format,compress

The thumbnail url from the request is cleaned to obtain the full image link.
THUMBNAIL_SUFFIX is then appended to the cleaned url, matching the original thumbnail parameters.
"""
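
# In response(), result['thumbnail'] is split at "?" to obtain the bare image
# URL: that bare URL (cleaned by _clean_url) becomes img_src, and the same URL
# with THUMBNAIL_SUFFIX appended becomes thumbnail_src.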

# about
about = {
    "website": 'https://pdimagearchive.org',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'JSON',
}

base_url = 'https://oqi2j6v4iz-dsn.algolia.net'
pdia_config_url = 'https://pdimagearchive.org/_astro/config.BiNvrvzG.js'
categories = ['images']
page_size = 20
paging = True


__CACHED_API_KEY = None
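

# Remove the 'ixid' and 's' parameters from a URL's query string; scheme, host,
# path, fragment and any remaining parameters are preserved.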
def _clean_url(url):
    parsed = urlparse(url)
    query = [(k, v) for (k, v) in parse_qsl(parsed.query) if k not in ['ixid', 's']]

    return urlunparse((parsed.scheme, parsed.netloc, parsed.path, parsed.params, urlencode(query), parsed.fragment))
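

# Obtain the Algolia API key by downloading the site's JavaScript config bundle
# (pdia_config_url) and extracting the string following 'r="'.  The key is
# cached in __CACHED_API_KEY; a LookupError is raised if the bundle cannot be
# fetched or no key is found.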
def _get_algolia_api_key():
    global __CACHED_API_KEY  # pylint:disable=global-statement

    if __CACHED_API_KEY:
        return __CACHED_API_KEY

    resp = get(pdia_config_url)
    if resp.status_code != 200:
        raise LookupError("Failed to obtain Algolia API key for PDImageArchive")

    api_key = extr(resp.text, 'r="', '"', default=None)

    if api_key is None:
        raise LookupError("Couldn't obtain Algolia API key for PDImageArchive")

    __CACHED_API_KEY = api_key
    return api_key
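

# Drop the cached Algolia API key so the next request() call fetches a fresh
# one; response() calls this after an HTTP 403.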
def _clear_cached_api_key():
    global __CACHED_API_KEY  # pylint:disable=global-statement

    __CACHED_API_KEY = None
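

# Build the Algolia multi-queries POST request.  SearXNG's params['pageno'] is
# 1-based while Algolia pages are 0-based, hence the "- 1".  The request body is
# roughly (illustrative):
#   {"requests": [{"indexName": "prod_all-images",
#                  "params": "page=0&query=<query>&highlightPostTag=...&highlightPreTag=..."}]}
# raise_for_httperror is disabled so that response() can handle a 403 itself and
# reset the cached API key.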
def request(query, params):
    api_key = _get_algolia_api_key()

    args = {
        'x-algolia-api-key': api_key,
        'x-algolia-application-id': 'OQI2J6V4IZ',
    }
    params['url'] = f"{base_url}/1/indexes/*/queries?{urlencode(args)}"
    params["method"] = "POST"

    request_params = {
        "page": params["pageno"] - 1,
        "query": query,
        "highlightPostTag": "__ais-highlight__",
        "highlightPreTag": "__ais-highlight__",
    }
    data = {
        "requests": [
            {"indexName": "prod_all-images", "params": urlencode(request_params)},
        ]
    }
    params["data"] = dumps(data)

    # http errors are handled manually to be able to reset the api key
    params['raise_for_httperror'] = False
    return params
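

# Turn the Algolia JSON response into SearXNG image results.  A 403 clears the
# cached API key (so a fresh one is fetched on the next request) before raising
# SearxEngineAccessDeniedException; any other non-200 status raises
# SearxEngineException.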
def response(resp):
    results = []
    json_data = resp.json()

    if resp.status_code == 403:
        _clear_cached_api_key()
        raise SearxEngineAccessDeniedException()

    if resp.status_code != 200:
        raise SearxEngineException()

    if 'results' not in json_data:
        return []

    for result in json_data['results'][0]['hits']:
        content = []

        if "themes" in result:
            content.append("Themes: " + result['themes'])

        if "encompassingWork" in result:
            content.append("Encompassing work: " + result['encompassingWork'])
        content = "\n".join(content)

        base_image_url = result['thumbnail'].split("?")[0]

        results.append(
            {
                'template': 'images.html',
                'url': _clean_url(f"{about['website']}/images/{result['objectID']}"),
                'img_src': _clean_url(base_image_url),
                'thumbnail_src': _clean_url(base_image_url + THUMBNAIL_SUFFIX),
                'title': f"{result['title'].strip()} by {result['artist']} {result.get('displayYear', '')}",
                'content': content,
            }
        )

    return results