# SPDX-License-Identifier: AGPL-3.0-or-later
"""Public domain image archive"""

from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl
from json import dumps

from searx.network import get
from searx.utils import extr
from searx.exceptions import SearxEngineAccessDeniedException, SearxEngineException

THUMBNAIL_SUFFIX = "?fit=max&h=360&w=360"
"""
Example thumbnail urls (from requests & html):
- https://the-public-domain-review.imgix.net
  /shop/nov-2023-prints-00043.jpg
  ?fit=max&h=360&w=360
- https://the-public-domain-review.imgix.net
  /collections/the-history-of-four-footed-beasts-and-serpents-1658/
  8616383182_5740fa7851_o.jpg
  ?fit=max&h=360&w=360

Example full image urls (from html):
- https://the-public-domain-review.imgix.net/shop/
  nov-2023-prints-00043.jpg
  ?fit=clip&w=970&h=800&auto=format,compress
- https://the-public-domain-review.imgix.net/collections/
  the-history-of-four-footed-beasts-and-serpents-1658/8616383182_5740fa7851_o.jpg
  ?fit=clip&w=310&h=800&auto=format,compress

The thumbnail url from the request is cleaned and reused as the full image
link; THUMBNAIL_SUFFIX (derived from the original thumbnail parameters) is
appended to the cleaned url to rebuild the thumbnail link.
"""

# about
about = {
    "website": 'https://pdimagearchive.org',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'JSON',
}
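
# Endpoint discovery settings: the search page loads an
# "InfiniteSearch.<hash>.js" bundle (judging by the "/_astro/" path, an Astro
# build asset), and the search API url is scraped out of that bundle at
# runtime.  pdia_config_start / pdia_config_end delimit the bundle's hashed
# filename; the discovered url is memoized in __CACHED_API_URL below.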
pdia_base_url = 'https://pdimagearchive.org'
pdia_config_start = "/_astro/InfiniteSearch."
pdia_config_end = ".js"
categories = ['images']
page_size = 20
paging = True


__CACHED_API_URL = None


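# _clean_url() drops the per-request imgix query parameters 'ixid' and 's'
# (presumably a tracking id and a url signature) so that stable links can be
# rebuilt.  A hypothetical input/output pair:
#
#   _clean_url("https://example.imgix.net/a.jpg?fit=max&ixid=abc&s=123")
#   -> "https://example.imgix.net/a.jpg?fit=max"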
def _clean_url(url):
    parsed = urlparse(url)
    query = [(k, v) for (k, v) in parse_qsl(parsed.query) if k not in ['ixid', 's']]

    return urlunparse((parsed.scheme, parsed.netloc, parsed.path, parsed.params, urlencode(query), parsed.fragment))


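# Discovers the search API url in two steps: fetch the (empty) search page to
# find the hashed InfiniteSearch bundle name, then fetch the bundle and pull
# the endpoint out of its 'const r="..."' assignment.  The result is cached
# module-wide so scraping happens only once until the cache is cleared.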
def _get_algolia_api_url():
    global __CACHED_API_URL  # pylint:disable=global-statement

    if __CACHED_API_URL:
        return __CACHED_API_URL

    # fake request to extract api url
    resp = get(f"{pdia_base_url}/search/?q=")
    if resp.status_code != 200:
        raise LookupError("Failed to fetch config location (and as such the API url) for PDImageArchive")
    pdia_config_filepart = extr(resp.text, pdia_config_start, pdia_config_end)
    pdia_config_url = pdia_base_url + pdia_config_start + pdia_config_filepart + pdia_config_end

    resp = get(pdia_config_url)
    if resp.status_code != 200:
        raise LookupError("Failed to obtain AWS api url for PDImageArchive")

    api_url = extr(resp.text, 'const r="', '"', default=None)

    if api_url is None:
        raise LookupError("Couldn't obtain AWS api url for PDImageArchive")

    __CACHED_API_URL = api_url
    return api_url


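# Drops the memoized API url.  response() calls this on HTTP 403 so that the
# next request re-runs the discovery above (the endpoint can evidently change
# between site deployments, hence the hashed bundle name).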
def _clear_cached_api_url():
    global __CACHED_API_URL  # pylint:disable=global-statement

    __CACHED_API_URL = None


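# Builds the POST request against the discovered endpoint.  Note the page
# offset: SearXNG's 'pageno' is 1-based while the API's 'page' is 0-based.
# 'raise_for_httperror' is disabled so response() can inspect the status code
# itself and reset the cached API url on 403.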
def request(query, params):
    params['url'] = _get_algolia_api_url()
    params['method'] = 'POST'

    request_data = {
        'page': params['pageno'] - 1,
        'query': query,
        'hitsPerPage': page_size,
        'indexName': 'prod_all-images',
    }
    params['headers'] = {'Content-Type': 'application/json'}
    params['data'] = dumps(request_data)

    # http errors are handled manually to be able to reset the api url
    params['raise_for_httperror'] = False
    return params


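# Parses the API response.  Hits live under json_data['results'][0]['hits'];
# each hit is expected to carry at least 'objectID', 'thumbnail', 'title' and
# 'artist', plus the optional 'displayYear', 'themes' and 'encompassingWork'.
# A hypothetical, trimmed hit matching the fields read below:
#
#   {
#       "objectID": "abc123",
#       "thumbnail": "https://the-public-domain-review.imgix.net/a.jpg?ixid=x",
#       "title": "A print",
#       "artist": "Unknown",
#       "displayYear": "1658",
#   }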
def response(resp):
    results = []

    if resp.status_code == 403:
        _clear_cached_api_url()
        raise SearxEngineAccessDeniedException()

    if resp.status_code != 200:
        raise SearxEngineException()

    # parse the body only after the status checks, so error responses
    # without a JSON body cannot raise here
    json_data = resp.json()

    if 'results' not in json_data:
        return []

    for result in json_data['results'][0]['hits']:
        content = []

        if result.get("themes"):
            content.append("Themes: " + result['themes'])

        if result.get("encompassingWork"):
            content.append("Encompassing work: " + result['encompassingWork'])

        base_image_url = result['thumbnail'].split("?")[0]

        results.append(
            {
                'template': 'images.html',
                'url': _clean_url(f"{about['website']}/images/{result['objectID']}"),
                'img_src': _clean_url(base_image_url),
                'thumbnail_src': _clean_url(base_image_url + THUMBNAIL_SUFFIX),
                'title': f"{result['title'].strip()} by {result['artist']} {result.get('displayYear', '')}",
                'content': "\n".join(content),
            }
        )

    return results