.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
tineye.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2"""This engine implements *Tineye - reverse image search*
3
4Using TinEye, you can search by image or perform what we call a reverse image
5search. You can do that by uploading an image or searching by URL. You can also
6simply drag and drop your images to start your search. TinEye constantly crawls
7the web and adds images to its index. Today, the TinEye index is over 50.2
8billion images `[tineye.com] <https://tineye.com/how>`_.
9
10.. hint::
11
12 This SearXNG engine only supports *'searching by URL'* and it does not use
13 the official API `[api.tineye.com] <https://api.tineye.com/python/docs/>`_.
14
15"""
16
17from typing import TYPE_CHECKING
18from urllib.parse import urlencode
19from datetime import datetime
20from flask_babel import gettext
21
22from searx.result_types import EngineResults
23
24if TYPE_CHECKING:
25 import logging
26
27 logger = logging.getLogger()
28
# Static metadata describing this engine: the website it queries, its Wikidata
# entry, where the official API is documented, and the result format.  The
# engine does not use the official API and needs no API key.
about = {
    "website": 'https://tineye.com',
    "wikidata_id": 'Q2382535',
    "official_api_documentation": 'https://api.tineye.com/python/docs/',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'JSON',
}

engine_type = 'online_url_search'
""":py:obj:`searx.search.processors.online_url_search`"""

categories = ['general']
paging = True
safesearch = False
base_url = 'https://tineye.com'
# Path and query-string template appended to ``base_url`` by ``request()``:
# ``{page}`` receives ``params['pageno']`` and ``{query}`` receives the
# url-encoded ``url=<image URL>`` pair.
search_string = '/api/v1/result_json/?page={page}&{query}'

# The three translated messages below are selected by ``response()`` from the
# ``suggestions['key']`` value of an HTTP 422 reply.
FORMAT_NOT_SUPPORTED = gettext(
    "Could not read that image url. This may be due to an unsupported file"
    " format. TinEye only supports images that are JPEG, PNG, GIF, BMP, TIFF or WebP."
)
"""TinEye error message"""

NO_SIGNATURE_ERROR = gettext(
    "The image is too simple to find matches. TinEye requires a basic level of"
    " visual detail to successfully identify matches."
)
"""TinEye error message"""

DOWNLOAD_ERROR = gettext("The image could not be downloaded.")
"""TinEye error message"""
61
62
def request(query, params):
    """Build TinEye HTTP request using ``search_urls`` of a :py:obj:`engine_type`.

    The image URL to search for is taken from ``params['search_urls']``: the
    ``data:image`` entry is preferred, then the ``http`` entry.  If both are
    empty the raw ``query`` string is sent unchanged.

    :param query: the search term as typed by the user.
    :param params: request parameters; must provide ``search_urls`` (with the
        keys ``data:image`` and ``http``), ``pageno`` and ``headers``.
    :return: the same ``params`` mapping with ``url``, ``headers`` and
        ``raise_for_httperror`` updated.
    """

    # ``response()`` inspects the body of 400 / 422 replies itself, so the
    # HTTP error must not be raised before it gets the response
    # (presumably this flag is what the framework checks -- confirm).
    params['raise_for_httperror'] = False

    search_urls = params['search_urls']
    if search_urls['data:image']:
        query = search_urls['data:image']
    elif search_urls['http']:
        query = search_urls['http']

    logger.debug("query URL: %s", query)
    query = urlencode({'url': query})

    # see https://github.com/TinEye/pytineye/blob/main/pytineye/api.py
    params['url'] = base_url + search_string.format(query=query, page=params['pageno'])

    params['headers'].update(
        {
            'Connection': 'keep-alive',
            # "deflate" is the registered content coding name; the former
            # value "defalte" was a typo and not a valid coding.
            'Accept-Encoding': 'gzip, deflate, br',
            'Host': 'tineye.com',
            'DNT': '1',
            'TE': 'trailers',
        }
    )
    return params
89
90
def parse_tineye_match(match_json):
    """Takes parsed JSON from the API server and turns it into a :py:obj:`dict`
    object.

    Keys of the returned dict, see `(class Match)
    <https://github.com/TinEye/pytineye/blob/main/pytineye/api.py>`__

    - `image_url`, link to the result image.
    - `domain`, domain this result was found on.
    - `score`, a number (0 to 100) that indicates how closely the images match.
    - `width`, image width in pixels.
    - `height`, image height in pixels.
    - `size`, image area in pixels.
    - `image_format`, image format (taken from the ``format`` field of the JSON).
    - `filesize`, image size in bytes.
    - `overlay`, overlay URL.
    - `tags`, whether this match belongs to a collection or stock domain.

    - `backlinks`, a list of Backlink objects pointing to the original websites
      and image URLs. List items are instances of :py:obj:`dict`, (`Backlink
      <https://github.com/TinEye/pytineye/blob/main/pytineye/api.py>`__):

      - `url`, the image URL to the image.
      - `backlink`, the original website URL.
      - `crawl_date`, the date the image was crawled as
        :py:obj:`datetime.datetime`; ``datetime.min`` if the date is missing
        or can not be parsed as ``YYYY-MM-DD``.
      - `image_name`, the ``image_name`` field of the backlink.

    Fields missing in ``match_json`` are returned as ``None``; a missing or
    ``null`` ``backlinks`` field results in an empty list.  Backlink items that
    are not a :py:obj:`dict` are skipped.

    """

    # HINT: there exists an alternative backlink dict in the domains list / e.g.::
    #
    #     match_json['domains'][0]['backlinks']

    backlinks = []

    # ``or []`` covers both an absent key and an explicit JSON ``null``
    for backlink_json in match_json.get("backlinks") or []:
        if not isinstance(backlink_json, dict):
            continue

        crawl_date = backlink_json.get("crawl_date")
        try:
            crawl_date = datetime.strptime(crawl_date, '%Y-%m-%d') if crawl_date else datetime.min
        except (ValueError, TypeError):
            # One malformed date must not abort parsing of the whole response;
            # treat it like a missing date.
            crawl_date = datetime.min

        backlinks.append(
            {
                'url': backlink_json.get("url"),
                'backlink': backlink_json.get("backlink"),
                'crawl_date': crawl_date,
                'image_name': backlink_json.get("image_name"),
            }
        )

    return {
        'image_url': match_json.get("image_url"),
        'domain': match_json.get("domain"),
        'score': match_json.get("score"),
        'width': match_json.get("width"),
        'height': match_json.get("height"),
        'size': match_json.get("size"),
        'image_format': match_json.get("format"),
        'filesize': match_json.get("filesize"),
        'overlay': match_json.get("overlay"),
        'tags': match_json.get("tags"),
        'backlinks': backlinks,
    }
157
158
def response(resp) -> EngineResults:
    """Parse HTTP response from TinEye.

    Replies with status 400 or 422 are treated as client-side errors: a message
    is derived from the ``suggestions`` object of the JSON body, logged, and an
    empty result list is returned.  Any other error status is raised via
    ``resp.raise_for_status()``.  For successful replies one image result is
    appended per match that has at least one backlink (only the first backlink
    is used), followed by the total number of matches when it is reported.
    """
    results = EngineResults()
    status = resp.status_code

    # handle the 422 client side errors, and the possible 400 status code error
    if status in (400, 422):
        suggestions = resp.json().get('suggestions', {})
        message = f'HTTP Status Code: {status}'

        if status == 422:
            s_key = suggestions.get('key', '')
            known_errors = (
                # test https://docs.searxng.org/_static/searxng-wordmark.svg
                ("Invalid image URL", FORMAT_NOT_SUPPORTED),
                # test https://pngimg.com/uploads/dot/dot_PNG4.png
                ('NO_SIGNATURE_ERROR', NO_SIGNATURE_ERROR),
                # test https://notexists
                ('Download Error', DOWNLOAD_ERROR),
            )
            for known_key, text in known_errors:
                if s_key == known_key:
                    message = text
                    break
            else:
                logger.warning("Unknown suggestion key encountered: %s", s_key)
        else:  # 400
            description = suggestions.get('description')
            if isinstance(description, list):
                message = ','.join(description)

        # The message is only logged, it is not added as an answer, see
        # https://github.com/searxng/searxng/pull/1456#issuecomment-1193105023
        # results.add(results.types.Answer(answer=message))
        logger.info(message)
        return results

    # Raise for all other responses
    resp.raise_for_status()

    json_data = resp.json()

    for match_json in json_data['matches']:
        match = parse_tineye_match(match_json)
        match_backlinks = match['backlinks']

        # a match without any backlink has no page to link to
        if match_backlinks:
            first = match_backlinks[0]
            image_result = {
                'template': 'images.html',
                'url': first['backlink'],
                'thumbnail_src': match['image_url'],
                'source': first['url'],
                'title': first['image_name'],
                'img_src': first['url'],
                'format': match['image_format'],
                'width': match['width'],
                'height': match['height'],
                'publishedDate': first['crawl_date'],
            }
            results.append(image_result)

    # append number of results
    total = json_data.get('num_matches')
    if total:
        results.append({'number_of_results': total})

    return results
parse_tineye_match(match_json)
Definition tineye.py:91
request(query, params)
Definition tineye.py:63
EngineResults response(resp)
Definition tineye.py:159