def _filter_url_fields(
    result: Result | LegacyResult,
    filter_func: Callable[[Result | LegacyResult, str, str], str | bool],
) -> None:
    """Run ``filter_func`` over the plain URL attributes of ``result``."""
    for field_name in ("url", "iframe_src", "audio_src", "img_src", "thumbnail_src", "thumbnail"):
        url_src = getattr(result, field_name, "")
        if not url_src:
            continue

        verdict = filter_func(result, field_name, url_src)

        if isinstance(verdict, bool):
            if verdict:
                # True: leave the field exactly as it is.
                continue
            log.debug("filter_urls: drop field %s URL %s", field_name, url_src)
            new_url = None
        else:
            log.debug("filter_urls: modify field %s URL %s -> %s", field_name, url_src, verdict)
            new_url = verdict

        setattr(result, field_name, new_url)

        if field_name == "url":
            # Keep ``parsed_url`` in sync with the (possibly removed) ``url``.
            if not new_url:
                result.parsed_url = None
            elif isinstance(new_url, str):
                result.parsed_url = urllib.parse.urlparse(new_url)


def _filter_infobox_urls(
    result: Result | LegacyResult,
    filter_func: Callable[[Result | LegacyResult, str, str], str | bool],
) -> None:
    """Run ``filter_func`` over the ``url`` of each item in ``result.urls``."""
    infobox_urls: list[dict[str, str]] = getattr(result, "urls", [])
    if not infobox_urls:
        return

    kept: list[dict[str, str]] = []
    for item in infobox_urls:
        url_src = item.get("url")
        if not url_src:
            # Nothing to filter, the item is passed through unchanged.
            kept.append(item)
            continue

        verdict = filter_func(result, "infobox_urls", url_src)

        if isinstance(verdict, bool):
            if verdict:
                kept.append(item)
            else:
                log.debug("filter_urls: remove URL from field 'urls' ('infobox_urls') URL %s", url_src)
            continue

        # A non-bool verdict is the replacement URL; a falsy one (e.g. an
        # empty string) removes the item from the list.
        if verdict:
            log.debug("filter_urls: modify URL from field 'urls' ('infobox_urls') URL %s -> %s", url_src, verdict)
            item["url"] = verdict
            kept.append(item)

    setattr(result, "urls", kept)


def _filter_infobox_attributes(
    result: Result | LegacyResult,
    filter_func: Callable[[Result | LegacyResult, str, str], str | bool],
) -> None:
    """Run ``filter_func`` over ``image.src`` of each item in ``result.attributes``."""
    infobox_attributes: list[dict[str, dict]] = getattr(result, "attributes", [])
    if not infobox_attributes:
        return

    kept: list[dict[str, dict]] = []
    for item in infobox_attributes:
        # ``or {}`` also covers an explicit ``"image": None`` entry, which
        # would otherwise fail on the ``.get`` below.
        image = item.get("image") or {}
        url_src = image.get("src", "")
        if not url_src:
            # No image URL to filter, the item is passed through unchanged.
            kept.append(item)
            continue

        verdict = filter_func(result, "infobox_attributes", url_src)

        if isinstance(verdict, bool):
            if verdict:
                kept.append(item)
            else:
                log.debug("filter_urls: drop field 'image.src' ('infobox_attributes') URL %s", url_src)
            continue

        # A non-bool verdict is the replacement URL; a falsy one (e.g. an
        # empty string) removes the item from the list.
        if verdict:
            log.debug(
                "filter_urls: modify 'image.src' ('infobox_attributes') URL %s -> %s",
                url_src,
                verdict,
            )
            # ``image`` is the very dict stored in ``item["image"]``.
            image["src"] = verdict
            kept.append(item)

    setattr(result, "attributes", kept)


def _filter_urls(result: Result | LegacyResult, filter_func: Callable[[Result | LegacyResult, str, str], str | bool]):
    """Filter and/or rewrite, in place, the URLs carried by ``result``.

    ``filter_func`` is called as ``filter_func(result, field_name, url)`` for
    every non-empty URL and its return value decides what happens to the URL:

    - ``True``: the URL is left unchanged.
    - ``False``: a plain field is set to ``None``; an infobox item is removed
      from its list.
    - any other value: it replaces the URL.  For infobox items a falsy
      replacement (e.g. an empty string) removes the item instead.

    The URLs are visited in this order:

    1. the attributes ``url``, ``iframe_src``, ``audio_src``, ``img_src``,
       ``thumbnail_src`` and ``thumbnail`` (``field_name`` is the attribute
       name); when ``url`` changes, ``parsed_url`` is updated accordingly,
    2. the ``url`` key of each item in ``result.urls`` (``field_name`` is
       ``"infobox_urls"``),
    3. the ``image.src`` value of each item in ``result.attributes``
       (``field_name`` is ``"infobox_attributes"``).

    Finally ``result.normalize_result_fields()`` is called.
    """
    _filter_url_fields(result, filter_func)
    _filter_infobox_urls(result, filter_func)
    _filter_infobox_attributes(result, filter_func)

    result.normalize_result_fields()
217
218