.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
base.py
Go to the documentation of this file.
1
# SPDX-License-Identifier: AGPL-3.0-or-later
2
"""BASE (Scholar publications)
3
4
"""
5
from
datetime
import
datetime
6
import
re
7
8
from
urllib.parse
import
urlencode
9
from
lxml
import
etree
10
from
searx.utils
import
searx_useragent
11
12
# about
13
# Engine metadata: where the service lives and what kind of API/result
# format this engine talks to.
about = {
    "website": 'https://base-search.net',
    "wikidata_id": 'Q448335',
    "official_api_documentation": 'https://api.base-search.net/',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'XML',
}
21
22
# Search categories this engine is listed under.
categories = ['science']
23
24
# URL template for the BASE HTTP search interface.  ``{query}`` receives an
# already URL-encoded ``query=...`` pair (see ``request``); ``{hits}`` and
# ``{offset}`` receive the page size and the index of the first hit.
base_url = (
    'https://api.base-search.net/cgi-bin/BaseHttpSearchInterface.fcgi'
    + '?func=PerformSearch&{query}&boost=oa&hits={hits}&offset={offset}'
)
28
29
# engine dependent config
30
paging = True  # the engine supports requesting result pages beyond the first
number_of_results = 10  # hits requested per page; also the paging stride
32
33
# shortcuts for advanced search
34
# Maps the field prefixes a user may type to the field names of the BASE
# advanced-search syntax.  NOTE: the identifier is misspelled ("shorcut"),
# but it is kept as is because other code refers to it by this name.
shorcut_dict = {
    # user-friendly keywords
    'format:': 'dcformat:',
    'author:': 'dccreator:',
    'collection:': 'dccollection:',
    'hdate:': 'dchdate:',
    'contributor:': 'dccontributor:',
    'coverage:': 'dccoverage:',
    'date:': 'dcdate:',
    'abstract:': 'dcdescription:',
    'urls:': 'dcidentifier:',
    'language:': 'dclanguage:',
    'publisher:': 'dcpublisher:',
    'relation:': 'dcrelation:',
    'rights:': 'dcrights:',
    'source:': 'dcsource:',
    'subject:': 'dcsubject:',
    'title:': 'dctitle:',
    'type:': 'dcdctype:',
}
54
55
56
def request(query, params):
    """Build the BASE API request for ``query``.

    User-friendly field prefixes (the keys of ``shorcut_dict``, e.g.
    ``author:``) are rewritten to the API's advanced-search keywords, then
    the search URL and the ``User-Agent`` header are stored in ``params``.

    :param query: search terms as entered by the user.
    :param params: request parameters; ``params['pageno']`` is read (page 1
        maps to offset 0) and ``params['url']`` and
        ``params['headers']['User-Agent']`` are written.
    :returns: the same ``params`` dict, updated in place.
    """
    # Replace all shortcuts in ONE pass, anchored on a word boundary.
    # Substituting key by key re-scans text produced by earlier substitutions:
    # ``hdate:`` became ``dchdate:`` and was then mangled to ``dchdcdate:`` by
    # the ``date:`` rule, and keywords already in API form (``dcdate:``) or
    # merely ending in a shortcut (``subtitle:``) were corrupted the same way.
    shortcut_re = re.compile(
        r'\b(?:' + '|'.join(re.escape(key) for key in shorcut_dict) + r')'
    )
    query = shortcut_re.sub(lambda match: shorcut_dict[match.group(0)], query)

    # basic search
    offset = (params['pageno'] - 1) * number_of_results

    string_args = {
        # urlencode yields the complete ``query=<escaped terms>`` pair
        'query': urlencode({'query': query}),
        'offset': offset,
        'hits': number_of_results,
    }

    params['url'] = base_url.format(**string_args)

    params['headers']['User-Agent'] = searx_useragent()
    return params
74
75
76
def response(resp):
    """Parse a BASE API XML response into a list of result dicts.

    Every ``./result/doc`` element becomes one result.  Its child elements
    are selected by their ``name`` attribute: ``dctitle``, ``dclink``,
    ``dcdescription`` (truncated to 300 characters) and ``dcdate``.

    :param resp: HTTP response whose ``content`` attribute holds the XML
        document.
    :returns: list of dicts with the keys ``url``, ``title`` and ``content``,
        plus ``publishedDate`` (a ``datetime``) when ``dcdate`` is present and
        matches one of the known formats.
    """
    results = []

    search_results = etree.XML(resp.content)

    for entry in search_results.xpath('./result/doc'):
        content = "No description available"
        url = ""
        title = ""
        # Stays None when the record has no dcdate; the result is then
        # emitted without a publishedDate.
        date = None

        for item in entry:
            # .get() tolerates children without a "name" attribute, and empty
            # elements (text is None) keep the defaults set above.
            name = item.get("name")
            text = item.text
            if text is None:
                continue

            if name == "dcdate":
                date = text

            elif name == "dctitle":
                title = text

            elif name == "dclink":
                url = text

            elif name == "dcdescription":
                content = text[:300]
                if len(text) > 300:
                    content += "..."

        # dates returned by the BASE API come in several formats
        publishedDate = None
        if date is not None:
            for date_format in ('%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%d', '%Y-%m', '%Y'):
                try:
                    publishedDate = datetime.strptime(date, date_format)
                    break
                except ValueError:
                    # not this format, try the next one
                    continue

        res_dict = {'url': url, 'title': title, 'content': content}
        if publishedDate is not None:
            res_dict['publishedDate'] = publishedDate

        results.append(res_dict)

    return results
searx.engines.base.response
response(resp)
Definition
base.py:76
searx.engines.base.request
request(query, params)
Definition
base.py:56
searx.utils
Definition
utils.py:1
searxng
searx
engines
base.py
Generated on Sat Nov 16 2024 00:10:57 for .oO SearXNG Developer Documentation Oo. by
1.12.0