.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
semantic_scholar.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2"""`Semantic Scholar`_ provides free, AI-driven search and discovery tools, and
3open resources for the global research community. `Semantic Scholar`_ indexes
4over 200 million academic papers sourced from publisher partnerships, data
5providers, and web crawls.
6
7.. _Semantic Scholar: https://www.semanticscholar.org/about
8
9Configuration
10=============
11
12To use this engine, add the following entry to your engines list in
13``settings.yml``:
14
15.. code:: yaml
16
17 - name: semantic scholar
18 engine: semantic_scholar
19 shortcut: se
20
21Implementations
22===============
23
24"""
25
26import typing as t
27
28from datetime import datetime
29from lxml import html
30from flask_babel import gettext # pyright: ignore[reportUnknownVariableType]
31
32from searx.network import get
33from searx.utils import eval_xpath_getindex, html_to_text
34from searx.enginelib import EngineCache
35from searx.result_types import EngineResults
36
37if t.TYPE_CHECKING:
38 from searx.extended_types import SXNG_Response
39 from searx.search.processors import OnlineParams
40
# Engine metadata displayed in SearXNG's preferences / about section.
about = {
    "website": "https://www.semanticscholar.org/",
    "wikidata_id": "Q22908627",
    "official_api_documentation": "https://api.semanticscholar.org/",
    "use_official_api": True,
    "require_api_key": False,
    "results": "JSON",
}

# Categories under which this engine's results are listed.
categories = ["science", "scientific publications"]
# The search endpoint supports paging (see ``page`` in :py:obj:`request`).
paging = True
# JSON endpoint used by the semanticscholar.org web UI
# (requests mimic the web app via the ``X-S2-Client`` header).
search_url = "https://www.semanticscholar.org/api/1/search"
base_url = "https://www.semanticscholar.org"

# Initialized in :py:obj:`setup`; used to cache the UI version string.
CACHE: EngineCache
"""Persistent (SQLite) key/value cache that deletes its values after ``expire``
seconds."""
58
59
def setup(engine_settings: dict[str, t.Any]) -> bool:
    """Initialize the engine: create the persistent :py:obj:`CACHE` keyed by
    the engine's configured name.  Always returns ``True``."""
    global CACHE  # pylint: disable=global-statement
    engine_name = engine_settings["name"]
    CACHE = EngineCache(engine_name)
    return True
64
65
def get_ui_version() -> str:
    """Return Semantic Scholar's UI version string (sent back in the
    ``X-S2-UI-Version`` request header).

    The value is scraped from a ``<meta>`` tag on the start page and kept in
    :py:obj:`CACHE` for five minutes.

    :raises RuntimeError: when the start page can't be fetched or the
        ``s2-ui-version`` meta tag is missing.
    """
    cached: str = CACHE.get("X-S2-UI-Version")
    if cached:
        return cached

    resp = get(base_url)
    if not resp.ok:
        raise RuntimeError("Can't determine Semantic Scholar UI version")

    dom = html.fromstring(resp.text)
    ui_version: str = eval_xpath_getindex(dom, "//meta[@name='s2-ui-version']/@content", 0)
    if not ui_version:
        raise RuntimeError("Can't determine Semantic Scholar UI version")

    # hold the cached value for 5min
    CACHE.set("X-S2-UI-Version", value=ui_version, expire=300)
    logger.debug("X-S2-UI-Version: %s", ui_version)
    return ui_version
81
82
def request(query: str, params: "OnlineParams") -> None:
    """Assemble a POST request against Semantic Scholar's (undocumented)
    search API, mimicking the web UI client."""
    headers = params["headers"]
    headers["Content-Type"] = "application/json"
    # the endpoint rejects requests without a matching UI version header
    headers["X-S2-UI-Version"] = get_ui_version()
    headers["X-S2-Client"] = "webapp-browser"

    params["url"] = search_url
    params["method"] = "POST"
    params["json"] = {
        "queryString": query,
        "page": params["pageno"],
        "pageSize": 10,
        "sort": "relevance",
        "getQuerySuggestions": False,
        "authors": [],
        "coAuthors": [],
        "venues": [],
        "performTitleMatch": True,
    }
104
105
def response(resp: "SXNG_Response") -> EngineResults:
    """Parse the JSON answer of Semantic Scholar's search API into
    :py:obj:`Paper <searx.result_types.Paper>` results."""
    res = EngineResults()
    json_data = resp.json()

    for result in json_data["results"]:
        # pick a URL: primary paper link, first plain link, first alternate
        # link, and finally Semantic Scholar's own paper page as fallback
        url: str = result.get("primaryPaperLink", {}).get("url")
        if not url and result.get("links"):
            url = result.get("links")[0]
        if not url:
            alternatePaperLinks = result.get("alternatePaperLinks")
            if alternatePaperLinks:
                url = alternatePaperLinks[0].get("url")
        if not url:
            url = base_url + "/paper/%s" % result["id"]

        # publication date is expected as ISO ``YYYY-MM-DD``; an unparsable
        # value is dropped instead of aborting the whole result page
        publishedDate: datetime | None = None
        if "pubDate" in result:
            try:
                publishedDate = datetime.strptime(result["pubDate"], "%Y-%m-%d")
            except ValueError:
                publishedDate = None

        # authors — each entry appears to be a list whose first element
        # carries the name (NOTE(review): confirm against a live response)
        authors: list[str] = [author[0]["name"] for author in result.get("authors", [])]

        # pick for the first alternate link, but not from the crawler
        pdf_url: str = ""
        for doc in result.get("alternatePaperLinks", []):
            if doc["linkType"] not in ("crawler", "doi"):
                pdf_url = doc["url"]
                break

        # comments
        comments: str = ""
        if "citationStats" in result:
            comments = gettext(
                "{numCitations} citations from the year {firstCitationVelocityYear} to {lastCitationVelocityYear}"
            ).format(
                numCitations=result["citationStats"]["numCitations"],
                firstCitationVelocityYear=result["citationStats"]["firstCitationVelocityYear"],
                lastCitationVelocityYear=result["citationStats"]["lastCitationVelocityYear"],
            )

        res.add(
            res.types.Paper(
                title=result["title"]["text"],
                url=url,
                content=html_to_text(result["paperAbstract"]["text"]),
                journal=result.get("venue", {}).get("text") or result.get("journal", {}).get("name"),
                doi=result.get("doiInfo", {}).get("doi"),
                tags=result.get("fieldsOfStudy"),
                authors=authors,
                pdf_url=pdf_url,
                publishedDate=publishedDate,
                comments=comments,
            )
        )

    return res
bool setup(dict[str, t.Any] engine_settings)
None request(str query, "OnlineParams" params)
EngineResults response("SXNG_Response" resp)