duden_8py_source.html

# SPDX-License-Identifier: AGPL-3.0-or-later

"""Duden


"""


import re

from urllib.parse import quote, urljoin

from lxml import html

from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex

from searx.network import raise_for_httperror


# about

about = {

    "website": 'https://www.duden.de',

    "wikidata_id": 'Q73624591',

    "official_api_documentation": None,

    "use_official_api": False,

    "require_api_key": False,

    "results": 'HTML',

    "language": 'de',

}


categories = ['dictionaries']

paging = True


# search-url

base_url = 'https://www.duden.de/'

search_url = base_url + 'suchen/dudenonline/{query}?search_api_fulltext=&page={offset}'


def request(query, params):


    offset = params['pageno'] - 1

    if offset == 0:

        search_url_fmt = base_url + 'suchen/dudenonline/{query}'

        params['url'] = search_url_fmt.format(query=quote(query))

    else:

        params['url'] = search_url.format(offset=offset, query=quote(query))

    # after the last page of results, spelling corrections are returned after a HTTP redirect

    # whatever the page number is

    params['soft_max_redirects'] = 1

    params['raise_for_httperror'] = False

    return params


def response(resp):

    results = []


    if resp.status_code == 404:

        return results


    raise_for_httperror(resp)


    dom = html.fromstring(resp.text)


    number_of_results_element = eval_xpath_getindex(

        dom, '//a[@class="active" and contains(@href,"/suchen/dudenonline")]/span/text()', 0, default=None

    )

    if number_of_results_element is not None:

        number_of_results_string = re.sub('[^0-9]', '', number_of_results_element)

        results.append({'number_of_results': int(number_of_results_string)})


    for result in eval_xpath_list(dom, '//section[not(contains(@class, "essay"))]'):

        url = eval_xpath_getindex(result, './/h2/a', 0).get('href')

        url = urljoin(base_url, url)

        title = eval_xpath(result, 'string(.//h2/a)').strip()

        content = extract_text(eval_xpath(result, './/p'))

        # append result

        results.append({'url': url, 'title': title, 'content': content})


    return results


searx.engines.duden.response
response(resp)
Definition duden.py:46

searx.engines.duden.request
request(query, params)
Definition duden.py:31

searx.network
Definition __init__.py:1

searx.utils
Definition utils.py:1