# jisho_8py_source.html

# SPDX-License-Identifier: AGPL-3.0-or-later

"""

Jisho (the Japanese-English dictionary)

"""


from urllib.parse import urlencode, urljoin


# Engine metadata describing the upstream service.
# NOTE(review): presumably consumed by the engine framework that loads this
# module; nothing in this file reads it.
about = {
    "website": "https://jisho.org",
    "wikidata_id": "Q24568389",
    "official_api_documentation": "https://jisho.org/forum/54fefc1f6e73340b1f160000-is-there-any-kind-of-search-api",
    "use_official_api": True,
    "require_api_key": False,
    "results": "JSON",
    "language": "ja",
}

# Engine settings.
categories = ["dictionaries"]
paging = False

# Site root, prefix for per-word pages (joined with an entry's slug in
# ``response``), and the search endpoint template whose ``{query}``
# placeholder is filled with an url-encoded query string in ``request``.
URL = "https://jisho.org"
BASE_URL = f"{URL}/word/"
SEARCH_URL = f"{URL}/api/v1/search/words?{{query}}"


def request(query, params):
    """Store the Jisho word-search URL for *query* in ``params['url']``.

    The query is url-encoded as the ``keyword`` parameter and substituted
    into ``SEARCH_URL``.  The same ``params`` mapping is returned after
    being updated in place.
    """
    encoded_keyword = urlencode({'keyword': query})
    search_url = SEARCH_URL.format(query=encoded_keyword)
    params['url'] = search_url

    # NOTE(review): ``logger`` is not defined in the visible source;
    # presumably it is injected into the module by the engine loader.
    logger.debug(f"query_url --> {params['url']}")
    return params


def response(resp):
    """Convert a Jisho API response into a list of result dicts.

    ``resp`` must expose a ``json()`` method returning the decoded API
    payload; entries are read from its ``data`` list.  For every entry a
    regular result (``url``, ``title``, ``content``) is produced, and the
    first entry that is kept additionally yields an infobox built by
    ``get_infobox``.

    Entries whose first sense is tagged ``'Wikipedia definition'`` are
    skipped, as are entries that provide no written form or reading at all
    (they would have an empty title).
    """
    results = []
    first_result = True

    search_results = resp.json()

    for page in search_results.get('data', []):
        # Entries that are purely from Wikipedia are excluded.
        parts_of_speech = page.get('senses') and page['senses'][0].get('parts_of_speech')
        if parts_of_speech and parts_of_speech[0] == 'Wikipedia definition':
            # The original code used ``pass`` here, which excluded nothing.
            continue

        # Process alternative forms: "word (reading)", "word", or the bare
        # reading when no written form is given.
        alt_forms = []
        for form in page['japanese']:
            if 'word' in form:
                title = form['word']
                if 'reading' in form:
                    title += ' (' + form['reading'] + ')'
                alt_forms.append(title)
            elif 'reading' in form:
                alt_forms.append(form['reading'])
            # A form with neither key carries nothing to display; ignore it.

        if not alt_forms:
            # Nothing usable as a title (and nothing for the infobox heading).
            continue

        result_url = urljoin(BASE_URL, page['slug'])
        definitions = get_definitions(page)

        # For results, we'll return the URL, all alternative forms (as title),
        # and all definitions (as description) truncated to 300 characters.
        content = " ".join(f"{engdef}." for _, engdef, _ in definitions)
        if len(content) > 300:
            content = content[:300] + '...'
        results.append({'url': result_url, 'title': ", ".join(alt_forms), 'content': content})

        # Like Wordnik, we'll return the first result in an infobox too.
        if first_result:
            first_result = False
            results.append(get_infobox(alt_forms, result_url, definitions))

    return results


def get_definitions(page):
    """Extract the definitions of one Jisho entry.

    Returns a list with one ``(parts_of_speech, english_definitions, extra)``
    tuple of strings per item in ``page['senses']``:

    * ``parts_of_speech`` -- the sense's parts of speech joined by ``', '``
    * ``english_definitions`` -- the English glosses joined by ``'; '``
    * ``extra`` -- notes assembled from ``tags``, ``info`` and
      ``restrictions``; each note ends with a period, notes are separated
      by a single space, and the string is empty when there are none.

    Missing or empty keys are treated as empty lists, so an entry without
    ``senses`` yields ``[]`` instead of raising ``KeyError``.
    """
    definitions = []
    for sense in page.get('senses') or []:
        tags = sense.get('tags')
        info = sense.get('info')
        restrictions = sense.get('restrictions')

        # Extra data. These fields are not documented upstream, so the
        # handling below is based on the original author's assumptions.
        notes = []
        if tags and info:
            # "usually written as kana: <kana>"
            notes.append(tags[0] + ', ' + info[0])
        elif tags:
            # abbreviation, archaism, etc.
            notes.append(', '.join(tags))
        elif info:
            # Capitalisation of info strings is inconsistent; normalise it.
            notes.append(', '.join(info).capitalize())
        if restrictions:
            notes.append('Only applies to: ' + ', '.join(restrictions))

        definitions.append(
            (
                ', '.join(sense.get('parts_of_speech') or []),
                '; '.join(sense.get('english_definitions') or []),
                ('. '.join(notes) + '.') if notes else '',
            )
        )
    return definitions


def get_infobox(alt_forms, result_url, definitions):
    """Build the infobox result dict for one dictionary entry.

    :param alt_forms: display forms of the entry; the first one becomes the
        infobox title, the remaining ones are listed as "Other forms".
    :param result_url: URL used for the single "Jisho.org" link.
    :param definitions: iterable of ``(pos, engdef, extra)`` string tuples,
        as produced by ``get_definitions``.
    :returns: dict with the keys ``infobox`` (title), ``content`` (an HTML
        fragment) and ``urls``.

    ``content`` is assembled as HTML, so every piece of text taken from the
    arguments is escaped before being embedded.  The title is returned
    unescaped.
    NOTE(review): this assumes the consumer renders ``content`` as raw HTML
    and escapes the title itself -- confirm against the result template.
    """
    from html import escape  # local import: keeps this block self-contained

    infobox_content = []

    # title & alt_forms
    infobox_title = alt_forms[0] if alt_forms else ''
    if len(alt_forms) > 1:
        other_forms = ", ".join(escape(form) for form in alt_forms[1:])
        infobox_content.append(f'<p><i>Other forms:</i> {other_forms}</p>')

    # definitions (the attribution literal below is reproduced verbatim,
    # including its whitespace)
    infobox_content.append(
        '''

        <small><a href="https://www.edrdg.org/wiki/index.php/JMdict-EDICT_Dictionary_Project">JMdict</a>

        and <a href="https://www.edrdg.org/enamdict/enamdict_doc.html">JMnedict</a>

        by <a href="https://www.edrdg.org/edrdg/licence.html">EDRDG</a>, CC BY-SA 3.0.</small>

        <ul>

    '''
    )
    for pos, engdef, extra in definitions:
        if pos == 'Wikipedia definition':
            # Close the current list and start a new one under a separate
            # attribution line for Wikipedia-sourced definitions.
            infobox_content.append('</ul><small>Wikipedia, CC BY-SA 3.0.</small><ul>')
        pos_html = f'<i>{escape(pos)}</i>: ' if pos else ''
        extra_html = f' ({escape(extra)})' if extra else ''
        infobox_content.append(f'<li>{pos_html}{escape(engdef)}{extra_html}</li>')
    infobox_content.append('</ul>')

    return {
        'infobox': infobox_title,
        'content': ''.join(infobox_content),
        'urls': [
            {
                'title': 'Jisho.org',
                'url': result_url,
            }
        ],
    }


# Cross-reference index left over from the documentation export
# (line numbers refer to the original jisho.py):
#
# searx.engines.jisho.response
# response(resp)
# Definition jisho.py:34
#
# searx.engines.jisho.get_definitions
# get_definitions(page)
# Definition jisho.py:75
#
# searx.engines.jisho.get_infobox
# get_infobox(alt_forms, result_url, definitions)
# Definition jisho.py:103
#
# searx.engines.jisho.request
# request(query, params)
# Definition jisho.py:27