.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
code.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2"""Typification of the *code* results. Results of this type are rendered in
3the :origin:`code.html <searx/templates/simple/result_templates/code.html>`
4template. For highlighting the code passages, Pygments_ is used.
5
6.. _Pygments: https://pygments.org
7
8----
9
10.. autoclass:: Code
11 :members:
12 :show-inheritance:
13
14"""
15# pylint: disable=too-few-public-methods, disable=invalid-name
16
17from __future__ import annotations
18
19__all__ = ["Code"]
20
21import typing as t
22
23from pygments import highlight # pyright: ignore[reportUnknownVariableType]
24from pygments.lexers._mapping import LEXERS # pyright: ignore[reportMissingTypeStubs]
25from pygments.lexers import guess_lexer, get_lexer_by_name, guess_lexer_for_filename
26from pygments.util import ClassNotFound
27from pygments.formatters import HtmlFormatter # pylint: disable=no-name-in-module
28
29from ._base import MainResult
30
31
# Cache of all lexer alias names known to Pygments (lower-cased).  It starts
# empty and is filled on the first call of :py:obj:`is_valid_language`.  A set
# is used because the cache is only ever queried for membership.
_pygments_languages: set[str] = set()


def is_valid_language(code_language: str) -> bool:
    """Checks if the specified ``code_language`` is known in Pygments.

    The check is case-insensitive: ``code_language`` is lower-cased and looked
    up in the (lower-cased) alias names of all lexers registered in Pygments'
    ``LEXERS`` mapping.

    :param code_language: Short name (alias) of a Pygments lexer.
    :return: ``True`` if a lexer with this alias exists, otherwise ``False``.
    """
    if not _pygments_languages:
        # The third item (index 2) of each LEXERS entry is the tuple with the
        # alias names.  The cache is filled by one single update() call
        # instead of one append per alias.
        _pygments_languages.update(
            alias_name.lower() for lexer_info in LEXERS.values() for alias_name in lexer_info[2]
        )
    return code_language.lower() in _pygments_languages
43
44
@t.final
class Code(MainResult, kw_only=True):
    """Result type for code passages (rendered by the ``code.html`` template)."""

    template: str = "code.html"

    repository: str | None = None
    """Link to a repository that is related to the *result*."""

    codelines: list[tuple[int, str]] = []
    """List of two-item tuples: the first item is the line number, the second
    item is the code line itself."""

    hl_lines: set[int] = set()
    """A set of line numbers to highlight."""

    code_language: str = "<guess>"
    """Short name of the Pygments lexer, e.g. ``text`` for the
    :py:obj:`pygments.lexers.special.TextLexer`.  The available languages are
    listed in `Pygments languages`_.  A :py:obj:`ValueError` is raised if the
    language is not in this list.

    The default ``<guess>`` has a special meaning:

    - If :py:obj:`Code.filename` is set, the language of the ``codelines`` is
      determined by Pygments' factory
      :py:obj:`pygments.lexers.guess_lexer_for_filename`.

    - Otherwise Pygments' factory :py:obj:`pygments.lexers.guess_lexer` is
      used.

    If the language can't be detected, ``text`` is used as fallback.

    .. _Pygments languages: https://pygments.org/languages/
    """

    filename: str | None = None
    """Optional file name; it can help to ``<guess>`` the language of the code
    (e.g. for short, ambiguous code examples).  If :py:obj:`Code.title` is not
    set, the filename is used as title."""

    strip_new_lines: bool = True
    """Strip leading and trailing newlines for each returned fragment (default:
    ``True``).  A single file might return multiple code fragments."""

    strip_whitespace: bool = False
    """Strip all leading and trailing whitespace for each returned fragment
    (default: ``False``).  A single file might return multiple code fragments.
    Enabling this might break code indentation."""

    def __post_init__(self):
        """Set the title default and validate :py:obj:`Code.code_language`.

        :raises ValueError: if ``code_language`` is neither ``<guess>`` nor a
            language known to Pygments.
        """
        super().__post_init__()

        # the file name is the fallback for a missing title
        if self.filename and not self.title:
            self.title = self.filename

        language = self.code_language
        if language == "<guess>" or is_valid_language(language):
            return
        raise ValueError(f"unknown code_language: {self.code_language}")

    def __hash__(self):
        """The hash value is built from the URL and the code lines.
        :py:obj:`Code <Result.__eq__>` objects are equal when the hash values
        of both objects are equal.
        """
        identity = f"{self.url} {self.codelines}"
        return hash(identity)

    def get_lexer(self):
        """Return the Pygments lexer used to highlight the ``codelines``.

        An explicitly set ``code_language`` always wins.  For ``<guess>`` the
        lexer is guessed from the file name (if there is one), then from the
        code itself; the ``text`` lexer is the last resort.
        """
        if self.code_language != "<guess>":
            return get_lexer_by_name(self.code_language)

        src_code = "\n".join(entry[1] for entry in self.codelines)

        # guessing strategies in the order of their priority
        strategies = []
        if self.filename:
            strategies.append(lambda: guess_lexer_for_filename(self.filename, src_code))
        strategies.append(lambda: guess_lexer(src_code))

        for strategy in strategies:
            try:
                return strategy()
            except ClassNotFound:
                continue
        return get_lexer_by_name("text")

    def HTML(self, **options) -> str:  # pyright: ignore[reportUnknownParameterType, reportMissingParameterType]
        """Rendered HTML of the code lines.  Additional options are passed to
        the formatter, for more details have a look at HtmlFormatter_.

        Consecutive line numbers form one code block; each code block is
        highlighted on its own and the HTML of all blocks is joined by a
        newline.

        .. _HtmlFormatter: https://pygments.org/docs/formatters/#HtmlFormatter
        """
        lexer = self.get_lexer()
        rendered_blocks: list[str] = []  # HTML representation of all code blocks

        def _highlight_block(block_lines: list[str], first_line_no: int) -> None:
            # Options given by the caller take precedence, the values below
            # are only defaults.
            formatter_opts: dict[str, t.Any] = dict(options)  # pyright: ignore[reportUnknownArgumentType]
            formatter_opts.setdefault("linenos", "inline")
            formatter_opts.setdefault("linenostart", first_line_no)
            formatter_opts.setdefault("cssclass", "code-highlight")
            # line numbers to highlight, relative to the start of this block
            formatter_opts.setdefault("hl_lines", [hl - first_line_no + 1 for hl in self.hl_lines])
            # Wrap the code inside <pre> blocks using <code>, as recommended
            # by the HTML5 specification (Pygments' default is False).
            formatter_opts.setdefault("wrapcode", True)

            html = highlight(
                "\n".join(block_lines),
                lexer,
                HtmlFormatter(**formatter_opts),  # pyright: ignore[reportUnknownArgumentType]
            )
            rendered_blocks.append(html)

        block: list[str] = []  # lines of the current code block
        block_first_no: int = 0  # line number where the current block starts
        previous_no: int | None = None  # line number of the previous code line

        for line_no, code_line in self.codelines:
            if previous_no is None:
                # very first code line: it opens the first block
                block_first_no = line_no
            elif previous_no + 1 != line_no:
                # gap in the line numbers: render the finished block and open
                # a new one that starts with the current code line
                _highlight_block(block, block_first_no)
                block = []
                block_first_no = line_no

            block.append(code_line)
            previous_no = line_no

        # highlight the (last) code block
        _highlight_block(block, block_first_no)
        return "\n".join(rendered_blocks)
str HTML(self, **options)
Definition code.py:126
bool is_valid_language(str code_language)
Definition code.py:35