UNPKG

8.58 kBJavaScriptView Raw
/*
Language: Python
Description: Python is an interpreted, object-oriented, high-level programming language with dynamic semantics.
Website: https://www.python.org
Category: common
*/
7
/**
 * Builds the highlight.js language definition for Python.
 *
 * @param {object} hljs - The highlight.js instance. This function reads
 *   `hljs.regex.lookahead`, `hljs.BACKSLASH_ESCAPE`, `hljs.APOS_STRING_MODE`,
 *   `hljs.QUOTE_STRING_MODE` and `hljs.HASH_COMMENT_MODE` from it.
 * @returns {object} A language definition object with `name`, `aliases`,
 *   `unicodeRegex`, `keywords`, `illegal` and `contains`.
 */
function python(hljs) {
  const regex = hljs.regex;
  // Unicode-aware identifier (requires the `u` flag, see `unicodeRegex` below).
  const IDENT_RE = /[\p{XID_Start}_]\p{XID_Continue}*/u;
  const RESERVED_WORDS = [
    'and',
    'as',
    'assert',
    'async',
    'await',
    'break',
    'class',
    'continue',
    'def',
    'del',
    'elif',
    'else',
    'except',
    'finally',
    'for',
    'from',
    'global',
    'if',
    'import',
    'in',
    'is',
    'lambda',
    'nonlocal|10',
    'not',
    'or',
    'pass',
    'raise',
    'return',
    'try',
    'while',
    'with',
    'yield'
  ];

  const BUILT_INS = [
    '__import__',
    'abs',
    'all',
    'any',
    'ascii',
    'bin',
    'bool',
    'breakpoint',
    'bytearray',
    'bytes',
    'callable',
    'chr',
    'classmethod',
    'compile',
    'complex',
    'delattr',
    'dict',
    'dir',
    'divmod',
    'enumerate',
    'eval',
    'exec',
    'filter',
    'float',
    'format',
    'frozenset',
    'getattr',
    'globals',
    'hasattr',
    'hash',
    'help',
    'hex',
    'id',
    'input',
    'int',
    'isinstance',
    'issubclass',
    'iter',
    'len',
    'list',
    'locals',
    'map',
    'max',
    'memoryview',
    'min',
    'next',
    'object',
    'oct',
    'open',
    'ord',
    'pow',
    'print',
    'property',
    'range',
    'repr',
    'reversed',
    'round',
    'set',
    'setattr',
    'slice',
    'sorted',
    'staticmethod',
    'str',
    'sum',
    'super',
    'tuple',
    'type',
    'vars',
    'zip'
  ];

  const LITERALS = [
    '__debug__',
    'Ellipsis',
    'False',
    'None',
    'NotImplemented',
    'True'
  ];

  // https://docs.python.org/3/library/typing.html
  // TODO: Could these be supplemented by a CamelCase matcher in certain
  // contexts, leaving these remaining only for relevance hinting?
  const TYPES = [
    "Any",
    "Callable",
    "Coroutine",
    "Dict",
    "List",
    "Literal",
    "Generic",
    "Optional",
    "Sequence",
    "Set",
    "Tuple",
    "Type",
    "Union"
  ];

  // Keyword table shared by the top-level mode and every sub-mode that
  // should colour keywords (f-string substitutions, params, type comments).
  const KEYWORDS = {
    $pattern: /[A-Za-z]\w+|__\w+__/,
    keyword: RESERVED_WORDS,
    built_in: BUILT_INS,
    literal: LITERALS,
    type: TYPES
  };

  // Interactive interpreter prompts (`>>> ` / `... `) at the start of a line.
  const PROMPT = {
    className: 'meta',
    begin: /^(>>>|\.\.\.) /
  };

  // `{...}` replacement field inside an f-string; its `contains` is assigned
  // further down because it refers to STRING, which in turn refers to SUBST.
  const SUBST = {
    className: 'subst',
    begin: /\{/,
    end: /\}/,
    keywords: KEYWORDS,
    illegal: /#/
  };

  // `{{` inside an f-string: listed before SUBST so it is consumed here
  // instead of opening a substitution.
  const LITERAL_BRACKET = {
    begin: /\{\{/,
    relevance: 0
  };

  const STRING = {
    className: 'string',
    contains: [ hljs.BACKSLASH_ESCAPE ],
    variants: [
      // triple-quoted strings with optional u/b/r/br/rb prefix
      {
        begin: /([uU]|[bB]|[rR]|[bB][rR]|[rR][bB])?'''/,
        end: /'''/,
        contains: [
          hljs.BACKSLASH_ESCAPE,
          PROMPT
        ],
        relevance: 10
      },
      {
        begin: /([uU]|[bB]|[rR]|[bB][rR]|[rR][bB])?"""/,
        end: /"""/,
        contains: [
          hljs.BACKSLASH_ESCAPE,
          PROMPT
        ],
        relevance: 10
      },
      // triple-quoted f-strings
      {
        begin: /([fF][rR]|[rR][fF]|[fF])'''/,
        end: /'''/,
        contains: [
          hljs.BACKSLASH_ESCAPE,
          PROMPT,
          LITERAL_BRACKET,
          SUBST
        ]
      },
      {
        begin: /([fF][rR]|[rR][fF]|[fF])"""/,
        end: /"""/,
        contains: [
          hljs.BACKSLASH_ESCAPE,
          PROMPT,
          LITERAL_BRACKET,
          SUBST
        ]
      },
      // single-quoted strings with u/r prefix
      {
        begin: /([uU]|[rR])'/,
        end: /'/,
        relevance: 10
      },
      {
        begin: /([uU]|[rR])"/,
        end: /"/,
        relevance: 10
      },
      // single-quoted bytes literals
      {
        begin: /([bB]|[bB][rR]|[rR][bB])'/,
        end: /'/
      },
      {
        begin: /([bB]|[bB][rR]|[rR][bB])"/,
        end: /"/
      },
      // single-quoted f-strings
      {
        begin: /([fF][rR]|[rR][fF]|[fF])'/,
        end: /'/,
        contains: [
          hljs.BACKSLASH_ESCAPE,
          LITERAL_BRACKET,
          SUBST
        ]
      },
      {
        begin: /([fF][rR]|[rR][fF]|[fF])"/,
        end: /"/,
        contains: [
          hljs.BACKSLASH_ESCAPE,
          LITERAL_BRACKET,
          SUBST
        ]
      },
      // plain, unprefixed strings
      hljs.APOS_STRING_MODE,
      hljs.QUOTE_STRING_MODE
    ]
  };

  // https://docs.python.org/3.9/reference/lexical_analysis.html#numeric-literals
  const digitpart = '[0-9](_?[0-9])*';
  const pointfloat = `(\\b(${digitpart}))?\\.(${digitpart})|\\b(${digitpart})\\.`;
  const NUMBER = {
    className: 'number',
    relevance: 0,
    variants: [
      // exponentfloat, pointfloat
      // https://docs.python.org/3.9/reference/lexical_analysis.html#floating-point-literals
      // optionally imaginary
      // https://docs.python.org/3.9/reference/lexical_analysis.html#imaginary-literals
      // Note: no leading \b because floats can start with a decimal point
      // and we don't want to mishandle e.g. `fn(.5)`,
      // no trailing \b for pointfloat because it can end with a decimal point
      // and we don't want to mishandle e.g. `0..hex()`; this should be safe
      // because both MUST contain a decimal point and so cannot be confused with
      // the interior part of an identifier
      {
        begin: `(\\b(${digitpart})|(${pointfloat}))[eE][+-]?(${digitpart})[jJ]?\\b`
      },
      {
        begin: `(${pointfloat})[jJ]?`
      },

      // decinteger, bininteger, octinteger, hexinteger
      // https://docs.python.org/3.9/reference/lexical_analysis.html#integer-literals
      // optionally "long" in Python 2
      // https://docs.python.org/2.7/reference/lexical_analysis.html#integer-and-long-integer-literals
      // decinteger is optionally imaginary
      // https://docs.python.org/3.9/reference/lexical_analysis.html#imaginary-literals
      {
        begin: '\\b([1-9](_?[0-9])*|0+(_?0)*)[lLjJ]?\\b'
      },
      {
        begin: '\\b0[bB](_?[01])+[lL]?\\b'
      },
      {
        begin: '\\b0[oO](_?[0-7])+[lL]?\\b'
      },
      {
        begin: '\\b0[xX](_?[0-9a-fA-F])+[lL]?\\b'
      },

      // imagnumber (digitpart-based)
      // https://docs.python.org/3.9/reference/lexical_analysis.html#imaginary-literals
      {
        begin: `\\b(${digitpart})[jJ]\\b`
      }
    ]
  };

  // `# type: ...` comments: keywords (e.g. type names) are coloured inside
  // them, unlike in ordinary comments.
  const COMMENT_TYPE = {
    className: "comment",
    begin: regex.lookahead(/# type:/),
    end: /$/,
    keywords: KEYWORDS,
    contains: [
      { // prevent keywords from coloring `type`
        begin: /# type:/
      },
      // comment within a datatype comment includes no keywords
      {
        begin: /#/,
        // \b\B can never match, so this sub-mode only ends with its parent
        end: /\b\B/,
        endsWithParent: true
      }
    ]
  };

  const PARAMS = {
    className: 'params',
    variants: [
      // Exclude params in functions without params
      {
        className: "",
        begin: /\(\s*\)/,
        skip: true
      },
      {
        begin: /\(/,
        end: /\)/,
        excludeBegin: true,
        excludeEnd: true,
        keywords: KEYWORDS,
        contains: [
          'self',
          PROMPT,
          NUMBER,
          STRING,
          hljs.HASH_COMMENT_MODE
        ]
      }
    ]
  };

  // Assigned late: SUBST and STRING reference each other.
  SUBST.contains = [
    STRING,
    NUMBER,
    PROMPT
  ];

  return {
    name: 'Python',
    aliases: [
      'py',
      'gyp',
      'ipython'
    ],
    unicodeRegex: true,
    keywords: KEYWORDS,
    // Sequences that should not occur in Python source. `->` is deliberately
    // NOT listed: it is valid syntax for return annotations
    // (`def f(x) -> int:`), and the `def` rule below does not consume it.
    illegal: /(<\/|\?)|=>/,
    contains: [
      PROMPT,
      NUMBER,
      {
        // very common convention
        begin: /\bself\b/
      },
      {
        // eat "if" prior to string so that it won't accidentally be
        // labeled as an f-string
        beginKeywords: "if",
        relevance: 0
      },
      STRING,
      COMMENT_TYPE,
      hljs.HASH_COMMENT_MODE,
      {
        // `def name` -- the leading \b keeps identifiers that merely end in
        // "def" (e.g. `undef foo`) from starting a function definition
        match: [
          /\bdef/, /\s+/,
          IDENT_RE
        ],
        scope: {
          1: "keyword",
          3: "title.function"
        },
        contains: [ PARAMS ]
      },
      {
        // `class Name` / `class Name(Base)` -- the leading \b keeps
        // identifiers that merely end in "class" (e.g. `subclass Foo`) from
        // starting a class definition
        variants: [
          {
            match: [
              /\bclass/, /\s+/,
              IDENT_RE, /\s*/,
              /\(\s*/, IDENT_RE, /\s*\)/
            ]
          },
          {
            match: [
              /\bclass/, /\s+/,
              IDENT_RE
            ]
          }
        ],
        scope: {
          1: "keyword",
          3: "title.class",
          6: "title.class.inherited"
        }
      },
      {
        // decorators: from `@` to end of line or a trailing comment
        className: 'meta',
        begin: /^[\t ]*@/,
        end: /(?=#)|$/,
        contains: [
          NUMBER,
          PARAMS,
          STRING
        ]
      }
    ]
  };
}

export { python as default };