/*
Language: R
Description: R is a free software environment for statistical computing and graphics.
Author: Joe Cheng <joe@rstudio.org>
Contributors: Konrad Rudolph <konrad.rudolph@gmail.com>
Website: https://www.r-project.org
Category: common,scientific
*/

/**
 * Builds the highlight.js language definition for R.
 *
 * The `hljs` argument supplies the regex helpers (`regex.either`,
 * `regex.lookahead`) and the shared modes/mode factories (`COMMENT`,
 * `HASH_COMMENT_MODE`, `BACKSLASH_ESCAPE`, `END_SAME_AS_BEGIN`) used below.
 * The returned object carries the language `name`, its `keywords` table and
 * the list of `contains` modes.
 *
 * @type LanguageFn
 */
function r(hljs) {
  const regex = hljs.regex;

  // Identifiers in R cannot start with `_`, but they can start with `.` if it
  // is not immediately followed by a digit.
  // R also supports quoted identifiers, which are near-arbitrary sequences
  // delimited by backticks (`…`), which may contain escape sequences. These are
  // handled in a separate mode. See `test/markup/r/names.txt` for examples.
  // FIXME: Support Unicode identifiers.
  const IDENT_RE = /(?:(?:[a-zA-Z]|\.[._a-zA-Z])[._a-zA-Z0-9]*)|\.(?!\d)/;

  const NUMBER_TYPES_RE = regex.either(
    // Special case: only hexadecimal binary powers can contain fractions
    /0[xX][0-9a-fA-F]+\.[0-9a-fA-F]*[pP][+-]?\d+i?/,
    // Hexadecimal numbers without fraction and optional binary power
    /0[xX][0-9a-fA-F]+(?:[pP][+-]?\d+)?[Li]?/,
    // Decimal numbers
    /(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?[Li]?/
  );

  // Alternatives are tried left to right, so every multi-character operator
  // has to be listed before the single-character class. In particular `\*\*`
  // must precede the class containing `*`; otherwise `**` would be matched as
  // two separate `*` operators.
  const OPERATORS_RE = /[=!<>:]=|\|\||&&|:::?|<-|<<-|->>|->|\|>|\*\*|[-+*\/?!$&|:<=>@^~]/;

  const PUNCTUATION_RE = regex.either(
    /[()]/,
    /[{}]/,
    /\[\[/,
    /[[\]]/,
    /\\/,
    /,/
  );

  return {
    name: 'R',

    keywords: {
      $pattern: IDENT_RE,
      keyword:
        'function if in break next repeat else for while',
      literal:
        'NULL NA TRUE FALSE Inf NaN NA_integer_|10 NA_real_|10 ' +
        'NA_character_|10 NA_complex_|10',
      built_in:
        // Builtin constants
        'LETTERS letters month.abb month.name pi T F ' +
        // Primitive functions
        // These are all the functions in `base` that are implemented as a
        // `.Primitive`, minus those functions that are also keywords.
        'abs acos acosh all any anyNA Arg as.call as.character ' +
        'as.complex as.double as.environment as.integer as.logical ' +
        'as.null.default as.numeric as.raw asin asinh atan atanh attr ' +
        'attributes baseenv browser c call ceiling class Conj cos cosh ' +
        'cospi cummax cummin cumprod cumsum digamma dim dimnames ' +
        'emptyenv exp expression floor forceAndCall gamma gc.time ' +
        'globalenv Im interactive invisible is.array is.atomic is.call ' +
        'is.character is.complex is.double is.environment is.expression ' +
        'is.finite is.function is.infinite is.integer is.language ' +
        'is.list is.logical is.matrix is.na is.name is.nan is.null ' +
        'is.numeric is.object is.pairlist is.raw is.recursive is.single ' +
        'is.symbol lazyLoadDBfetch length lgamma list log max min ' +
        'missing Mod names nargs nzchar oldClass on.exit pos.to.env ' +
        'proc.time prod quote range Re rep retracemem return round ' +
        'seq_along seq_len seq.int sign signif sin sinh sinpi sqrt ' +
        'standardGeneric substitute sum switch tan tanh tanpi tracemem ' +
        'trigamma trunc unclass untracemem UseMethod xtfrm',
    },

    contains: [
      // Roxygen comments
      hljs.COMMENT(
        /#'/,
        /$/,
        {
          contains: [
            {
              // Handle `@examples` separately to cause all subsequent code
              // until the next `@`-tag on its own line to be kept as-is,
              // preventing highlighting. This code is example R code, so nested
              // doctags shouldn't be treated as such. See
              // `test/markup/r/roxygen.txt` for an example.
              scope: 'doctag',
              match: /@examples/,
              starts: {
                end: regex.lookahead(regex.either(
                  // end if another doc comment
                  /\n^#'\s*(?=@[a-zA-Z]+)/,
                  // or a line with no comment
                  /\n^(?!#')/
                )),
                endsParent: true
              }
            },
            {
              // Handle `@param` to highlight the parameter name following
              // after.
              scope: 'doctag',
              begin: '@param',
              end: /$/,
              contains: [
                {
                  scope: 'variable',
                  variants: [
                    { match: IDENT_RE },
                    { match: /`(?:\\.|[^`\\])+`/ }
                  ],
                  endsParent: true
                }
              ]
            },
            {
              // Any other roxygen tag.
              scope: 'doctag',
              match: /@[a-zA-Z]+/
            },
            {
              // Backslash-prefixed words inside roxygen comments.
              scope: 'keyword',
              match: /\\[a-zA-Z]+/
            }
          ]
        }
      ),

      hljs.HASH_COMMENT_MODE,

      {
        // Raw strings (`r"(...)"` and friends, with optional dashes in the
        // delimiter) followed by ordinary double- and single-quoted strings.
        scope: 'string',
        contains: [hljs.BACKSLASH_ESCAPE],
        variants: [
          hljs.END_SAME_AS_BEGIN({ begin: /[rR]"(-*)\(/, end: /\)(-*)"/ }),
          hljs.END_SAME_AS_BEGIN({ begin: /[rR]"(-*)\{/, end: /\}(-*)"/ }),
          hljs.END_SAME_AS_BEGIN({ begin: /[rR]"(-*)\[/, end: /\](-*)"/ }),
          hljs.END_SAME_AS_BEGIN({ begin: /[rR]'(-*)\(/, end: /\)(-*)'/ }),
          hljs.END_SAME_AS_BEGIN({ begin: /[rR]'(-*)\{/, end: /\}(-*)'/ }),
          hljs.END_SAME_AS_BEGIN({ begin: /[rR]'(-*)\[/, end: /\](-*)'/ }),
          { begin: '"', end: '"', relevance: 0 },
          { begin: "'", end: "'", relevance: 0 }
        ],
      },

      // Matching numbers immediately following punctuation and operators is
      // tricky since we need to look at the character ahead of a number to
      // ensure the number is not part of an identifier, and we cannot use
      // negative look-behind assertions. So instead we explicitly handle all
      // possible combinations of (operator|punctuation), number.
      // TODO: replace with negative look-behind when available
      // { begin: /(?<![a-zA-Z0-9._])0[xX][0-9a-fA-F]+\.[0-9a-fA-F]*[pP][+-]?\d+i?/ },
      // { begin: /(?<![a-zA-Z0-9._])0[xX][0-9a-fA-F]+([pP][+-]?\d+)?[Li]?/ },
      // { begin: /(?<![a-zA-Z0-9._])(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?[Li]?/ }
      {
        relevance: 0,
        variants: [
          {
            scope: {
              1: 'operator',
              2: 'number'
            },
            match: [
              OPERATORS_RE,
              NUMBER_TYPES_RE
            ]
          },
          {
            // Custom infix operator (`%…%`) directly followed by a number.
            scope: {
              1: 'operator',
              2: 'number'
            },
            match: [
              /%[^%]*%/,
              NUMBER_TYPES_RE
            ]
          },
          {
            scope: {
              1: 'punctuation',
              2: 'number'
            },
            match: [
              PUNCTUATION_RE,
              NUMBER_TYPES_RE
            ]
          },
          {
            scope: { 2: 'number' },
            match: [
              /[^a-zA-Z0-9._]|^/, // not part of an identifier, or start of document
              NUMBER_TYPES_RE
            ]
          }
        ]
      },

      // Operators/punctuation when they're not directly followed by numbers
      {
        // Relevance boost for the most common assignment form.
        scope: { 3: 'operator' },
        match: [
          IDENT_RE,
          /\s+/,
          /<-/,
          /\s+/
        ]
      },

      {
        scope: 'operator',
        relevance: 0,
        variants: [
          { match: OPERATORS_RE },
          { match: /%[^%]*%/ }
        ]
      },

      {
        scope: 'punctuation',
        relevance: 0,
        match: PUNCTUATION_RE
      },

      {
        // Escaped identifier
        begin: '`',
        end: '`',
        contains: [
          { begin: /\\./ }
        ]
      }
    ]
  };
}

export { r as default };