UNPKG

7.95 kBJavaScriptView Raw
1/*
2Language: R
3Description: R is a free software environment for statistical computing and graphics.
4Author: Joe Cheng <joe@rstudio.org>
5Contributors: Konrad Rudolph <konrad.rudolph@gmail.com>
6Website: https://www.r-project.org
7Category: common,scientific
8*/
9
/**
 * Builds the highlight.js language definition for R.
 *
 * Reads `regex`, `COMMENT`, `HASH_COMMENT_MODE`, `BACKSLASH_ESCAPE` and
 * `END_SAME_AS_BEGIN` from the supplied `hljs` object and returns a plain
 * object with the language `name`, its `keywords` table and the ordered list
 * of modes in `contains`.
 *
 * @type LanguageFn
 * @param hljs - The highlight.js API object that provides the helpers above.
 * @returns The language definition object for R.
 */
function r(hljs) {
  const regex = hljs.regex;
  // Identifiers in R cannot start with `_`, but they can start with `.` if it
  // is not immediately followed by a digit.
  // R also supports quoted identifiers, which are near-arbitrary sequences
  // delimited by backticks (`…`), which may contain escape sequences. These are
  // handled in a separate mode. See `test/markup/r/names.txt` for examples.
  // FIXME: Support Unicode identifiers.
  const IDENT_RE = /(?:(?:[a-zA-Z]|\.[._a-zA-Z])[._a-zA-Z0-9]*)|\.(?!\d)/;
  const NUMBER_TYPES_RE = regex.either(
    // Special case: only hexadecimal binary powers can contain fractions
    /0[xX][0-9a-fA-F]+\.[0-9a-fA-F]*[pP][+-]?\d+i?/,
    // Hexadecimal numbers without fraction and optional binary power
    /0[xX][0-9a-fA-F]+(?:[pP][+-]?\d+)?[Li]?/,
    // Decimal numbers
    /(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?[Li]?/
  );
  // Alternation is ordered, so every multi-character operator has to be listed
  // before the single-character class. In particular `\*\*` must precede the
  // class containing `*`; otherwise `**` is tokenized as two separate `*`.
  const OPERATORS_RE = /[=!<>:]=|\|\||&&|:::?|<-|<<-|->>|->|\|>|\*\*|[-+*\/?!$&|:<=>@^~]/;
  const PUNCTUATION_RE = regex.either(
    /[()]/,
    /[{}]/,
    // `[[` is listed before the single-bracket class so it wins as one token.
    /\[\[/,
    /[[\]]/,
    /\\/,
    /,/
  );

  return {
    name: 'R',

    keywords: {
      // Keyword lookup uses R's identifier syntax (dots are part of names).
      $pattern: IDENT_RE,
      keyword:
        'function if in break next repeat else for while',
      literal:
        'NULL NA TRUE FALSE Inf NaN NA_integer_|10 NA_real_|10 '
        + 'NA_character_|10 NA_complex_|10',
      built_in:
        // Builtin constants
        'LETTERS letters month.abb month.name pi T F '
        // Primitive functions
        // These are all the functions in `base` that are implemented as a
        // `.Primitive`, minus those functions that are also keywords.
        + 'abs acos acosh all any anyNA Arg as.call as.character '
        + 'as.complex as.double as.environment as.integer as.logical '
        + 'as.null.default as.numeric as.raw asin asinh atan atanh attr '
        + 'attributes baseenv browser c call ceiling class Conj cos cosh '
        + 'cospi cummax cummin cumprod cumsum digamma dim dimnames '
        + 'emptyenv exp expression floor forceAndCall gamma gc.time '
        + 'globalenv Im interactive invisible is.array is.atomic is.call '
        + 'is.character is.complex is.double is.environment is.expression '
        + 'is.finite is.function is.infinite is.integer is.language '
        + 'is.list is.logical is.matrix is.na is.name is.nan is.null '
        + 'is.numeric is.object is.pairlist is.raw is.recursive is.single '
        + 'is.symbol lazyLoadDBfetch length lgamma list log max min '
        + 'missing Mod names nargs nzchar oldClass on.exit pos.to.env '
        + 'proc.time prod quote range Re rep retracemem return round '
        + 'seq_along seq_len seq.int sign signif sin sinh sinpi sqrt '
        + 'standardGeneric substitute sum switch tan tanh tanpi tracemem '
        + 'trigamma trunc unclass untracemem UseMethod xtfrm',
    },

    contains: [
      // Roxygen comments
      hljs.COMMENT(
        /#'/,
        /$/,
        { contains: [
          {
            // Handle `@examples` separately to cause all subsequent code
            // until the next `@`-tag on its own line to be kept as-is,
            // preventing highlighting. This code is example R code, so nested
            // doctags should not be treated as such. See
            // `test/markup/r/roxygen.txt` for an example.
            scope: 'doctag',
            match: /@examples/,
            starts: {
              end: regex.lookahead(regex.either(
                // end if another doc comment
                /\n^#'\s*(?=@[a-zA-Z]+)/,
                // or a line with no comment
                /\n^(?!#')/
              )),
              endsParent: true
            }
          },
          {
            // Handle `@param` to highlight the parameter name following
            // after.
            scope: 'doctag',
            begin: '@param',
            end: /$/,
            contains: [
              {
                scope: 'variable',
                variants: [
                  // Plain identifier, or a backtick-quoted name with escapes.
                  { match: IDENT_RE },
                  { match: /`(?:\\.|[^`\\])+`/ }
                ],
                endsParent: true
              }
            ]
          },
          {
            // Any other roxygen tag.
            scope: 'doctag',
            match: /@[a-zA-Z]+/
          },
          {
            // Backslash-prefixed words inside roxygen text (e.g. `\code`).
            scope: 'keyword',
            match: /\\[a-zA-Z]+/
          }
        ] }
      ),

      hljs.HASH_COMMENT_MODE,

      {
        scope: 'string',
        // NOTE(review): this `contains` is declared next to `variants`, so it
        // presumably applies to the raw-string variants as well. R raw strings
        // take backslashes literally - confirm this is intended.
        contains: [ hljs.BACKSLASH_ESCAPE ],
        variants: [
          // Raw strings: r"(...)", r"{...}", r"[...]" (and the same with single
          // quotes), optionally with dashes between the quote and the bracket.
          // The captured dashes must be repeated at the closing delimiter.
          hljs.END_SAME_AS_BEGIN({
            begin: /[rR]"(-*)\(/,
            end: /\)(-*)"/
          }),
          hljs.END_SAME_AS_BEGIN({
            begin: /[rR]"(-*)\{/,
            end: /\}(-*)"/
          }),
          hljs.END_SAME_AS_BEGIN({
            begin: /[rR]"(-*)\[/,
            end: /\](-*)"/
          }),
          hljs.END_SAME_AS_BEGIN({
            begin: /[rR]'(-*)\(/,
            end: /\)(-*)'/
          }),
          hljs.END_SAME_AS_BEGIN({
            begin: /[rR]'(-*)\{/,
            end: /\}(-*)'/
          }),
          hljs.END_SAME_AS_BEGIN({
            begin: /[rR]'(-*)\[/,
            end: /\](-*)'/
          }),
          // Ordinary double- and single-quoted strings.
          {
            begin: '"',
            end: '"',
            relevance: 0
          },
          {
            begin: "'",
            end: "'",
            relevance: 0
          }
        ],
      },

      // Matching numbers immediately following punctuation and operators is
      // tricky since we need to look at the character ahead of a number to
      // ensure the number is not part of an identifier, and we cannot use
      // negative look-behind assertions. So instead we explicitly handle all
      // possible combinations of (operator|punctuation), number.
      // TODO: replace with negative look-behind when available
      // { begin: /(?<![a-zA-Z0-9._])0[xX][0-9a-fA-F]+\.[0-9a-fA-F]*[pP][+-]?\d+i?/ },
      // { begin: /(?<![a-zA-Z0-9._])0[xX][0-9a-fA-F]+([pP][+-]?\d+)?[Li]?/ },
      // { begin: /(?<![a-zA-Z0-9._])(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?[Li]?/ }
      {
        relevance: 0,
        variants: [
          {
            // operator immediately followed by a number
            scope: {
              1: 'operator',
              2: 'number'
            },
            match: [
              OPERATORS_RE,
              NUMBER_TYPES_RE
            ]
          },
          {
            // `%...%` operator immediately followed by a number
            scope: {
              1: 'operator',
              2: 'number'
            },
            match: [
              /%[^%]*%/,
              NUMBER_TYPES_RE
            ]
          },
          {
            // punctuation immediately followed by a number
            scope: {
              1: 'punctuation',
              2: 'number'
            },
            match: [
              PUNCTUATION_RE,
              NUMBER_TYPES_RE
            ]
          },
          {
            // Only the number (group 2) receives a scope here.
            scope: { 2: 'number' },
            match: [
              /[^a-zA-Z0-9._]|^/, // not part of an identifier, or start of document
              NUMBER_TYPES_RE
            ]
          }
        ]
      },

      // Operators/punctuation when they're not directly followed by numbers
      {
        // Relevance boost for the most common assignment form.
        scope: { 3: 'operator' },
        match: [
          IDENT_RE,
          /\s+/,
          /<-/,
          /\s+/
        ]
      },

      {
        scope: 'operator',
        relevance: 0,
        variants: [
          { match: OPERATORS_RE },
          { match: /%[^%]*%/ }
        ]
      },

      {
        scope: 'punctuation',
        relevance: 0,
        match: PUNCTUATION_RE
      },

      {
        // Escaped identifier
        begin: '`',
        end: '`',
        contains: [ { begin: /\\./ } ]
      }
    ]
  };
}
256
257module.exports = r;
258
\No newline at end of file