UNPKG

7.86 kBPlain TextView Raw
1import path from 'path'
2
3import Parser from './Parser'
4import { SoftwarePackage, Person } from '@stencila/schema'
5
/**
 * Dockter `Parser` class for R requirements files and source code.
 *
 * For each package, meta-data is obtained from http://crandb.r-pkg.org and used to create a `SoftwarePackage` instance
 * using crosswalks from column "R Package Description" in https://github.com/codemeta/codemeta/blob/master/crosswalk.csv
 *
 * System dependencies for each package are obtained from https://sysreqs.r-hub.io.
 */
export default class RParser extends Parser {

  /**
   * Parse a folder by detecting any R requirements or source code files
   * and return a `SoftwarePackage` instance.
   *
   * If the folder has a `DESCRIPTION` file, the package name, date and `Imports`
   * are read from it. Otherwise all `*.R` and `*.Rmd` files are scanned for
   * package usage.
   *
   * @returns A `SoftwarePackage` for the folder, or `null` if there is neither a
   *          `DESCRIPTION` file nor any R source file
   * @throws Error if the `Date` field of the `DESCRIPTION` file can not be parsed
   */
  async parse (): Promise<SoftwarePackage | null> {
    let name: string | undefined
    let date: Date | undefined
    let packages: Array<string>

    if (this.exists('DESCRIPTION')) {
      // Read the existing/generated DESCRIPTION file
      const desc = this.read('DESCRIPTION')

      // Get `name` (trimmed so that a trailing `\r` or spaces do not end up in it)
      const matchName = desc.match(/^Package:\s*(.+)/m)
      if (matchName) name = matchName[1].trim()

      // Get `date`; if there is none a default is applied below
      const matchDate = desc.match(/^Date:\s*(.+)/m)
      if (matchDate) {
        const dateText = matchDate[1].trim()
        const dateNum = Date.parse(dateText)
        if (isNaN(dateNum)) {
          throw new Error('Unable to parse date in DESCRIPTION file: ' + dateText)
        }
        date = new Date(dateNum)
      }

      // Get dependencies
      packages = this.importsFromDescription(desc)
    } else {
      // Scan the directory for any R or Rmd files
      const files = this.glob(['**/*.R', '**/*.Rmd'])
      // If no R files detected, return null
      if (!files.length) return null

      // Collect the unique package names used across all files
      packages = []
      for (const file of files) {
        for (const used of this.packagesFromCode(this.read(file))) {
          if (!packages.includes(used)) packages.push(used)
        }
      }
      packages.sort()
    }

    // Default to the folder name, with any non alphanumerics removed to ensure compatibility
    // with R package name requirements
    if (!name) name = path.basename(this.folder).replace(/[^a-zA-Z0-9]/g, '')
    // Default to yesterday's date (to ensure MRAN is available for the date)
    if (!date) date = new Date(Date.now() - 24 * 3600 * 1000)

    // Set package properties
    const pkg = new SoftwarePackage()
    pkg.name = name
    pkg.runtimePlatform = 'R'
    // Keep only the `YYYY-MM-DD` part of the ISO timestamp
    pkg.datePublished = date.toISOString().substring(0, 10)

    // For each dependency, query https://crandb.r-pkg.org to get a manifest including its own
    // dependencies and convert it to a `SoftwarePackage`
    pkg.softwareRequirements = await Promise.all(
      packages.map(dependency => this.createPackage(dependency))
    )

    return pkg
  }

  /**
   * Extract the package names listed in the `Imports` field of a `DESCRIPTION` file.
   *
   * The field value may start on the same line as `Imports:` and continues over
   * the following indented lines, until the next un-indented line or the end
   * of the text. Version constraints such as `(>= 1.0)` are dropped.
   *
   * @param desc Content of the `DESCRIPTION` file
   * @returns Package names in the order listed; empty if there is no `Imports` field
   */
  private importsFromDescription (desc: string): Array<string> {
    const header = /^Imports:/m.exec(desc)
    if (!header) return []

    // Work on the text after the field name so that all offsets are relative to it
    const rest = desc.substring(header.index + header[0].length)
    // The value ends at the first newline that is followed by an un-indented line
    const next = rest.search(/\n(?=\S)/)
    const value = next === -1 ? rest : rest.substring(0, next)

    const packages: Array<string> = []
    for (const entry of value.split(',')) {
      // Package names are made of letters, digits and dots (e.g. `data.table`)
      const match = entry.match(/^\s*([\w.]+)/)
      const imported = match ? match[1] : entry.trim()
      if (imported.length) packages.push(imported)
    }
    return packages
  }

  /**
   * Find the names of packages used in R source code.
   *
   * Detects `library(<pkg>)`, `require(<pkg>)` (bare, single or double quoted),
   * `<pkg>::<member>` and `<pkg>:::<member>`.
   *
   * @param code R (or R Markdown) source code
   * @returns Package names in order of occurrence, possibly with duplicates
   */
  private packagesFromCode (code: string): Array<string> {
    // Based on https://regex101.com/r/hG4iij/4 but with dots allowed in bare
    // package names so that e.g. `data.table` is captured in full.
    // Capture groups: 1 = bare name, 2 = double quoted, 3 = single quoted, 4 = name before `::`
    const regex = /(?:(?:library|require)\s*\(\s*(?:(?:\s*([\w.]+)\s*)|(?:"([^"]*)")|(?:'([^']*)'))\s*\))|(?:([\w.]+):::?\w+)/g
    const found: Array<string> = []
    let match = regex.exec(code)
    while (match) {
      const used = match[1] || match[2] || match[3] || match[4]
      // An empty quoted name, e.g. `library("")`, captures nothing usable
      if (used) found.push(used)
      match = regex.exec(code)
    }
    return found
  }

  /**
   * Create a `SoftwarePackage` instance from a R package name
   *
   * This method fetches meta-data for a R package to populate the properties
   * of a `SoftwarePackage` instance. It recursively fetches meta-data on the package's
   * dependencies, including system dependencies.
   *
   * @param name Name of the R package
   * @returns The populated package; only `name` is set for built-in packages
   *          and for packages not found on CRANDB
   * @throws Error if a system requirement entry does not have exactly one key
   */
  private async createPackage (name: string): Promise<SoftwarePackage> {
    // Create new package instance and populate its
    // properties in order of type hierarchy:
    //   Thing > CreativeWork > SoftwareSourceCode > SoftwarePackage
    const pkg = new SoftwarePackage()
    pkg.name = name

    // These packages are built-in to R distributions, so we don't need to collect
    // meta-data for them.
    if (['stats', 'graphics', 'grDevices', 'tools', 'utils', 'datasets', 'methods'].includes(name)) {
      return pkg
    }

    // Fetch meta-data from CRANDB
    // If null (i.e. 404) then return package as is
    const crandb = await this.fetch(`http://crandb.r-pkg.org/${name}`)
    if (crandb === null) return pkg

    // The URL field is a comma separated list; drop the whitespace around each entry
    let urls: Array<string> | undefined
    if (crandb.URL) {
      urls = crandb.URL.split(',')
        .map((url: string) => url.trim())
        .filter((url: string) => url.length > 0)
    }

    // schema:Thing
    pkg.description = crandb.Description
    if (urls) pkg.urls = urls

    // schema:CreativeWork
    if (crandb.Author) {
      const authors: Array<string> = crandb.Author.split(',\n')
      for (const author of authors) {
        // Entries can look like `Jane Doe [aut, cre]`: a name followed by role codes
        const match = author.match(/^([^\[]+?) \[([^\]]+)\]/)
        if (match) {
          const person = Person.fromText(match[1])
          const roles = match[2].split(', ')
          if (roles.includes('aut')) pkg.authors.push(person)
          if (roles.includes('ctb')) pkg.contributors.push(person)
          if (roles.includes('cre')) pkg.creators.push(person)
        } else {
          // No role codes: treat the whole entry as an author
          pkg.authors.push(Person.fromText(author))
        }
      }
    }
    pkg.datePublished = crandb['Date/Publication']
    pkg.license = crandb.License

    // schema:SoftwareSourceCode
    pkg.runtimePlatform = 'R'
    if (urls) pkg.codeRepository = urls // See issue #35

    // stencila:SoftwarePackage
    // Create `SoftwarePackage` for each dependency (the keys of `Imports` are package names)
    if (crandb.Imports) {
      pkg.softwareRequirements = await Promise.all(
        Object.keys(crandb.Imports).map(dependency => this.createPackage(dependency))
      )
    }

    // Required system dependencies are obtained from https://sysreqs.r-hub.io and
    // added as `softwareRequirements` with "deb" as `runtimePlatform`
    const requireDeb = (debName: string) => {
      const required = new SoftwarePackage()
      required.name = debName
      required.runtimePlatform = 'deb'
      pkg.softwareRequirements.push(required)
    }

    // A failed fetch gives null, which is treated as no system requirements
    const sysreqs = await this.fetch(`https://sysreqs.r-hub.io/pkg/${name}`)
    for (const sysreq of sysreqs || []) {
      const keys = Object.keys(sysreq)
      if (keys.length !== 1) throw new Error(`Expected only one key for each sysreq but got: ${keys.join(',')}`)
      const debPackage = sysreq[keys[0]].platforms['DEB']
      // The deb package can be null e.g. `curl https://sysreqs.r-hub.io/pkg/lubridate`
      if (typeof debPackage === 'string') {
        // Handle strings e.g. curl https://sysreqs.r-hub.io/pkg/XML
        requireDeb(debPackage)
      } else if (Array.isArray(debPackage)) {
        // Handle arrays e.g. curl https://sysreqs.r-hub.io/pkg/gsl
        // Only entries for Ubuntu that are not tied to specific releases are used
        for (const deb of debPackage.filter(deb => deb.distribution === 'Ubuntu' && deb.releases === undefined)) {
          if (deb.buildtime) requireDeb(deb.buildtime)
          if (deb.runtime) requireDeb(deb.runtime)
        }
      }
    }

    return pkg
  }
}