1 | import { dirname, basename } from 'path'
|
2 | import {
|
3 | ComputerLanguage,
|
4 | Person,
|
5 | SoftwarePackage,
|
6 | SoftwareSourceCode
|
7 | } from '@stencila/schema'
|
8 | import OperatingSystem from '@stencila/schema/dist/OperatingSystem'
|
9 |
|
10 | import Parser from './Parser'
|
11 | import { default as pythonSystemModules } from './PythonBuiltins'
|
12 |
|
/**
 * Matches a requirements file line whose first non-whitespace character is `#`,
 * i.e. a line that is entirely a comment.
 */
const REQUIREMENTS_COMMENT_REGEX = /^\s*#/

/**
 * Matches an editable requirement line, written either with the short option
 * (`-e <source>`) or the long option (`--editable <source>` / `--editable=<source>`).
 * Capture group 1 is the source that follows the option.
 */
const REQUIREMENTS_EDITABLE_SOURCE_REGEX = /^\s*(?:-e\s*|--editable(?:\s+|=))([^\s]+)\s*/

/**
 * Matches a line that includes another requirements file, written either with
 * the short option (`-r <path>`) or the long option (`--requirement <path>` /
 * `--requirement=<path>`). Capture group 1 is the included path.
 */
const REQUIREMENTS_INCLUDE_PATH_REGEX = /^\s*(?:-r\s+|--requirement(?:\s+|=))([^\s]+)\s*/

/**
 * Matches the first run of non-whitespace characters on a line.
 * Capture group 1 is that run (for example `requests==2.0`).
 */
const REQUIREMENTS_STANDARD_REGEX = /^\s*([^\s]+)/
|
17 |
|
18 |
|
19 |
|
20 |
|
/**
 * Tell whether a requirements file line is a comment.
 *
 * @param line A single line of a requirements file.
 * @returns `true` when `REQUIREMENTS_COMMENT_REGEX` matches the line.
 */
function lineIsComment (line: string): boolean {
  return REQUIREMENTS_COMMENT_REGEX.test(line)
}
|
24 |
|
25 |
|
26 |
|
27 |
|
/**
 * Run `regex` against `line` and return the text of its first capture group.
 *
 * @param line The text to search.
 * @param regex The pattern to apply; it is expected to define a first capture group.
 * @returns The text of capture group 1, or `null` when the pattern does not
 *          match or group 1 did not take part in the match.
 */
function applyRegex (line: string, regex: RegExp): string | null {
  const result = regex.exec(line)

  if (result === null) {
    return null
  }

  // `result[1]` is `undefined` when the pattern has no capture group or an
  // optional group was skipped; normalise that to `null` so the declared
  // return type holds and callers' `!== null` checks behave as intended.
  const captured = result[1]
  return captured === undefined ? null : captured
}
|
36 |
|
37 |
|
38 |
|
39 |
|
40 |
|
/**
 * Extract the source of an editable requirement from a requirements file line.
 *
 * @param line A single line of a requirements file.
 * @returns Capture group 1 of `REQUIREMENTS_EDITABLE_SOURCE_REGEX`, or `null`
 *          when the line is not an editable requirement.
 */
function extractEditableSource (line: string): string | null {
  const editableSource = applyRegex(line, REQUIREMENTS_EDITABLE_SOURCE_REGEX)
  return editableSource
}
|
44 |
|
45 |
|
46 |
|
47 |
|
48 |
|
/**
 * Extract the path of another requirements file included from a line.
 *
 * @param line A single line of a requirements file.
 * @returns Capture group 1 of `REQUIREMENTS_INCLUDE_PATH_REGEX`, or `null`
 *          when the line does not include another file.
 */
function extractIncludedRequirementsPath (line: string): string | null {
  const includedPath = applyRegex(line, REQUIREMENTS_INCLUDE_PATH_REGEX)
  return includedPath
}
|
52 |
|
53 |
|
54 |
|
55 |
|
56 |
|
/**
 * Extract a standard (named) requirement from a requirements file line.
 *
 * @param line A single line of a requirements file.
 * @returns Capture group 1 of `REQUIREMENTS_STANDARD_REGEX` (the first run of
 *          non-whitespace characters), or `null` when the line has none.
 */
function extractStandardRequirements (line: string): string | null {
  const standardRequirement = applyRegex(line, REQUIREMENTS_STANDARD_REGEX)
  return standardRequirement
}
|
60 |
|
61 |
|
62 |
|
63 |
|
64 |
|
/**
 * Split a requirement such as `requests>=2.0` into its name and its version
 * specifier.
 *
 * The split happens at the left-most comparison operator. Recognised operators
 * are `==` (which also locates `===`), `!=`, `<=`, `>=`, `~=`, `<` and `>`.
 *
 * @param requirement The requirement text, without surrounding whitespace.
 * @returns A tuple of the text before the operator and the text from the
 *          operator onwards (operator included). When no operator is present
 *          the whole input is returned as the name and the version is `null`.
 */
function splitStandardRequirementVersion (requirement: string): [string, string | null] {
  // One pattern covers every operator; `search` yields the left-most match,
  // which is the earliest operator in the string. `!=` is included so that
  // exclusions like `pkg!=1.0` are no longer returned whole as the name.
  const firstSplitterIndex = requirement.search(/[=!<>~]=|[<>]/)

  if (firstSplitterIndex === -1) {
    return [requirement, null]
  }

  return [requirement.substring(0, firstSplitterIndex), requirement.substring(firstSplitterIndex)]
}
|
81 |
|
82 |
|
83 |
|
84 |
|
85 |
|
/**
 * Group classifier strings of the form `Key :: Value` by their key.
 *
 * Each classifier is split at its first `::`. The trimmed text before it is
 * the key and the trimmed remainder (which may itself contain further `::`
 * levels) is appended to that key's list, in input order.
 *
 * @param classifiers Classifier strings, e.g. `Topic :: Software Development :: Libraries`.
 * @returns A map from each key to the list of values seen for it. Classifiers
 *          that contain no `::` have no key/value structure and are ignored.
 */
function buildClassifierMap (classifiers: Array<string>): Map<string, Array<string>> {
  const classifierMap = new Map<string, Array<string>>()

  for (const classifier of classifiers) {
    const doubleColonPosition = classifier.indexOf('::')

    // Without a separator, `substring(0, -1)` would give an empty key and
    // `substring(1)` would cut off the first character, so skip such entries.
    if (doubleColonPosition === -1) continue

    const classifierKey = classifier.substring(0, doubleColonPosition).trim()
    const classifierValue = classifier.substring(doubleColonPosition + 2).trim()

    const existingValues = classifierMap.get(classifierKey)
    if (existingValues === undefined) {
      classifierMap.set(classifierKey, [classifierValue])
    } else {
      existingValues.push(classifierValue)
    }
  }

  return classifierMap
}
|
104 |
|
105 |
|
106 |
|
107 |
|
108 |
|
109 |
|
/**
 * Break a `::`-separated topic string into its individual levels.
 *
 * @param topics A topic string such as `Scientific/Engineering :: Mathematics`.
 * @returns The levels in order, each with surrounding whitespace removed.
 */
function splitTopic (topics: string): Array<string> {
  const levels: Array<string> = []

  for (const level of topics.split('::')) {
    levels.push(level.trim())
  }

  return levels
}
|
113 |
|
114 |
|
115 |
|
116 |
|
117 |
|
118 |
|
/**
 * Collect the distinct first-level and second-level topics from a list of
 * `::`-separated topic strings.
 *
 * @param topicsList Topic strings, each split into levels with `splitTopic`.
 * @returns A tuple `[primaryTopics, secondaryTopics]`: the unique first levels
 *          and the unique second levels, each in order of first appearance.
 *          Levels beyond the second are not collected.
 */
function parseTopics (topicsList: Array<string>): [Array<string>, Array<string>] {
  const primaryTopics: Array<string> = []
  const secondaryTopics: Array<string> = []

  // Append `topic` to `target` only if it has not been recorded already
  const addUnique = (target: Array<string>, topic: string): void => {
    if (target.indexOf(topic) === -1) target.push(topic)
  }

  topicsList.forEach(topics => {
    const levels = splitTopic(topics)
    if (levels.length === 0) return

    addUnique(primaryTopics, levels[0])
    if (levels.length > 1) addUnique(secondaryTopics, levels[1])
  })

  return [primaryTopics, secondaryTopics]
}
|
136 |
|
137 |
|
138 |
|
139 |
|
140 |
|
/**
 * Map a free-text operating system description onto `OperatingSystem` values.
 *
 * The description is tested case-insensitively against a fixed sequence of
 * keywords and the first keyword that matches decides the result.
 *
 * @param operatingSystem The description text to inspect.
 * @returns The matching operating systems, or an empty array when none of the
 *          keywords occur in the description.
 */
function parseOperatingSystem (operatingSystem: string): Array<OperatingSystem> {
  const mentions = (pattern: RegExp): boolean => pattern.test(operatingSystem)

  if (mentions(/windows/i)) return [OperatingSystem.windows]

  // "unix" is tested before the more specific names and expands to Linux,
  // macOS and generic Unix together
  if (mentions(/unix/i)) return [OperatingSystem.linux, OperatingSystem.macos, OperatingSystem.unix]

  if (mentions(/linux/i)) return [OperatingSystem.linux]

  if (mentions(/macos/i) || mentions(/mac os/i)) return [OperatingSystem.macos]

  return []
}
|
160 |
|
/**
 * The kinds of requirement that can be read from a requirements file.
 */
export enum RequirementType {
  /** A requirement identified by a package name. */
  Named = 0,
  /** A requirement identified by the source it is installed from. */
  URL = 1
}
|
165 |
|
/**
 * A single requirement of a Python project, as gathered from a requirements
 * file or from the imports found in source files.
 */
interface PythonRequirement {
  /** Whether `value` is a package name or the source of an editable install. */
  type: RequirementType

  /** The package name (for `Named`) or the source text (for `URL`). */
  value: string

  /** The version specifier including its operator, when one was given. */
  version?: string | null
}
|
182 |
|
183 |
|
184 |
|
185 |
|
186 |
|
/**
 * Parser that describes a folder of Python code as a `SoftwarePackage`.
 *
 * Requirements are read from `requirements.txt` when that file exists, and are
 * otherwise derived from the `import` statements found in the `.py` files.
 * File access (`glob`, `exists`, `read`), `fetch` and `folder` are used through
 * `this` and are not defined in this class.
 */
export default class PythonParser extends Parser {

  /**
   * Build a `SoftwarePackage` for the folder.
   *
   * @returns The package, or `null` when the folder contains no `.py` files.
   */
  async parse (): Promise<SoftwarePackage | null> {
    const files = this.glob(['**/*.py'])

    if (!files.length) {
      // No Python source files, so there is nothing to describe
      return null
    }

    const pkg = new SoftwarePackage()
    pkg.runtimePlatform = 'Python'

    if (this.folder) {
      pkg.name = basename(this.folder)
    }

    // Prefer the explicit requirements file; fall back to scanning the source
    const requirements: Array<PythonRequirement> = this.exists('requirements.txt')
      ? await this.parseRequirementsFile('requirements.txt')
      : this.generateRequirementsFromSource()

    for (const rawRequirement of requirements) {
      if (rawRequirement.type === RequirementType.Named) {
        pkg.softwareRequirements.push(await this.createPackage(rawRequirement))
      } else if (rawRequirement.type === RequirementType.URL) {
        // NOTE(review): this object is built but never attached to `pkg`, so
        // editable (`-e`) requirements are currently dropped from the result.
        // Confirm whether `softwareRequirements` can hold a
        // `SoftwareSourceCode` before pushing it there.
        const sourceRequirement = new SoftwareSourceCode()
        sourceRequirement.runtimePlatform = 'Python'
        sourceRequirement.codeRepository = rawRequirement.value
      }
    }

    return pkg
  }

  /**
   * Create a `SoftwarePackage` for a named requirement and fill it in with the
   * metadata fetched from `https://pypi.org/pypi/<name>/json`.
   *
   * @param requirement The requirement whose `value` is the package name.
   * @returns The package; only name, platform, language and (if given) version
   *          are set when the response carries no `info` object.
   */
  private async createPackage (requirement: PythonRequirement): Promise<SoftwarePackage> {
    const softwarePackage = new SoftwarePackage()
    softwarePackage.name = requirement.value
    softwarePackage.runtimePlatform = 'Python'
    softwarePackage.programmingLanguages = [ComputerLanguage.py]

    if (requirement.version) {
      softwarePackage.version = requirement.version
    }

    const pyPiMetadata = await this.fetch(`https://pypi.org/pypi/${softwarePackage.name}/json`)

    // Guard against an empty response (e.g. a name that is not on PyPI) so
    // that reading `.info` cannot throw. NOTE(review): what `fetch` returns on
    // failure is not visible from this file.
    if (!pyPiMetadata || !pyPiMetadata.info) {
      return softwarePackage
    }

    const info = pyPiMetadata.info

    if (info.author) {
      // Only append the `<email>` part when an email is present; otherwise the
      // text would contain a literal "<undefined>" or "<null>".
      // NOTE(review): assumes `Person.fromText` accepts a bare name — confirm.
      const authorText = info.author_email
        ? `${info.author} <${info.author_email}>`
        : `${info.author}`
      softwarePackage.authors.push(Person.fromText(authorText))
    }

    if (info.project_url) {
      softwarePackage.codeRepository = info.project_url
    }

    if (info.classifiers) {
      const classifiers = buildClassifierMap(info.classifiers)

      const topicClassifiers = classifiers.get('Topic')
      if (topicClassifiers !== undefined) {
        const [topics, subTopics] = parseTopics(topicClassifiers)

        if (topics.length) softwarePackage.applicationCategories = topics
        if (subTopics.length) softwarePackage.applicationSubCategories = subTopics
      }

      const operatingSystemClassifiers = classifiers.get('Operating System')
      if (operatingSystemClassifiers !== undefined) {
        const operatingSystems: Array<OperatingSystem> = []

        // Several descriptions can map to the same system; keep each one once
        for (const operatingSystemDescription of operatingSystemClassifiers) {
          for (const operatingSystem of parseOperatingSystem(operatingSystemDescription)) {
            if (!operatingSystems.includes(operatingSystem)) operatingSystems.push(operatingSystem)
          }
        }

        softwarePackage.operatingSystems = operatingSystems
      }
    }

    if (info.keywords) softwarePackage.keywords = info.keywords

    if (info.license) softwarePackage.license = info.license

    // Prefer `long_description` and fall back to `description`
    if (info.long_description) {
      softwarePackage.description = info.long_description
    } else if (info.description) {
      softwarePackage.description = info.description
    }

    return softwarePackage
  }

  /**
   * Read a requirements file and convert its lines into requirements.
   *
   * Comment lines are skipped, editable lines become `URL` requirements,
   * include lines are followed recursively, any other option line is ignored
   * and every remaining non-blank line becomes a `Named` requirement.
   *
   * @param path Path of the requirements file, as understood by `this.read`.
   * @returns The requirements in file order, with included files expanded in place.
   */
  async parseRequirementsFile (path: string): Promise<Array<PythonRequirement>> {
    const requirementsContent = this.read(path)
    const allRequirementLines = requirementsContent.split('\n')

    // Lines that start with `-` are pip options, never package names
    const optionLineRegex = /^\s*-/

    let requirements: Array<PythonRequirement> = []

    for (const line of allRequirementLines) {
      if (lineIsComment(line)) {
        continue
      }

      const editableSource = extractEditableSource(line)
      if (editableSource !== null) {
        requirements.push({ value: editableSource, type: RequirementType.URL })
        continue
      }

      const includePath = extractIncludedRequirementsPath(line)
      if (includePath !== null) {
        // NOTE(review): the included path is passed on as written; confirm
        // whether it should be resolved relative to the including file.
        const includedRequirements = await this.parseRequirementsFile(includePath)
        requirements = requirements.concat(includedRequirements)
        continue
      }

      // Remaining option lines (e.g. `--index-url`, `-f`, `-c`) configure pip
      // rather than name a package, so they must not be looked up as one.
      if (optionLineRegex.test(line)) {
        continue
      }

      const standardRequirement = extractStandardRequirements(line)
      if (standardRequirement !== null) {
        const [requirementName, version] = splitStandardRequirementVersion(standardRequirement)
        requirements.push({ value: requirementName, type: RequirementType.Named, version: version })
      }
    }

    return requirements
  }

  /**
   * Derive requirements from the imports found in the source files, leaving
   * out every name listed in `pythonSystemModules`.
   *
   * @returns One `Named` requirement per remaining import, with an empty version.
   */
  generateRequirementsFromSource (): Array<PythonRequirement> {
    const nonSystemImports = this.findImports().filter(pythonImport => !pythonSystemModules.includes(pythonImport))

    return nonSystemImports.map(nonSystemImport => {
      return {
        value: nonSystemImport, type: RequirementType.Named, version: ''
      }
    })
  }

  /**
   * Collect the distinct names imported by all `.py` files in the folder.
   *
   * @returns The imported names, in order of first appearance.
   */
  findImports (): Array<string> {
    const files = this.glob(['**/*.py'])
    const imports: Array<string> = []

    for (const file of files) {
      for (const importName of this.readImportsInFile(file)) {
        if (!imports.includes(importName)) imports.push(importName)
      }
    }

    return imports
  }

  /**
   * Find the names imported by one Python file.
   *
   * Only the first identifier after `from` or `import` at the start of a line
   * is taken. A name is left out when a file `<name>.py` or
   * `<name>/__init__.py` exists next to the importing file.
   *
   * @param path Path of the Python file, as understood by `this.read`.
   * @returns The distinct imported names, in order of first appearance.
   */
  readImportsInFile (path: string): Array<string> {
    const fileContent = this.read(path)
    const importRegex = /^\s*from ([\w_]+)|^\s*import ([\w_]+)/gm
    const imports: Array<string> = []
    const fileDirectory = dirname(path)

    // The regex is global, so each `exec` call resumes after the previous match
    for (let match = importRegex.exec(fileContent); match !== null; match = importRegex.exec(fileContent)) {
      // Group 1 is set for `from x ...`, group 2 for `import x`
      const pkg = match[1] || match[2]

      const isLocalModule = this.glob([
        fileDirectory + '/' + pkg + '.py',
        fileDirectory + '/' + pkg + '/__init__.py'
      ]).length

      if (isLocalModule) {
        continue
      }

      if (!imports.includes(pkg)) imports.push(pkg)
    }

    return imports
  }
}
|