UNPKG

12.3 kBPlain TextView Raw
1import { dirname, basename } from 'path'
2import {
3 ComputerLanguage,
4 Person,
5 SoftwarePackage,
6 SoftwareSourceCode
7} from '@stencila/schema'
8import OperatingSystem from '@stencila/schema/dist/OperatingSystem'
9
10import Parser from './Parser'
11import { default as pythonSystemModules } from './PythonBuiltins'
12
// Matches a comment line: optional leading whitespace followed by "#".
const REQUIREMENTS_COMMENT_REGEX = /^\s*#/
// Matches an editable requirement line ("-e <source>"); group 1 captures the source up to the next whitespace.
const REQUIREMENTS_EDITABLE_SOURCE_REGEX = /^\s*-e\s*([^\s]+)\s*/
// Matches an include line ("-r <path>"); group 1 captures the path of the included requirements file.
const REQUIREMENTS_INCLUDE_PATH_REGEX = /^\s*-r\s+([^\s]+)\s*/
// Matches any line with content; group 1 captures the first run of non-whitespace characters.
const REQUIREMENTS_STANDARD_REGEX = /^\s*([^\s]+)/
17
/**
 * Tell whether a requirements.txt line is a comment, i.e. whether its first non-whitespace character is "#".
 */
function lineIsComment (line: string): boolean {
  const isComment = REQUIREMENTS_COMMENT_REGEX.test(line)
  return isComment
}
24
/**
 * Run `regex` against `line` and hand back the text of its first capture group,
 * or `null` when the line does not match at all.
 */
function applyRegex (line: string, regex: RegExp): string | null {
  const match = regex.exec(line)
  return match === null ? null : match[1]
}
36
/**
 * Pull the source out of an editable requirement line ("-e <source>"), e.g. a package that is installed
 * from a repository URL. Uses `REQUIREMENTS_EDITABLE_SOURCE_REGEX`; yields `null` when the line does not match.
 */
function extractEditableSource (line: string): string | null {
  const editableSource = applyRegex(line, REQUIREMENTS_EDITABLE_SOURCE_REGEX)
  return editableSource
}
44
/**
 * Pull the path out of a line that includes another requirements file ("-r <path>").
 * Uses `REQUIREMENTS_INCLUDE_PATH_REGEX`; yields `null` when the line does not match.
 */
function extractIncludedRequirementsPath (line: string): string | null {
  const includedPath = applyRegex(line, REQUIREMENTS_INCLUDE_PATH_REGEX)
  return includedPath
}
52
/**
 * Pull the requirement out of a "standard" requirements.txt line: the first run of non-whitespace
 * characters. Uses `REQUIREMENTS_STANDARD_REGEX`; yields `null` for lines with no such run (e.g. blank lines).
 */
function extractStandardRequirements (line: string): string | null {
  const standardRequirement = applyRegex(line, REQUIREMENTS_STANDARD_REGEX)
  return standardRequirement
}
60
/**
 * Split a requirement into its name and its version specifier (operator included).
 * For example "package==1.0.1" => ["package", "==1.0.1"].
 *
 * The split happens at the first occurrence of any of the specifier operators
 * `==`, `!=`, `<=`, `>=`, `~=`, `<` or `>` (`===` is covered because it starts with `==`).
 *
 * @param requirement A requirement such as "package>=1.0"
 * @returns A tuple of the name and the version specifier; the specifier is `null` when no operator is present
 */
function splitStandardRequirementVersion (requirement: string): [string, string | null] {
  // One search finds the earliest operator, whichever one it is. `!=` is included so that an
  // exclusion such as "package!=1.5" is not returned whole as the package name.
  const firstSplitterIndex = requirement.search(/[=!<>~]=|[<>]/)

  if (firstSplitterIndex === -1) {
    return [requirement, null]
  }

  return [requirement.substring(0, firstSplitterIndex), requirement.substring(firstSplitterIndex)]
}
81
/**
 * Convert a list of classifiers to a Map between main classification and sub classification(s).
 * e.g: ['A :: B', 'A :: C', 'D :: E'] => {'A': ['B', 'C'], 'D': ['E']}
 *
 * Only the first "::" is used as the separator, so deeper levels stay in the value
 * (e.g. 'Topic :: Games :: Arcade' => key 'Topic', value 'Games :: Arcade').
 * A classifier with no "::" becomes a key with no sub classification added for it.
 *
 * @param classifiers Classifier strings
 * @returns Map from the trimmed main classification to its trimmed sub classifications, in input order
 */
function buildClassifierMap (classifiers: Array<string>): Map<string, Array<string>> {
  const classifierMap = new Map<string, Array<string>>()

  for (const classifier of classifiers) {
    const doubleColonPosition = classifier.indexOf('::')
    const hasSeparator = doubleColonPosition !== -1

    // Without this check, an index of -1 would give an empty key and a value missing its first character
    const classifierKey = (hasSeparator ? classifier.substring(0, doubleColonPosition) : classifier).trim()

    let subClassifications = classifierMap.get(classifierKey)
    if (subClassifications === undefined) {
      subClassifications = []
      classifierMap.set(classifierKey, subClassifications)
    }

    if (hasSeparator) {
      subClassifications.push(classifier.substring(doubleColonPosition + 2).trim())
    }
  }

  return classifierMap
}
104
/**
 * Break a topic string with several levels of categorisation into its individual levels.
 * The levels are separated by "::" and each one is trimmed, e.g.
 * "Category :: Secondary Category :: Tertiary Category" => ["Category", "Secondary Category", "Tertiary Category"]
 */
function splitTopic (topics: string): Array<string> {
  const levels: Array<string> = []

  for (const level of topics.split('::')) {
    levels.push(level.trim())
  }

  return levels
}
113
/**
 * Reduce a list of multi-level topic strings to two de-duplicated lists: the first level of every topic
 * and, where present, the second level. De-duplication is needed because a top level topic is repeated
 * in each of its sub topics, e.g. ["Game", "Game :: Arcade"] mentions "Game" twice.
 * Deeper levels than the second are ignored.
 */
function parseTopics (topicsList: Array<string>): [Array<string>, Array<string>] {
  const primaryTopics: Array<string> = []
  const secondaryTopics: Array<string> = []

  const addOnce = (bucket: Array<string>, topic: string): void => {
    if (!bucket.includes(topic)) bucket.push(topic)
  }

  for (const entry of topicsList) {
    const levels = splitTopic(entry)
    if (levels.length === 0) continue

    addOnce(primaryTopics, levels[0])
    if (levels.length > 1) addOnce(secondaryTopics, levels[1])
  }

  return [primaryTopics, secondaryTopics]
}
136
/**
 * Map a textual operating system description onto the `OperatingSystem`s it stands for.
 * Matching is case-insensitive and the first rule that applies wins, in this order:
 * "windows", "unix" (which maps to Linux, macOS and Unix), "linux", then "macos" / "mac os".
 * A description matching none of these yields an empty array.
 */
function parseOperatingSystem (operatingSystem: string): Array<OperatingSystem> {
  if (/windows/i.test(operatingSystem)) return [OperatingSystem.windows]

  if (/unix/i.test(operatingSystem)) {
    return [OperatingSystem.linux, OperatingSystem.macos, OperatingSystem.unix]
  }

  if (/linux/i.test(operatingSystem)) return [OperatingSystem.linux]

  const isMac = /macos/i.test(operatingSystem) || /mac os/i.test(operatingSystem)
  return isMac ? [OperatingSystem.macos] : []
}
160
/**
 * How a requirement is identified: by a package name, or by the URL of its source.
 */
export enum RequirementType {
  Named = 0,
  URL = 1
}
165
/**
 * A single Python dependency, as read from a requirements file or derived from the imports found in source code.
 */
interface PythonRequirement {
  /**
   * Type of requirement specified (name or URL)
   */
  type: RequirementType

  /**
   * Name or URL value of the requirement
   */
  value: string

  /**
   * Version of the requirement. For requirements read from a requirements file this is the version
   * specifier including its operator (e.g. "==1.0.1"), or `null` when the line has none; requirements
   * generated from source imports use an empty string.
   */
  version?: string | null
}
182
/**
 * Parser to be used on a directory with Python source code and (optionally) a `requirements.txt` file.
 * If no `requirements.txt` file exists then the Parser will attempt to read requirements from the Python source code.
 */
export default class PythonParser extends Parser {

  /**
   * Build a `SoftwarePackage` describing the folder and its Python requirements.
   *
   * @returns The package, or `null` when the folder contains no `.py` files
   */
  async parse (): Promise<SoftwarePackage | null> {
    const files = this.glob(['**/*.py'])

    if (!files.length) {
      // no .py files so don't parse this directory
      return null
    }

    const pkg = new SoftwarePackage()
    pkg.runtimePlatform = 'Python'

    if (this.folder) {
      pkg.name = basename(this.folder)
    }

    // An explicit requirements file takes precedence over scanning the source for imports
    const requirements: Array<PythonRequirement> = this.exists('requirements.txt')
      ? await this.parseRequirementsFile('requirements.txt')
      : this.generateRequirementsFromSource()

    for (const rawRequirement of requirements) {
      if (rawRequirement.type === RequirementType.Named) {
        pkg.softwareRequirements.push(await this.createPackage(rawRequirement))
      } else if (rawRequirement.type === RequirementType.URL) {
        // NOTE(review): this object is built but never attached to `pkg`, so URL requirements are
        // currently dropped from the result. Confirm whether `softwareRequirements` accepts a
        // `SoftwareSourceCode` before pushing it.
        const sourceRequirement = new SoftwareSourceCode()
        sourceRequirement.runtimePlatform = 'Python'
        sourceRequirement.codeRepository = rawRequirement.value
      }
    }

    return pkg
  }

  /**
   * Convert a `PythonRequirement` into a `SoftwarePackage` by augmenting with metadata from PyPI
   * (fetched from `https://pypi.org/pypi/<name>/json`).
   *
   * @param requirement The named requirement to look up
   * @returns A package with the requirement's name and version plus whatever metadata was available
   */
  private async createPackage (requirement: PythonRequirement): Promise<SoftwarePackage> {
    const softwarePackage = new SoftwarePackage()
    softwarePackage.name = requirement.value
    softwarePackage.runtimePlatform = 'Python'
    softwarePackage.programmingLanguages = [ComputerLanguage.py]

    if (requirement.version) {
      softwarePackage.version = requirement.version
    }

    const pyPiMetadata = await this.fetch(`https://pypi.org/pypi/${softwarePackage.name}/json`)

    // Guard against an empty response as well as one without an `info` section
    const info = pyPiMetadata ? pyPiMetadata.info : undefined
    if (!info) {
      return softwarePackage
    }

    if (info.author) {
      // Only add the "<email>" part when an email is present; otherwise the text would read "Name <undefined>"
      const authorText = info.author_email ? `${info.author} <${info.author_email}>` : `${info.author}`
      softwarePackage.authors.push(Person.fromText(authorText))
    }

    if (info.project_url) {
      softwarePackage.codeRepository = info.project_url
    }

    if (info.classifiers) {
      const classifiers = buildClassifierMap(info.classifiers)

      const topicClassifiers = classifiers.get('Topic')
      if (topicClassifiers !== undefined) {
        const [topics, subTopics] = parseTopics(topicClassifiers)

        if (topics.length) softwarePackage.applicationCategories = topics
        if (subTopics.length) softwarePackage.applicationSubCategories = subTopics
      }

      const operatingSystemClassifiers = classifiers.get('Operating System')
      if (operatingSystemClassifiers !== undefined) {
        const operatingSystems: Array<OperatingSystem> = []

        // Several descriptions can map to the same operating system, so de-duplicate
        for (const operatingSystemDescription of operatingSystemClassifiers) {
          for (const operatingSystem of parseOperatingSystem(operatingSystemDescription)) {
            if (!operatingSystems.includes(operatingSystem)) operatingSystems.push(operatingSystem)
          }
        }
        softwarePackage.operatingSystems = operatingSystems
      }
    }

    if (info.keywords) softwarePackage.keywords = info.keywords

    if (info.license) softwarePackage.license = info.license

    // Prefer the long description when both are present
    if (info.long_description) {
      softwarePackage.description = info.long_description
    } else if (info.description) {
      softwarePackage.description = info.description
    }

    return softwarePackage
  }

  /**
   * Parse a `requirements.txt` file at `path` and return a list of `PythonRequirement`s.
   *
   * Comment lines are skipped, `-e <source>` lines become URL requirements, `-r <path>` lines are
   * followed recursively, other lines starting with "-" (pip options) are ignored, and everything
   * else is treated as a named requirement with an optional version specifier.
   *
   * @param path Path of the requirements file, as understood by `this.read`
   * @returns The requirements in file order, with included files expanded in place
   */
  async parseRequirementsFile (path: string): Promise<Array<PythonRequirement>> {
    const requirementsContent = this.read(path)

    const allRequirementLines = requirementsContent.split('\n')

    let requirements: Array<PythonRequirement> = []

    for (const line of allRequirementLines) {
      if (lineIsComment(line)) {
        continue
      }

      const editableSource = extractEditableSource(line)
      if (editableSource !== null) {
        requirements.push({ value: editableSource, type: RequirementType.URL })
        continue
      }

      const includePath = extractIncludedRequirementsPath(line)
      if (includePath !== null) {
        // pip resolves an included file relative to the file that includes it, which only
        // differs from the original behaviour when the including file is in a sub directory
        const includingDirectory = dirname(path)
        const resolvedPath = (includingDirectory === '.' || includePath.startsWith('/'))
          ? includePath
          : includingDirectory + '/' + includePath

        const includedRequirements = await this.parseRequirementsFile(resolvedPath)
        requirements = requirements.concat(includedRequirements)
        continue
      }

      const standardRequirement = extractStandardRequirements(line)
      // Remaining lines that start with "-" are pip options (e.g. `--index-url`), not package names
      if (standardRequirement !== null && !standardRequirement.startsWith('-')) {
        const [requirementName, version] = splitStandardRequirementVersion(standardRequirement)
        requirements.push({ value: requirementName, type: RequirementType.Named, version: version })
      }
    }

    return requirements
  }

  /**
   * Parse Python source files and find any non-system imports, returning them as an array of
   * named `PythonRequirement`s with an empty version.
   */
  generateRequirementsFromSource (): Array<PythonRequirement> {
    const nonSystemImports = this.findImports().filter(pythonImport => !pythonSystemModules.includes(pythonImport))

    return nonSystemImports.map(nonSystemImport => {
      return {
        value: nonSystemImport, type: RequirementType.Named, version: ''
      }
    })
  }

  /**
   * Parse Python source files and find all imports (including system imports).
   *
   * @returns The unique imported names across all `.py` files, in order of first appearance
   */
  findImports (): Array<string> {
    const files = this.glob(['**/*.py'])

    const imports: Array<string> = []

    for (const file of files) {
      for (const importName of this.readImportsInFile(file)) {
        if (!imports.includes(importName)) imports.push(importName)
      }
    }

    return imports
  }

  /**
   * Parse a single Python source file for imports.
   *
   * Only the first name of each `from <name>` / `import <name>` statement is captured. Names that
   * resolve to a `<name>.py` file or a `<name>/__init__.py` package next to the file are treated as
   * local modules and left out.
   *
   * @param path Path of the Python file, as understood by `this.read`
   * @returns The unique imported names, in order of first appearance
   */
  readImportsInFile (path: string): Array<string> {
    const fileContent = this.read(path)
    const importRegex = /^\s*from ([\w_]+)|^\s*import ([\w_]+)/gm
    const imports: Array<string> = []
    const fileDirectory = dirname(path)

    while (true) {
      // The regex is global, so each `exec` call continues from the previous match
      const match = importRegex.exec(fileContent)

      if (!match) break

      const pkg = match[1] || match[2]
      if (this.glob([fileDirectory + '/' + pkg + '.py', fileDirectory + '/' + pkg + '/__init__.py']).length) {
        continue
      }
      if (!imports.includes(pkg)) imports.push(pkg)
    }

    return imports
  }
}