UNPKG

13.2 kBJavaScriptView Raw
1"use strict";
2var __importDefault = (this && this.__importDefault) || function (mod) {
3 return (mod && mod.__esModule) ? mod : { "default": mod };
4};
5Object.defineProperty(exports, "__esModule", { value: true });
6const path_1 = require("path");
7const schema_1 = require("@stencila/schema");
8const OperatingSystem_1 = __importDefault(require("@stencila/schema/dist/OperatingSystem"));
9const Parser_1 = __importDefault(require("./Parser"));
10const PythonBuiltins_1 = __importDefault(require("./PythonBuiltins"));
/** Matches a requirements.txt comment line: optional leading whitespace followed by "#". */
const REQUIREMENTS_COMMENT_REGEX = /^\s*#/;
/**
 * Matches an editable-install line and captures the source in group 1.
 * Accepts the short option (`-e <source>`, whitespace optional) as well as the long option
 * (`--editable <source>` or `--editable=<source>`).
 */
const REQUIREMENTS_EDITABLE_SOURCE_REGEX = /^\s*(?:-e\s*|--editable(?:\s+|=))([^\s]+)\s*/;
/**
 * Matches a line that includes another requirements file and captures its path in group 1.
 * Accepts the short option (`-r <path>`) as well as the long option
 * (`--requirement <path>` or `--requirement=<path>`).
 */
const REQUIREMENTS_INCLUDE_PATH_REGEX = /^\s*(?:-r\s+|--requirement(?:\s+|=))([^\s]+)\s*/;
/** Captures the first whitespace-delimited token of a line in group 1. */
const REQUIREMENTS_STANDARD_REGEX = /^\s*([^\s]+)/;
/**
 * Report whether a requirements.txt line is a comment, i.e. whether its first
 * non-whitespace character is "#".
 */
function lineIsComment(line) {
    const commentMatch = REQUIREMENTS_COMMENT_REGEX.exec(line);
    return commentMatch !== null;
}
/**
 * Run `regex` against `line` and hand back the first capture group.
 * Yields `null` when the regex does not match at all.
 */
function applyRegex(line, regex) {
    const match = regex.exec(line);
    return match === null ? null : match[1];
}
/**
 * Look for an editable-source entry in a requirements.txt line by running
 * `REQUIREMENTS_EDITABLE_SOURCE_REGEX` against it.
 * Gives back the captured source (e.g. a repository URL) or `null` if the line is not such an entry.
 */
function extractEditableSource(line) {
    const editableMatch = REQUIREMENTS_EDITABLE_SOURCE_REGEX.exec(line);
    return editableMatch === null ? null : editableMatch[1];
}
/**
 * Look for an include of another requirements file in a requirements.txt line by running
 * `REQUIREMENTS_INCLUDE_PATH_REGEX` against it.
 * Gives back the captured path or `null` if the line is not an include.
 */
function extractIncludedRequirementsPath(line) {
    const includeMatch = REQUIREMENTS_INCLUDE_PATH_REGEX.exec(line);
    return includeMatch === null ? null : includeMatch[1];
}
/**
 * Pull the "standard" requirement out of a requirements.txt line by running
 * `REQUIREMENTS_STANDARD_REGEX` against it.
 * Gives back the first whitespace-delimited token, or `null` when the line has none.
 */
function extractStandardRequirements(line) {
    const standardMatch = REQUIREMENTS_STANDARD_REGEX.exec(line);
    return standardMatch === null ? null : standardMatch[1];
}
/**
 * Split a requirement into its name and its version specifier.
 * For example "package==1.0.1" => ["package", "==1.0.1"].
 *
 * Recognised version operators are `==`, `!=`, `<=`, `>=`, `~=`, `<` and `>`; the split happens at the
 * earliest operator, so compound specifiers such as ">=1.0,<2.0" are kept whole.
 * An environment marker (everything from the first ";") is discarded before splitting so that
 * comparison operators inside the marker are not mistaken for a version specifier.
 *
 * @param {string} requirement - a single requirement token, e.g. "package>=1.0"
 * @returns {[string, (string|null)]} the name and the version specifier (`null` when there is none)
 */
function splitStandardRequirementVersion(requirement) {
    const markerIndex = requirement.indexOf(';');
    const specifier = markerIndex === -1 ? requirement : requirement.substring(0, markerIndex);
    // `search` returns the index of the earliest operator occurrence, or -1 if there is none.
    const operatorIndex = specifier.search(/==|!=|<=|>=|~=|<|>/);
    if (operatorIndex === -1) {
        return [specifier, null];
    }
    return [specifier.substring(0, operatorIndex), specifier.substring(operatorIndex)];
}
/**
 * Convert a list of classifiers to a Map between main classification and sub classification(s).
 * e.g: ['A :: B', 'A :: C', 'D :: E'] => {'A': ['B', 'C'], 'D': ['E']}
 *
 * Only the first "::" separates the key from the value, so deeper levels stay in the value
 * ('A :: B :: C' => key 'A', value 'B :: C'). A classifier without any "::" is recorded as a key
 * with no sub classification rather than being mangled.
 *
 * @param {string[]} classifiers - classifier strings
 * @returns {Map<string, string[]>} main classification => sub classifications, in input order
 */
function buildClassifierMap(classifiers) {
    const classifierMap = new Map();
    for (const classifier of classifiers) {
        const separatorIndex = classifier.indexOf('::');
        const hasSeparator = separatorIndex !== -1;
        const classifierKey = (hasSeparator ? classifier.substring(0, separatorIndex) : classifier).trim();
        if (!classifierMap.has(classifierKey)) {
            classifierMap.set(classifierKey, []);
        }
        if (hasSeparator) {
            classifierMap.get(classifierKey).push(classifier.substring(separatorIndex + 2).trim());
        }
    }
    return classifierMap;
}
/**
 * Break a "::"-separated topic string into its individual levels, trimming the whitespace around each one.
 * E.g. "Category :: Secondary Category :: Tertiary Category" becomes
 * ["Category", "Secondary Category", "Tertiary Category"]. The result always has one entry per level,
 * so a string without "::" yields a single-element array.
 */
function splitTopic(topics) {
    const levels = [];
    for (const level of topics.split('::')) {
        levels.push(level.trim());
    }
    return levels;
}
/**
 * Reduce an array of PyPI formatted topics to two de-duplicated lists: the top level topics and the
 * second level topics, returned as a `[primary, secondary]` tuple. De-duplication is needed because PyPI
 * repeats a top level topic in each of its sub topics, e.g. ["Game", "Game :: Arcade"] names "Game" twice.
 * Levels deeper than the second are ignored. Order of first appearance is preserved.
 */
function parseTopics(topicsList) {
    const primaryTopics = [];
    const secondaryTopics = [];
    const addUnique = (collected, topic) => {
        if (!collected.includes(topic)) {
            collected.push(topic);
        }
    };
    for (const topics of topicsList) {
        const levels = splitTopic(topics);
        if (levels.length === 0) {
            continue;
        }
        addUnique(primaryTopics, levels[0]);
        if (levels.length > 1) {
            addUnique(secondaryTopics, levels[1]);
        }
    }
    return [primaryTopics, secondaryTopics];
}
/**
 * Map a textual operating system description onto the `OperatingSystem`s it stands for.
 * Matching is case-insensitive and the first rule that matches wins, checked in this order:
 * "windows", "unix" (which expands to Linux, macOS and Unix), "linux", then "macos" / "mac os".
 * A description that matches none of the rules yields an empty array.
 */
function parseOperatingSystem(operatingSystem) {
    const os = OperatingSystem_1.default;
    // Each rule pairs the patterns that trigger it with the systems it produces; order matters.
    const rules = [
        [[/windows/i], [os.windows]],
        [[/unix/i], [os.linux, os.macos, os.unix]],
        [[/linux/i], [os.linux]],
        [[/macos/i, /mac os/i], [os.macos]]
    ];
    for (const [patterns, systems] of rules) {
        if (patterns.some(pattern => operatingSystem.match(pattern))) {
            return systems;
        }
    }
    return [];
}
/**
 * The kinds of requirement the parser distinguishes, exported as `RequirementType`.
 * `Named` (0) is used for requirements identified by a package name; `URL` (1) is used for
 * editable sources. The object also carries the reverse mapping from number to name.
 */
const RequirementType = exports.RequirementType || (exports.RequirementType = {});
RequirementType[RequirementType["Named"] = 0] = "Named";
RequirementType[RequirementType["URL"] = 1] = "URL";
/**
 * Parser to be used on a directory with Python source code and (optionally) a `requirements.txt` file.
 * If no `requirements.txt` file exists then the Parser will attempt to read requirements from the Python source code.
 */
class PythonParser extends Parser_1.default {
    /**
     * Parse the folder into a `SoftwarePackage` whose `softwareRequirements` are the Python packages it needs.
     *
     * @returns {Promise<object|null>} the package, or `null` when the folder contains no `.py` files
     */
    async parse() {
        const files = this.glob(['**/*.py']);
        if (!files.length) {
            // no .py files so don't parse this directory
            return null;
        }
        const pkg = new schema_1.SoftwarePackage();
        pkg.runtimePlatform = 'Python';
        if (this.folder) {
            pkg.name = path_1.basename(this.folder);
        }
        const requirements = this.exists('requirements.txt')
            ? await this.parseRequirementsFile('requirements.txt')
            : this.generateRequirementsFromSource();
        for (const rawRequirement of requirements) {
            if (rawRequirement.type === RequirementType.Named) {
                pkg.softwareRequirements.push(await this.createPackage(rawRequirement));
            }
            else if (rawRequirement.type === RequirementType.URL) {
                // NOTE(review): this source requirement is built but never attached to `pkg`.
                // Left as is because consumers of `softwareRequirements` may not expect entries
                // without a name; confirm the intended behaviour before pushing it.
                const sourceRequirement = new schema_1.SoftwareSourceCode();
                sourceRequirement.runtimePlatform = 'Python';
                sourceRequirement.codeRepository = rawRequirement.value;
            }
        }
        return pkg;
    }
    /**
     * Convert a `PythonRequirement` into a `SoftwarePackage` by augmenting with metadata from PyPI.
     * When no metadata is available the package is returned with only the name, platform, language
     * and (if given) version set.
     *
     * @param {{value: string, version?: (string|null)}} requirement - the requirement to convert
     * @returns {Promise<object>} the populated `SoftwarePackage`
     */
    async createPackage(requirement) {
        const softwarePackage = new schema_1.SoftwarePackage();
        softwarePackage.name = requirement.value;
        softwarePackage.runtimePlatform = 'Python';
        softwarePackage.programmingLanguages = [schema_1.ComputerLanguage.py];
        if (requirement.version) {
            softwarePackage.version = requirement.version;
        }
        // Encode the name so characters such as "/", "?" or "#" cannot alter the request path.
        const pyPiMetadata = await this.fetch(`https://pypi.org/pypi/${encodeURIComponent(softwarePackage.name)}/json`);
        // Guard against an empty response (e.g. an unknown package) as well as a missing `info` section.
        const info = pyPiMetadata ? pyPiMetadata.info : null;
        if (!info) {
            return softwarePackage;
        }
        if (info.author) {
            // Only append the email when there is one, to avoid producing "Name <undefined>".
            const authorText = info.author_email ? `${info.author} <${info.author_email}>` : `${info.author}`;
            softwarePackage.authors.push(schema_1.Person.fromText(authorText));
        }
        if (info.project_url) {
            softwarePackage.codeRepository = info.project_url;
        }
        if (info.classifiers) {
            const classifiers = buildClassifierMap(info.classifiers);
            if (classifiers.has('Topic')) {
                const [topics, subTopics] = parseTopics(classifiers.get('Topic'));
                if (topics.length) {
                    softwarePackage.applicationCategories = topics;
                }
                if (subTopics.length) {
                    softwarePackage.applicationSubCategories = subTopics;
                }
            }
            if (classifiers.has('Operating System')) {
                // One description can map to several systems and descriptions can overlap, so de-duplicate.
                const operatingSystems = [];
                for (const operatingSystemDescription of classifiers.get('Operating System')) {
                    for (const operatingSystem of parseOperatingSystem(operatingSystemDescription)) {
                        if (!operatingSystems.includes(operatingSystem)) {
                            operatingSystems.push(operatingSystem);
                        }
                    }
                }
                softwarePackage.operatingSystems = operatingSystems;
            }
        }
        if (info.keywords) {
            softwarePackage.keywords = info.keywords;
        }
        if (info.license) {
            softwarePackage.license = info.license;
        }
        if (info.long_description) {
            softwarePackage.description = info.long_description;
        }
        else if (info.description) {
            softwarePackage.description = info.description;
        }
        return softwarePackage;
    }
    /**
     * Parse a `requirements.txt` file at `path` and return a list of `PythonRequirement`s.
     *
     * Comment lines are skipped, editable sources become `URL` requirements, included requirements files
     * are parsed recursively (resolved relative to the including file) and all other non-option lines
     * become `Named` requirements.
     *
     * @param {string} path - path of the requirements file
     * @param {string[]} [includeChain] - internal: files currently being parsed further up the include
     *   chain, used to stop circular includes from recursing forever
     * @returns {Promise<object[]>} the requirements, in file order
     */
    async parseRequirementsFile(path, includeChain = []) {
        const normalizedPath = path_1.normalize(path);
        if (includeChain.includes(normalizedPath)) {
            // This file is already being parsed by a caller: a circular include, nothing new to add.
            return [];
        }
        const nestedChain = [...includeChain, normalizedPath];
        const requirementsContent = this.read(path);
        const allRequirementLines = requirementsContent.split('\n');
        let requirements = [];
        for (const line of allRequirementLines) {
            if (lineIsComment(line)) {
                continue;
            }
            const editableSource = extractEditableSource(line);
            if (editableSource !== null) {
                requirements.push({ value: editableSource, type: RequirementType.URL });
                continue;
            }
            const includePath = extractIncludedRequirementsPath(line);
            if (includePath !== null) {
                // Included paths are relative to the file that includes them, not to the root.
                const resolvedPath = path_1.isAbsolute(includePath)
                    ? includePath
                    : path_1.join(path_1.dirname(path), includePath);
                const includedRequirements = await this.parseRequirementsFile(resolvedPath, nestedChain);
                requirements = requirements.concat(includedRequirements);
                continue;
            }
            const standardRequirement = extractStandardRequirements(line);
            if (standardRequirement === null || standardRequirement.startsWith('-')) {
                // Blank line, or an option line (e.g. `--index-url ...`) which is not a package.
                continue;
            }
            const [requirementName, version] = splitStandardRequirementVersion(standardRequirement);
            requirements.push({ value: requirementName, type: RequirementType.Named, version: version });
        }
        return requirements;
    }
    /**
     * Parse Python source files and find any non-system imports, return this as an array of `PythonRequirement`s.
     * Imports listed in `PythonBuiltins` are treated as system imports and left out.
     */
    generateRequirementsFromSource() {
        return this.findImports()
            .filter(pythonImport => !PythonBuiltins_1.default.includes(pythonImport))
            .map(nonSystemImport => ({ value: nonSystemImport, type: RequirementType.Named, version: '' }));
    }
    /**
     * Parse Python source files and find all imports (including system imports).
     * Each import name is returned once, in order of first appearance.
     */
    findImports() {
        const imports = new Set();
        for (const file of this.glob(['**/*.py'])) {
            for (const importName of this.readImportsInFile(file)) {
                imports.add(importName);
            }
        }
        return [...imports];
    }
    /**
     * Parse a single Python source file for imports.
     * Returns the top-level name of each `import x` / `from x import ...` statement once, leaving out
     * names that resolve to a module or package sitting next to the file.
     *
     * @param {string} path - path of the Python source file
     * @returns {string[]} the imported names, in order of first appearance
     */
    readImportsInFile(path) {
        const fileContent = this.read(path);
        // Created per call: a global regex keeps `lastIndex` state between `exec` calls.
        const importRegex = /^\s*from ([\w_]+)|^\s*import ([\w_]+)/gm;
        const imports = [];
        const fileDirectory = path_1.dirname(path);
        let match = importRegex.exec(fileContent);
        while (match !== null) {
            const pkg = match[1] || match[2];
            // A sibling `pkg.py` or `pkg/__init__.py` means this is a local import, not a requirement.
            const isLocal = this.glob([fileDirectory + '/' + pkg + '.py', fileDirectory + '/' + pkg + '/__init__.py']).length > 0;
            if (!isLocal && !imports.includes(pkg)) {
                imports.push(pkg);
            }
            match = importRegex.exec(fileContent);
        }
        return imports;
    }
}
exports.default = PythonParser;