UNPKG

11.8 kBJavaScriptView Raw
1"use strict";
2var __importDefault = (this && this.__importDefault) || function (mod) {
3 return (mod && mod.__esModule) ? mod : { "default": mod };
4};
5Object.defineProperty(exports, "__esModule", { value: true });
6exports.apiSchema = exports.opSchema = exports.makeJobSchema = exports.jobSchema = void 0;
7const os_1 = __importDefault(require("os"));
8const utils_1 = require("@terascope/utils");
// Number of logical CPUs reported by the OS.
const cpuCount = os_1.default.cpus().length;
// Default worker count for a job: one per CPU, capped at 5.
// os.cpus() can return an empty array when CPU information is unavailable,
// which would yield 0 and fail the 'positive_int' format used by the
// `workers` schema entry, so never go below 1.
const workers = Math.max(1, Math.min(cpuCount, 5));
/**
 * List the names of every connection configured under
 * `context.sysconfig.terafoundation.connectors`.
 *
 * Each connector type maps connection names to their settings; the names of
 * all connector types are merged into a single flat array.
 *
 * @param {object} context - must expose `sysconfig.terafoundation`
 * @returns {string[]} connection names across all connector types
 */
function listConnectionNames(context) {
    const connectorsObject = utils_1.getField(context.sysconfig.terafoundation, 'connectors', {});
    const connectors = Object.values(connectorsObject);
    return utils_1.flatten(connectors.map((conn) => Object.keys(conn)));
}

/**
 * Ensure `obj` is a plain object whose keys and values are all non-empty.
 *
 * @param {*} obj - value to validate
 * @throws {Error} when `obj` is not a plain object, or a key/value is
 *   null, undefined or an empty string
 */
function assertNonEmptyEntries(obj) {
    if (!utils_1.isPlainObject(obj)) {
        throw new Error('must be object');
    }
    for (const [key, val] of Object.entries(obj)) {
        if (key == null || key === '') {
            throw new Error('key must be not empty');
        }
        if (val == null || val === '') {
            throw new Error(`value for key "${key}" must be not empty`);
        }
    }
}

/**
 * Render a value for use in an error message. Objects are serialized as JSON
 * so the message shows their content instead of "[object Object]".
 *
 * @param {*} value
 * @returns {string}
 */
function describeEntry(value) {
    try {
        return String(JSON.stringify(value));
    } catch (err) {
        // JSON.stringify throws on circular structures; fall back to the
        // plain string form rather than masking the validation error.
        return String(value);
    }
}

/**
 * Ensure `arr` is an array whose entries each define all of `fields`.
 *
 * @param {*} arr - value to validate
 * @param {string[]} fields - property names every entry must define
 * @throws {Error} when `arr` is not an array, or an entry is null/undefined
 *   or is missing one of the fields
 */
function assertEntriesHaveFields(arr, fields) {
    if (!Array.isArray(arr)) {
        throw new Error('must be array');
    }
    for (const entry of arr) {
        for (const field of fields) {
            // Guard against null/undefined entries so the caller gets a
            // validation error rather than a TypeError on property access.
            if (entry == null || entry[field] == null) {
                throw new Error(`needs to have a ${field}: ${describeEntry(entry)}`);
            }
        }
    }
}

/**
 * Build the schema definition for a job configuration.
 *
 * Each property of the returned object describes one job setting through
 * `default`, `doc` and `format` entries. When
 * `context.sysconfig.teraslice.cluster_manager_type` is `'kubernetes'`, the
 * kubernetes-only settings (`targets`, `cpu`, `cpu_execution_controller`,
 * `memory`, `memory_execution_controller`, `volumes`, `kubernetes_image`)
 * are added as well.
 *
 * @param {object} context - must expose `sysconfig.terafoundation` (read
 *   lazily, when `operations`/`apis` are validated) and
 *   `sysconfig.teraslice.cluster_manager_type`
 * @returns {object} the schema definition, keyed by setting name
 */
function jobSchema(context) {
    const schemas = {
        analytics: {
            default: true,
            doc: [
                'logs the time it took in milliseconds for each action,',
                'as well as the number of docs it receives',
            ].join(' '),
            format: Boolean,
        },
        performance_metrics: {
            default: false,
            doc: 'logs performance metrics, including gc, loop and usage metrics for nodejs',
            format: Boolean,
        },
        assets: {
            default: null,
            doc: 'An array of strings identifying the assets needed by the job',
            format(arr) {
                // null/undefined means "no assets" and is accepted as-is.
                if (arr == null) {
                    return;
                }
                if (!Array.isArray(arr)) {
                    throw new Error('assets need to be of type array');
                }
                if (!arr.every(utils_1.isString)) {
                    throw new Error('assets needs to be an array of strings');
                }
            },
        },
        autorecover: {
            default: false,
            doc: 'Automatically recover pending slices from the last stopped/completed execution. '
                + 'The last state will always be passed to the slicer',
            format: Boolean,
        },
        lifecycle: {
            default: 'once',
            doc: 'Job lifecycle behavior, determines if it should exit on completion or remain active',
            format: ['once', 'persistent'],
        },
        max_retries: {
            default: 3,
            doc: [
                'the number of times a worker will attempt to process',
                'the same slice after a error has occurred',
            ].join(' '),
            format: 'nat',
        },
        name: {
            default: 'Custom Job',
            doc: 'Name for specific job',
            format: 'required_String',
        },
        operations: {
            default: [],
            doc: 'An array of actions to execute, typically the first is a reader '
                + 'and the last is a sender with '
                + 'any number of processing function in-between',
            format(arr) {
                if (!(Array.isArray(arr) && arr.length >= 2)) {
                    throw new Error('Operations need to be of type array with at least two operations in it');
                }
                const connections = listConnectionNames(context);
                for (const op of arr) {
                    if (!op || !utils_1.isPlainObject(op)) {
                        throw new Error(`Invalid Operation config in operations, got ${utils_1.getTypeOf(op)}`);
                    }
                    if (op.connection && !connections.includes(op.connection)) {
                        throw new Error(`Operation ${op._op} refers to connection "${op.connection}" which is unavailable`);
                    }
                }
            },
        },
        apis: {
            default: [],
            doc: `An array of apis to load and any configurations they require.
            Validated similar to operations, with the exception of no apis are required.
            The _name property is required, and it is required to be unqiue
            but can be suffixed with a identifier by using the format "example:0",
            anything after the ":" is stripped out when searching for the file or folder.`,
            format(arr) {
                if (!Array.isArray(arr)) {
                    throw new Error('APIs is required to be an array');
                }
                const connections = listConnectionNames(context);
                // Names seen so far, used to reject duplicate API configs.
                const names = new Set();
                for (const api of arr) {
                    if (!api || !utils_1.isPlainObject(api)) {
                        throw new Error(`Invalid API config in apis, got ${utils_1.getTypeOf(api)}`);
                    }
                    if (!api._name) {
                        throw new Error('API requires an _name');
                    }
                    if (names.has(api._name)) {
                        throw new Error(`Duplicate API configurations for ${api._name} found`);
                    }
                    names.add(api._name);
                    if (api.connection && !connections.includes(api.connection)) {
                        throw new Error(`API ${api._name} refers to connection "${api.connection}" which is unavailable`);
                    }
                }
            },
        },
        probation_window: {
            default: 300000,
            doc: 'time in ms that the execution controller checks for failed slices, '
                + 'if there are none then it updates the state of the execution to running '
                + '(this is only when lifecycle is set to persistent)',
            format: 'duration',
        },
        slicers: {
            default: 1,
            doc: 'how many parallel slicer contexts that will run within the slicer',
            format: 'positive_int',
        },
        workers: {
            default: workers,
            doc: 'the number of workers dedicated for the job',
            format: 'positive_int',
        },
        labels: {
            default: null,
            doc: 'An object of key/value pairs used to label kubernetes resources.',
            format(obj) {
                // Labels are optional; only validate when provided.
                if (obj != null) {
                    assertNonEmptyEntries(obj);
                }
            },
        },
        env_vars: {
            default: {},
            doc: 'environment variables to set on each the teraslice worker, in the format, { "EXAMPLE": "test" }',
            format(obj) {
                assertNonEmptyEntries(obj);
            },
        },
    };
    const clusteringType = context.sysconfig.teraslice.cluster_manager_type;
    if (clusteringType === 'kubernetes') {
        schemas.targets = {
            default: [],
            doc: 'array of key/value labels used for targetting teraslice jobs to nodes',
            format(arr) {
                assertEntriesHaveFields(arr, ['key', 'value']);
            },
        };
        schemas.cpu = {
            doc: 'number of cpus to reserve per teraslice worker in kubernetes',
            default: undefined,
            format: 'Number',
        };
        schemas.cpu_execution_controller = {
            doc: 'number of cpus to reserve per teraslice execution controller in kubernetes',
            default: undefined,
            format: 'Number',
        };
        schemas.memory = {
            doc: 'memory, in bytes, to reserve per teraslice worker in kubernetes',
            default: undefined,
            format: 'Number',
        };
        schemas.memory_execution_controller = {
            doc: 'memory, in bytes, to reserve per teraslice execution controller in kubernetes',
            default: undefined,
            format: 'Number',
        };
        schemas.volumes = {
            default: [],
            doc: 'array of volumes to be mounted by job workers',
            format(arr) {
                assertEntriesHaveFields(arr, ['name', 'path']);
            },
        };
        schemas.kubernetes_image = {
            doc: 'Specify a custom image name for kubernetes, this only applies to kubernetes systems',
            default: undefined,
            format: 'optional_String',
        };
    }
    return schemas;
}
// `makeJobSchema` is exported as an alias of the same `jobSchema` function.
exports.jobSchema = jobSchema;
exports.makeJobSchema = jobSchema;
240exports.opSchema = {
241 _op: {
242 default: '',
243 doc: 'Name of operation, , it must reflect the name of the file or folder',
244 format: 'required_String',
245 },
246 _encoding: {
247 doc: 'Used for specifying the data encoding type when using `DataEntity.fromBuffer`. Defaults to `json`.',
248 default: utils_1.DataEncoding.JSON,
249 format: utils_1.dataEncodings,
250 },
251 _dead_letter_action: {
252 doc: [
253 'This action will specify what to do when failing to parse or transform a record.',
254 'The following builtin actions are supported:',
255 ' - "throw": throw the original error​​',
256 ' - "log": log the error and the data​​',
257 ' - "none": (default) skip the error entirely',
258 'If none of the actions are specified it will try and use a registered Dead Letter Queue API under that name.',
259 'The API must be already be created by a operation before it can used.'
260 ].join('\n'),
261 default: 'throw',
262 format: 'optional_String',
263 },
264};
/**
 * Schema entry for the setting defined on an API config: `_name`, a required
 * string that defaults to ''.
 */
exports.apiSchema = {
    _name: {
        default: '',
        doc: `The _name property is required, and it is required to be unqiue
        but can be suffixed with a identifier by using the format "example:0",
        anything after the ":" is stripped out when searching for the file or folder.`,
        format: 'required_String',
    },
};
274//# sourceMappingURL=job-schemas.js.map
\No newline at end of file