1 |
|
2 |
|
3 |
|
4 |
|
5 |
|
6 |
|
7 |
|
8 |
|
9 |
|
10 |
|
11 |
|
12 |
|
13 |
|
14 |
|
15 |
|
16 | import { readFileSync } from 'fs';
|
17 | import * as path from 'path';
|
18 | import * as readline from 'readline';
|
19 | import { Command } from 'commander';
|
20 | import { HTMLProcessingParser, loadDefaultParsers, loadDefaultJapaneseParser, } from './index.js';
|
/** Version string reported by the CLI's version option. */
const CLI_VERSION = '0.6.2';

/**
 * Bundled parsers, loaded once at module load. `cli` uses this collection's
 * `keys()`, `has()` and `get()` to list and look up parsers by language.
 */
const defaultParsers = loadDefaultParsers();
|
23 |
|
24 |
|
25 |
|
26 |
|
/**
 * Entry point of the `budoux` command line tool.
 *
 * Parses the command line, selects a parser and prints the segmented text.
 * Parser selection order:
 *   1. `-m/--model`: a custom model loaded from the given JSON file;
 *   2. `-l/--lang`: one of the bundled parsers, if the language is known;
 *   3. otherwise the default Japanese parser. Note that an unknown `--lang`
 *      value also ends up here without any warning.
 *
 * With no positional argument the text is read from standard input and
 * processed once the input is exhausted; with one argument that argument is
 * processed directly.
 *
 * @param {string[]} argv Argument vector handed unchanged to commander's
 *     `parse()`.
 * @throws {Error} If more than one positional argument is supplied. Errors
 *     raised while loading a custom model are propagated as well.
 */
export const cli = (argv) => {
  const program = new Command('budoux');
  // The synopsis must only list options that are registered below; the former
  // `[-t THRES]` entry advertised an option this CLI does not accept.
  program.usage('[-h] [-H] [-d STR] [-m JSON] [-l LANG] [-V] [TXT]');
  program.description('BudouX is the successor to Budou, the machine learning powered line break organizer tool.');
  program
    .option('-H, --html', 'HTML mode', false)
    .option('-d, --delim <str>', 'output delimiter in TEXT mode', '---')
    .option('-m, --model <json>', 'model file path')
    .option('-l, --lang <str>', `language model to use. -m and --model will be prioritized if any.\navailable languages: ${[
      ...defaultParsers.keys(),
    ].join(', ')}`)
    .argument('[txt]', 'text');
  program.version(CLI_VERSION);
  program.parse(argv);

  const { lang, model, delim, html } = program.opts();
  const { args } = program;

  // A custom model wins over --lang; unknown languages fall back to Japanese.
  const parser = model
    ? loadCustomParser(model)
    : lang && defaultParsers.has(lang)
      ? defaultParsers.get(lang)
      : loadDefaultJapaneseParser();

  switch (args.length) {
    case 0: {
      // No text on the command line: collect everything from stdin.
      const rl = readline.createInterface({
        input: process.stdin,
      });
      let stdin = '';
      rl.on('line', line => {
        stdin += line + '\n';
      });
      // Flush on the interface's own 'close' event, which fires after the
      // final 'line' event, instead of relying on the registration order of
      // 'end' listeners on process.stdin.
      rl.on('close', () => {
        outputParsedTexts(parser, html, delim, [stdin]);
      });
      break;
    }
    case 1: {
      outputParsedTexts(parser, html, delim, args);
      break;
    }
    default: {
      throw new Error('Too many arguments. Please, pass the only one argument.');
    }
  }
};
|
72 |
|
73 |
|
74 |
|
75 |
|
76 |
|
77 |
|
78 |
|
/**
 * Prints the parser's result for the first entry of `args` to stdout.
 *
 * HTML mode: the text is passed to `parser.translateHTMLString` and the
 * returned string is printed as a single entry.
 *
 * TEXT mode: the text is split on LF / CRLF, empty lines are dropped, and
 * every remaining line is passed to `parser.parse`. Each returned piece is
 * printed on its own line, and `delim` is printed between the outputs of
 * consecutive input lines (never after the last one).
 *
 * @param {{parse: Function, translateHTMLString: Function}} parser Parser
 *     used to process the text.
 * @param {boolean} html Whether to run in HTML mode.
 * @param {string} delim Separator line used in TEXT mode.
 * @param {string[]} args Only the first element is read.
 */
const outputParsedTexts = (parser, html, delim, args) => {
  const source = args[0];

  if (html) {
    console.log(parser.translateHTMLString(source));
    return;
  }

  const lines = source.split(/\r?\n/).filter(line => line.length > 0);
  const lastIndex = lines.length - 1;
  for (const [position, line] of lines.entries()) {
    for (const chunk of parser.parse(line)) {
      console.log(chunk);
    }
    if (position !== lastIndex) {
      console.log(delim);
    }
  }
};
|
99 |
|
100 |
|
101 |
|
102 |
|
/**
 * Builds a parser from a model stored in a JSON file.
 *
 * The path is resolved with `path.resolve`, the file is read synchronously,
 * its contents are parsed as JSON and the resulting object is handed to the
 * `HTMLProcessingParser` constructor. Read and JSON parse errors are not
 * caught here.
 *
 * @param {string} modelPath Path to the model JSON file.
 * @returns {HTMLProcessingParser} Parser constructed from the loaded model.
 */
const loadCustomParser = (modelPath) => {
  const absolutePath = path.resolve(modelPath);
  const rawJson = readFileSync(absolutePath).toString();
  return new HTMLProcessingParser(JSON.parse(rawJson));
};
|
108 |
|
\ | No newline at end of file |