1 |
|
2 |
|
3 |
|
4 | import * as z from 'zod';
|
5 | import { ActivityTypes, Channels, Middleware, TurnContext } from 'botbuilder-core';
|
6 | import { parseDocument } from 'htmlparser2';
|
7 |
|
/**
 * Channel ids for which outgoing `speak` content is wrapped in SSML by the middleware in this file.
 * Look-ups are made with a trimmed, lower-cased channel id.
 */
const supportedChannels = new Set<string>([
    Channels.DirectlineSpeech,
    Channels.Emulator,
    Channels.Telephony,
]);
|
9 |
|
10 |
|
/**
 * Reports whether any node in the given forest, or any of its descendants, is an element
 * whose `tagName` equals `tag`.
 *
 * Nodes are visited breadth-first. Values that are not objects, or whose `tagName` /
 * `children` properties have unexpected types, are skipped rather than treated as errors.
 *
 * @param tag Tag name to look for (compared with `===`).
 * @param nodes Root nodes to search. The array is only read; it is never modified.
 * @returns `true` as soon as a matching element is found, otherwise `false`.
 */
function hasTag(tag: string, nodes: unknown[]): boolean {
    // Built once per call instead of once per visited node.
    const elementLike = z
        .object({ tagName: z.string(), children: z.array(z.unknown()) })
        .partial()
        .nonstrict();

    // Private work list so the caller's array is left untouched. An index cursor replaces
    // repeated `shift()` calls, which would re-pack the array on every step.
    const pending: unknown[] = [...nodes];

    for (let index = 0; index < pending.length; index++) {
        const item = pending[index];

        if (!elementLike.check(item)) {
            continue;
        }

        if (item.tagName === tag) {
            return true;
        }

        if (item.children) {
            // Appended one by one: spreading a very large child list into `push` can exceed
            // the engine's argument limit.
            for (const child of item.children) {
                pending.push(child);
            }
        }
    }

    return false;
}
|
34 |
|
35 |
|
36 |
|
37 |
|
38 |
|
/**
 * Middleware that prepares the `speak` property of outgoing message activities.
 *
 * For every outgoing activity of type message it:
 * 1. optionally copies `text` into `speak` when `speak` is empty, and
 * 2. when a voice name is configured and the turn's channel is one of `supportedChannels`,
 *    wraps `speak` in SSML `<voice>` / `<speak>` elements unless they are already present.
 */
export class SetSpeakMiddleware implements Middleware {
    /**
     * @param voiceName Voice to put in the generated `<voice name='…'>` element, or `null`
     * to leave `speak` unwrapped.
     * @param fallbackToTextForSpeak When `true`, an empty `speak` is filled from the activity's `text`.
     */
    constructor(private readonly voiceName: string | null, private readonly fallbackToTextForSpeak: boolean) {}

    /**
     * Registers a send-activities handler on the turn and then continues the pipeline.
     *
     * @param turnContext Context for the current turn; its incoming activity supplies the channel id.
     * @param next Continues execution of the middleware pipeline.
     * @returns The promise returned by `next`.
     */
    onTurn(turnContext: TurnContext, next: () => Promise<void>): Promise<void> {
        turnContext.onSendActivities(async (_ctx, activities, sendNext) => {
            // Both values are the same for every activity in the batch, so read them once.
            const voiceName = this.voiceName;
            const channelId = turnContext.activity.channelId?.trim().toLowerCase();
            const channelSupported = supportedChannels.has(channelId);

            for (const activity of activities) {
                if (activity.type !== ActivityTypes.Message) {
                    continue;
                }

                if (this.fallbackToTextForSpeak && !activity.speak) {
                    activity.speak = activity.text;
                }

                // `!= null` also rejects `undefined`, which untyped callers may pass and which
                // would otherwise be rendered as the literal voice name "undefined".
                if (activity.speak && voiceName != null && channelSupported) {
                    activity.speak = SetSpeakMiddleware.toSsml(activity.speak, voiceName, activity.locale);
                }
            }

            return sendNext();
        });

        return next();
    }

    /**
     * Wraps `speak` in SSML unless it already contains a `<speak>` element.
     * A `<voice>` element is added only when none is present.
     *
     * @param speak Current, non-empty `speak` value.
     * @param voiceName Voice to use for the generated `<voice>` element.
     * @param locale Value for `xml:lang`; `en-US` is used when absent.
     * @returns The SSML document, or `speak` unchanged when it already has a `<speak>` element.
     */
    private static toSsml(speak: string, voiceName: string, locale: string | undefined): string {
        const nodes = parseDocument(speak).childNodes;

        // Each look-up gets its own copy so the two searches never share a work list.
        if (hasTag('speak', nodes.slice())) {
            return speak;
        }

        let content = speak;

        if (!hasTag('voice', nodes.slice())) {
            content = `<voice name='${SetSpeakMiddleware.escapeAttribute(voiceName)}'>${content}</voice>`;
        }

        const lang = SetSpeakMiddleware.escapeAttribute(locale ?? 'en-US');

        return `<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='${lang}'>${content}</speak>`;
    }

    /**
     * Replaces the five XML-special characters with their predefined entities so the value
     * cannot terminate or corrupt the attribute it is placed in.
     *
     * @param value Raw attribute value.
     * @returns The value, safe to embed between attribute quotes.
     */
    private static escapeAttribute(value: string): string {
        return value
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;')
            .replace(/'/g, '&apos;');
    }
}
|