UNPKG

3.25 kBPlain TextView Raw
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
3
4import * as z from 'zod';
5import { ActivityTypes, Channels, Middleware, TurnContext } from 'botbuilder-core';
6import { parseDocument } from 'htmlparser2';
7
/**
 * Channel ids for which SetSpeakMiddleware wraps an outgoing `speak` value in SSML.
 * Lookups are made with a trimmed, lower-cased channel id, so entries are presumably
 * lower-case already (confirm against the `Channels` definition in botbuilder-core).
 */
const supportedChannels: ReadonlySet<string> = new Set<string>([
    Channels.DirectlineSpeech,
    Channels.Emulator,
    Channels.Telephony,
]);
9
/**
 * Searches `nodes` and all of their descendants for an element whose `tagName` equals `tag`.
 *
 * Entries that are not objects, or whose `tagName` / `children` have an unexpected type,
 * are skipped together with anything nested below them.
 *
 * @param tag The tag name to look for (compared with strict equality).
 * @param nodes The root nodes to search. The array is not modified.
 * @returns true if any visited node has a matching `tagName`, otherwise false.
 */
function hasTag(tag: string, nodes: unknown[]): boolean {
    // Build the schema once per call instead of once per visited node.
    const nodeSchema = z
        .object({ tagName: z.string(), children: z.array(z.unknown()) })
        .partial()
        .nonstrict();

    // Private work list: the caller's array is left untouched, and pop() is O(1)
    // where shift() was O(n). Visit order does not matter for an existence check.
    const pending: unknown[] = [...nodes];

    while (pending.length) {
        const item = pending.pop();

        if (!nodeSchema.check(item)) {
            continue;
        }

        if (item.tagName === tag) {
            return true;
        }

        if (item.children) {
            pending.push(...item.children);
        }
    }

    return false;
}
34
/**
 * Support the DirectLine speech and telephony channels to ensure the appropriate SSML tags are set on the
 * Activity Speak property.
 */
export class SetSpeakMiddleware implements Middleware {
    /**
     * Initializes a new instance of the SetSpeakMiddleware class.
     *
     * @param voiceName The SSML voice name attribute value. When null, `speak` values are never wrapped in SSML.
     * @param fallbackToTextForSpeak true if an empty Activity.Speak is populated with Activity.Text.
     */
    constructor(private readonly voiceName: string | null, private readonly fallbackToTextForSpeak: boolean) {}

    /**
     * Registers a send-activities handler that prepares the `speak` property of outgoing
     * message activities, then continues the middleware pipeline.
     *
     * For each outgoing message activity the handler:
     * 1. copies `text` into `speak` when `fallbackToTextForSpeak` is set and `speak` is empty;
     * 2. if a voice name is configured and the turn's channel is supported, wraps `speak` in
     *    `<voice>` (unless a voice element is already present) and `<speak>` elements. Values
     *    that already contain a `<speak>` element are left as they are.
     *
     * @param turnContext The context object for this turn.
     * @param next The delegate to call to continue the bot middleware pipeline.
     * @returns A promise representing the async operation.
     */
    onTurn(turnContext: TurnContext, next: () => Promise<void>): Promise<void> {
        turnContext.onSendActivities(async (_ctx, activities, nextSend) => {
            // The channel is taken from the turn's activity, so it is the same for every
            // outgoing activity: resolve it once.
            const channelId = turnContext.activity.channelId?.trim().toLowerCase();
            const wrapInSsml =
                this.voiceName !== null &&
                this.voiceName !== undefined &&
                channelId !== undefined &&
                supportedChannels.has(channelId);

            for (const activity of activities) {
                if (activity.type !== ActivityTypes.Message) {
                    continue;
                }

                if (this.fallbackToTextForSpeak && !activity.speak) {
                    activity.speak = activity.text;
                }

                if (!wrapInSsml || !activity.speak) {
                    continue;
                }

                const nodes = parseDocument(activity.speak).childNodes;

                // Already a complete SSML document: leave it alone.
                // hasTag receives a copy so the parsed node list stays intact for the second lookup.
                if (hasTag('speak', nodes.slice())) {
                    continue;
                }

                if (!hasTag('voice', nodes.slice())) {
                    activity.speak = `<voice name='${this.voiceName}'>${activity.speak}</voice>`;
                }

                const locale = activity.locale ?? 'en-US';
                activity.speak = `<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='${locale}'>${activity.speak}</speak>`;
            }

            return nextSend();
        });

        return next();
    }
}