UNPKG

50.5 kBSource Map (JSON)View Raw
1{"version":3,"sources":["webpack:///webpack/bootstrap","webpack:///./packages/stt-adapters/generate-entities-ranges/index.js","webpack:///./packages/stt-adapters/bbc-kaldi/group-words-by-speakers.js","webpack:///./packages/stt-adapters/bbc-kaldi/index.js","webpack:///./packages/stt-adapters/autoEdit2/index.js","webpack:///./packages/stt-adapters/speechmatics/index.js","webpack:///./packages/stt-adapters/amazon-transcribe/group-words-by-speakers.js","webpack:///./packages/stt-adapters/amazon-transcribe/index.js","webpack:///./packages/stt-adapters/ibm/index.js","webpack:///./packages/stt-adapters/digital-paper-edit/group-words-by-speakers.js","webpack:///./packages/stt-adapters/digital-paper-edit/index.js","webpack:///./packages/stt-adapters/create-entity-map/index.js","webpack:///./packages/stt-adapters/index.js"],"names":["installedModules","__webpack_require__","moduleId","exports","module","i","l","modules","call","m","c","d","name","getter","o","Object","defineProperty","enumerable","get","r","Symbol","toStringTag","value","t","mode","__esModule","ns","create","key","bind","n","object","property","prototype","hasOwnProperty","p","s","generateEntitiesRanges","words","wordAttributeName","position","map","word","result","start","end","confidence","text","offset","length","Math","random","toString","substring","groupWordsInParagraphsBySpeakers","segments","wordsWithSpeakers","currentSpeaker","speaker","results","paragraph","forEach","push","punct","trim","groupWordsBySpeaker","tmpWordsWithSpeakers","tmpSpeakerSegment","tmpSegment","find","seg","segEnd","duration","@type","@id","gender","findSegmentForWord","addSpeakerToEachWord","bbcKaldiToDraft","bbcKaldiJson","tmpWords","speakerSegmentation","retval","segmentation","test","join","groupWordsInParagraphs","speakerLabel","concat","draftJsContentBlockParagraph","type","data","entityRanges","autoEdit2ToDraft","autoEdit2Json","autoEditText","autoEditparagraph","autoEditLine","line","tmpWord","startTime","endTime","getSpeaker","speakers","speakerIdx","speechmaticsToDraft","speechmaticsJson","curatedWords","tmpSpeakers","parseFloat","element","index","time","toLowerCase","replace","paragraphStart","findSpeakerForWord","start_time","end_time","firstMatchingSegment","speaker_label","speakerLabels","groupedWords","groupWordsBySpeakerLabel","w","assign","addSpeakerLabelToWords","getBestAlternativeForWord","alternatives","reduce","prev","current","normalizeWord","currentWord","bestAlternative","content","mapPunctuationItemsToWords","itemsToRemove","punctuation","previousWord","punctuationContent","_objectSpread","appendPunctuationToPreviousWord","filter","item","includes","amazonTranscribeToDraft","amazonTranscribeJson","items","speaker_labels","wordsWithRemappedPunctuation","speakerGroup","groupSpeakerWordsInParagraphs","normalizedWord","ibmToDraft","ibmJson","ibmWords","ibmSpeakers","ibmResults","normalisedResults","normalisedWords","timestamps","ibmWord","ibmNormalisedWordsWithSpeakers","draftJsParagraphsResults","ibmParagraph","lines","speakerSegments","segStart","from","to","findSpeakerSegmentForWord","digitalPaperEditToDraft","digitalPaperEditTranscriptJson","paragraphs","flatten","list","a","b","Array","isArray","createEntityMap","blocks","block","flatEntityRanges","entityMap","mutability","sttJsonAdapter","transcriptData","sttJsonType","console","error"],"mappings":"2BACA,IAAAA,EAAA,GAGA,SAAAC,EAAAC,GAGA,GAAAF,EAAAE,GACA,OAAAF,EAAAE,GAAAC,QAGA,IAAAC,EAAAJ,EAAAE,GAAA,CACAG,EAAAH,EACAI,GAAA,EACAH,QAAA,IAUA,OANAI,EAAAL,GAAAM,KAAAJ,EAAAD,QAAAC,IAAAD,QAAAF,GAGAG,EAAAE,GAAA,EAGAF,EAAAD,QA0DA,OArDAF,EAAAQ,EAAAF,EAGAN,EAAAS,EAAAV,EAGAC,EAAAU,EAAA,SAAAR,EAAAS,EAAAC,GACAZ,EAAAa,EAAAX,EAAAS,IACAG,OAAAC,eAAAb,EAAAS,EAAA,CAA0CK,YAAA,EAAAC,IAAAL,KAK1CZ,EAAAkB,EAAA,SAAAhB,GACA,oBAAAiB,eAAAC,aACAN,OAAAC,eAAAb,EAAAiB,OAAAC,YAAA,CAAwDC,MAAA,WAExDP,OAAAC,eAAAb,EAAA,cAAiDmB,OAAA,KAQjDrB,EAAAsB,EAAA,SAAAD,EAAAE,GAEA,GADA,EAAAA,IAAAF,EAAArB,EAAAqB,IACA,EAAAE,EAAA,OAAAF,EACA,KAAAE,GAAA,iBAAAF,QAAAG,WAAA,OAAAH,EACA,IAAAI,EAAAX,OAAAY,OAAA,MAGA,GAFA1B,EAAAkB,EAAAO,GACAX,OAAAC,eAAAU,EAAA,WAAyCT,YAAA,EAAAK,UACzC,EAAAE,GAAA,iBAAAF,EAAA,QAAAM,KAAAN,EAAArB,EAAAU,EAAAe,EAAAE,EAAA,SAAAA,GAAgH,OAAAN,EAAAM,IAAqBC,KAAA,KAAAD,IACrI,OAAAF,GAIAzB,EAAA6B,EAAA,SAAA1B,GACA,IAAAS,EAAAT,KAAAqB,WACA,WAA2B,OAAArB,EAAA,SAC3B,WAAiC,OAAAA,GAEjC,OADAH,EAAAU,EAAAE,EAAA,IAAAA,GACAA,GAIAZ,EAAAa,EAAA,SAAAiB,EAAAC,GAAsD,OAAAjB,OAAAkB,UAAAC,eAAA1B,KAAAuB,EAAAC,IAGtD/B,EAAAkC,EAAA,GAIAlC,IAAAmC,EAAA,6CCtEA,IAsBeC,EAtBgB,SAACC,EAAOC,GACrC,IAAIC,EAAW,EAEf,OAAOF,EAAMG,IAAI,SAACC,GAChB,IAAMC,EAAS,CACbC,MAAOF,EAAKE,MACZC,IAAKH,EAAKG,IACVC,WAAYJ,EAAKI,WACjBC,KAAML,EAAKH,GACXS,OAAQR,EACRS,OAAQP,EAAKH,GAAmBU,OAChCrB,IAAKsB,KAAKC,SACPC,SAAS,IACTC,UAAU,IAKf,OAFAb,EAAWA,EAAWE,EAAKH,GAAmBU,OAAS,EAEhDN,KCuFIW,MA/Gf,SAA0ChB,EAAOiB,GAM/C,OA2BF,SAA6BC,GAAmB,IAC1CC,EAAiBD,EAAkB,GAAGE,QACpCC,EAAU,GACZC,EAAY,CAAEtB,MAAO,GAAIS,KAAM,GAAIW,QAAS,IA0BhD,OAzBAF,EAAkBK,QAAQ,SAACnB,GAErBe,IAAmBf,EAAKgB,SAC1BE,EAAUtB,MAAMwB,KAAKpB,GACrBkB,EAAUb,MAAQL,EAAKqB,MAAQ,IAC/BH,EAAUF,QAAUD,IAKpBA,EAAiBf,EAAKgB,QAEtBE,EAAUb,KAAOa,EAAUb,KAAKiB,OAEhCL,EAAQG,KAAKF,IAEbA,EAAY,CAAEtB,MAAO,GAAIS,KAAM,GAAIW,QAAS,UAElCpB,MAAMwB,KAAKpB,GACrBkB,EAAUb,MAAQL,EAAKqB,MAAQ,OAInCJ,EAAQG,KAAKF,GAEND,EA1DQM,CAWjB,SAA8B3B,EAAOiB,GACnC,IAAMW,EAAuB,GAQ7B,OAPA5B,EAAMuB,QAAQ,SAACnB,GACb,IAyFuBgB,EAzFjBS,EA4DV,SAA4BzB,EAAMa,GAEhC,IAAMa,EAAab,EAASc,KAAK,SAACC,GAChC,IAAMC,EAASD,EAAI1B,MAAQ0B,EAAIE,SAE/B,OAAS9B,EAAKE,OAAS0B,EAAI1B,OAAWF,EAAKG,KAAO0B,IALV,YAQtC,IAAAH,EAGK,CACLK,QAAS,UAGTf,QAAS,CAAEgB,MAAO,MAAOC,OAAQ,MAI5BP,EA/EmBQ,CAAmBlC,EAAMa,GAEnDb,EAAKgB,SAuFkBA,EAvFUS,EAAkBT,SAwFtCiB,OAAS,IAAMjB,EAAQ,OAvFpCQ,EAAqBJ,KAAKpB,KAGrBwB,EAtBmBW,CAAqBvC,EAAOiB,EAASA,YCkFlDuB,EAtDS,SAAAC,GAAgB,IAElCC,EADErB,EAAU,GAEZsB,EAAsB,KAgD1B,YA3CI,IAAAF,EAAaG,QAMfF,EAAWD,EAAazC,WACpB,IAAAyC,EAAaI,eACfF,EAAsBF,EAAaI,gBAPrCH,EAAWD,EAAaG,OAAO5C,WAC3B,IAAAyC,EAAaG,OAAOC,eACtBF,EAAsBF,EAAaG,OAAOC,gBASlB,OAAxBF,EA1CyB,SAAA3C,GAAS,IAChCqB,EAAU,GACZC,EAAY,CAAEtB,MAAO,GAAIS,KAAM,IAiBnC,OAfAT,EAAMuB,QAAQ,SAAAnB,GAER,QAAQ0C,KAAK1C,EAAKqB,QACpBH,EAAUtB,MAAMwB,KAAKpB,GACrBkB,EAAUb,KAAKe,KAAKpB,EAAKqB,OACzBH,EAAUb,KAAOa,EAAUb,KAAKsC,KAAK,KACrC1B,EAAQG,KAAKF,GAEbA,EAAY,CAAEtB,MAAO,GAAIS,KAAM,MAE/Ba,EAAUtB,MAAMwB,KAAKpB,GACrBkB,EAAUb,KAAKe,KAAKpB,EAAKqB,UAItBJ,EAwBe2B,CAAuBN,GAEvB1B,EAAiC0B,EAAUC,IAG/CpB,QAAQ,SAACD,EAAWvD,GAGpC,QAAI,IAAAuD,EAAUtB,MAAM,GAAkB,CACpC,IAAIiD,EAAY,OAAAC,OAAWnF,GACC,OAAxB4E,IACFM,EAAe3B,EAAUF,SAG3B,IAAM+B,EAA+B,CACnC1C,KAAMa,EAAUb,KAChB2C,KAAM,YACNC,KAAM,CACJjC,QAAS6B,EACTjD,MAAOsB,EAAUtB,MACjBM,MAAOgB,EAAUtB,MAAM,GAAGM,OAI5BgD,aAAcvD,EAAuBuB,EAAUtB,MAAO,UAExDqB,EAAQG,KAAK2B,MAIV9B,GCfMkC,EA1BU,SAACC,GAAkB,IACpCnC,EAAU,GAsBhB,OAxD6B,SAACoC,GAAiB,IACzCpC,EAAU,GACZC,EAAY,CAAEtB,MAAO,GAAIS,KAAM,IA4BnC,OA1BAgD,EAAalC,QAAQ,SAACmC,GACpBA,EAAkBpC,UAAUC,QAAQ,SAACoC,GACnCA,EAAaC,KAAKrC,QAAQ,SAACnB,GAIzB,IAAMyD,EAAU,CACdpD,KAAML,EAAKK,KACXH,MAAOF,EAAK0D,UACZvD,IAAKH,EAAK2D,SAGR,QAAQjB,KAAK1C,EAAKK,OACpBa,EAAUtB,MAAMwB,KAAKqC,GACrBvC,EAAUb,KAAKe,KAAKpB,EAAKK,MACzBY,EAAQG,KAAKF,GAEbA,EAAY,CAAEtB,MAAO,GAAIS,KAAM,MAE/Ba,EAAUtB,MAAMwB,KAAKqC,GACrBvC,EAAUb,KAAKe,KAAKpB,EAAKK,aAM1BY,EAMmB2B,CADTQ,EAAc/C,MAGbc,QAAQ,SAACD,EAAWvD,GACpC,IAAMoF,EAA+B,CACnC1C,KAAMa,EAAUb,KAAKsC,KAAK,KAC1BK,KAAM,YACNC,KAAM,CACJjC,QAAO,OAAA8B,OAAUnF,GACjBiC,MAAOsB,EAAUtB,MACjBM,MAAOgB,EAAUtB,MAAM,GAAGM,OAI5BgD,aAAcvD,EAAuBuB,EAAUtB,MAAO,SAGxDqB,EAAQG,KAAK2B,KAIR9B,GC7BH2C,EAAa,SAAC1D,EAAO2D,GACzB,IAAK,IAAIC,KAAcD,EAAU,CAC/B,IAAM7C,EAAU6C,EAASC,GACzB,GAAI5D,GAASc,EAAQd,MAAQA,EAAQc,EAAQb,IAC3C,OAAOa,EAAQ9C,KAInB,MAAO,OAuEM6F,EAhDa,SAACC,GAAqB,IAG5C1B,EAlBqB1C,EACnBqE,EA8BFC,EAfEjD,EAAU,GA4ChB,OA5DyBrB,EAmBIoE,EAAiBpE,MAlBxCqE,EAAe,GACrBrE,EAAMuB,QAAQ,SAACnB,GACT,QAAQ0C,KAAK1C,EAAK9B,OACpB+F,EAAaA,EAAa1D,OAAS,GAAGrC,KAAO+F,EAAaA,EAAa1D,OAAS,GAAGrC,KAAO8B,EAAK9B,KAC/F+F,EAAaA,EAAa1D,OAAS,GAAGuB,UAAYqC,WAAWF,EAAaA,EAAa1D,OAAS,GAAGuB,UAAYqC,WAAWnE,EAAK8B,WAAWpB,YAE1IuD,EAAa7C,KAAKpB,KAatBsC,GADAA,EAPO2B,GAQalE,IAAI,SAACqE,EAASC,GAChC,MAAQ,CACNnE,MAAOkE,EAAQE,KACfnE,KAAMgE,WAAWC,EAAQE,MAAQH,WAAWC,EAAQtC,WAAWpB,WAC/DN,WAAYgE,EAAQhE,WACpBJ,KAAMoE,EAAQlG,KAAKqG,cAAcC,QAAQ,SAAU,IACnDnD,MAAO+C,EAAQlG,KACfmG,MAAOA,KAMXH,GADAA,EAAcF,EAAiBH,UACL9D,IAAI,SAACqE,GAC7B,MAAQ,CACNlE,MAAOkE,EAAQE,KACfnE,KAAMgE,WAAWC,EAAQE,MAAQH,WAAWC,EAAQtC,WAAWpB,WAC/DxC,KAAMkG,EAAQlG,QAhFW,SAAC0B,GAAU,IAClCqB,EAAU,GACZC,EAAY,CAAEtB,MAAO,GAAIS,KAAM,IAgBnC,OAdAT,EAAMuB,QAAQ,SAACnB,GAET,QAAQ0C,KAAK1C,EAAKqB,QACpBH,EAAUtB,MAAMwB,KAAKpB,GACrBkB,EAAUb,KAAKe,KAAKpB,EAAKqB,OACzBJ,EAAQG,KAAKF,GAEbA,EAAY,CAAEtB,MAAO,GAAIS,KAAM,MAE/Ba,EAAUtB,MAAMwB,KAAKpB,GACrBkB,EAAUb,KAAKe,KAAKpB,EAAKqB,UAItBJ,EAkEmB2B,CAAuBN,GAE/BnB,QAAQ,SAACD,GAAc,IACjCuD,EAAiBvD,EAAUtB,MAAM,GAAGM,MACpC6C,EAA+B,CACnC1C,KAAMa,EAAUb,KAAKsC,KAAK,KAC1BK,KAAM,YACNC,KAAM,CACJjC,QAAS4C,EAAWa,EAAgBP,GACpCtE,MAAOsB,EAAUtB,MACjBM,MAAOuE,GAITvB,aAAcvD,EAAuBuB,EAAUtB,MAAO,UAExDqB,EAAQG,KAAK2B,KAGR9B,GClGIyD,EAAqB,SAAC1E,EAAMa,GAAa,IAC9C6C,EAAYS,WAAWnE,EAAK2E,YAC5BhB,EAAUQ,WAAWnE,EAAK4E,UAC1BC,EAAuBhE,EAASc,KAAK,SAACC,GAC1C,OAAO8B,GAAaS,WAAWvC,EAAI+C,aAAehB,GAAWQ,WAAWvC,EAAIgD,YAJ1B,YAMhD,IAAAC,EACK,MAEAA,EAAqBC,cAAcN,QAAQ,OAAQ,KAQjDjD,EAAsB,SAAC3B,EAAOmF,GAGzC,OAtCsC,SAACnF,GAAU,IAC3CoF,EAAe,GACjBjE,EAAiB,GAarB,OAZAnB,EAAMuB,QAAQ,SAACnB,GACTA,EAAK8E,gBAAkB/D,EACzBiE,EAAaA,EAAazE,OAAS,GAAGX,MAAMwB,KAAKpB,IAEjDe,EAAiBf,EAAK8E,cAEtBE,EAAa5D,KAAK,CAChBJ,QAAShB,EAAK8E,cACdlF,MAAO,CAAEI,QAIRgF,EAuBAC,CAPsB,SAACrF,EAAOiB,GACrC,OAAOjB,EAAMG,IAAI,SAAAmF,GAAC,OAAI7G,OAAO8G,OAAOD,EAAG,CAAEJ,cAAiBJ,EAAmBQ,EAAGrE,OAItDuE,CAAuBxF,EAAOmF,EAAclE,wcC5BjE,IAUMwE,EAA4B,SAAArF,GACvC,MAAI,cAAc0C,KAAK1C,EAAKgD,MACnB3E,OAAO8G,OAAOnF,EAAKsF,aAAa,GAAI,CAAElF,WAAY,IAEzBJ,EAAKsF,aAAaC,OAAO,SACzDC,EACAC,GAEA,OAAOtB,WAAWqB,EAAKpF,YAAc+D,WAAWsB,EAAQrF,YACpDoF,EACAC,KAUFC,EAAgB,SAAAC,GACpB,IAAMC,EAAkBP,EAA0BM,GAElD,MAAO,CACLzF,MAAOiE,WAAWwB,EAAYhB,YAC9BxE,IAAKgE,WAAWwB,EAAYf,UAC5BvE,KAAMuF,EAAgBC,QACtBzF,WAAY+D,WAAWyB,EAAgBxF,cAgB9B0F,EAA6B,SAAAlG,GAAS,IAC3CmG,EAAgB,GAatB,OAZmBnG,EAAMG,IAAI,SAACC,EAAMqE,GAAU,MAE1B,gBAAdrE,EAAKgD,MACP+C,EAAc3E,KAAKiD,EAAQ,GAjBc,SAAC2B,EAAaC,GAC3D,IAAMC,EAAqBF,EAAYV,aAAa,GAAGO,QAEvD,OAAAM,EAAA,GACKF,EADL,CAEEX,aAAcW,EAAaX,aAAavF,IAAI,SAAAmF,GAAC,OAAAiB,EAAA,GACxCjB,EADwC,CAE3CW,QAASX,EAAEW,SAhDgB7F,EAgDYkG,EA/CpClG,EAAKwE,QAAQ,MAAO,OADI,IAAAxE,MA6DpBoG,CAAgCpG,EAFxBJ,EAAMyE,EAAQ,KAItBrE,IAIOqG,OAAO,SAACC,EAAMjC,GAC9B,OAAQ0B,EAAcQ,SAASlC,MA6EpBmC,EAhCiB,SAAAC,GAAwB,IAChDxF,EAAU,GACVqB,EAAWmE,EAAqBxF,QAAQyF,MACxC3B,EAAgB0B,EAAqBxF,QAAQ0F,eAC7CC,EAA+Bd,EAA2BxD,GAyBhE,YAxBqD,IAAlByC,EAjBC,SAACnF,EAAOmF,GAG5C,OAFuBxD,EAAoB3B,EAAOmF,GAE5BhF,IAAI,SAAC8G,GACzB,MAAO,CACLjH,MAAOiH,EAAajH,MAAMG,IAAI2F,GAC9BrF,KAAMwG,EAAajH,MAAMG,IAAI,SAACmF,GAAD,OAAOG,EAA0BH,GAAGW,UACjE7E,QAAS6F,EAAa7F,WAaxB8F,CAA8BF,EAA8B7B,GA5CjC,SAAAnF,GAAS,IAChCqB,EAAU,GACZC,EAAY,CACdtB,MAAO,GACPS,KAAM,IAiBR,OAfAT,EAAMuB,QAAQ,SAACnB,GAAS,IAChB6F,EAAUR,EAA0BrF,GAAM6F,QAC1CkB,EAAiBrB,EAAc1F,GACjC,QAAQ0C,KAAKmD,IACf3E,EAAUtB,MAAMwB,KAAK2F,GACrB7F,EAAUb,KAAKe,KAAKyE,GACpB5E,EAAQG,KAAKF,GAEbA,EAAY,CAAEtB,MAAO,GAAIS,KAAM,MAE/Ba,EAAUtB,MAAMwB,KAAK2F,GACrB7F,EAAUb,KAAKe,KAAKyE,MAIjB5E,EAwBL2B,CACEgE,IAGczF,QAAQ,SAACD,EAAWvD,GACpC,IAAMoF,EAA+B,CACnC1C,KAAMa,EAAUb,KAAKsC,KAAK,KAC1BK,KAAM,YACNC,KAAM,CACJjC,QAASE,EAAUF,QAAV,WAAA8B,OAAgC5B,EAAUF,SAA1C,OAAA8B,OAA+DnF,GACxEiC,MAAOsB,EAAUtB,MACjBM,MAAOiE,WAAWjD,EAAUtB,MAAM,GAAGM,QAIvCgD,aAAcvD,EAAuBuB,EAAUtB,MAAO,SAExDqB,EAAQG,KAAK2B,KAGR9B,GCnDM+F,EA5FI,SAAAC,GAAW,IAgDAC,EAAUC,EAnCRC,EACtBC,EAqEFC,GAtEwBF,EAsEgBH,EAAQhG,QAAQ,GAAGA,QArEzDoG,EAAoB,GAC1BD,EAAWjG,QAAQ,SAAAlB,GAEjBoH,EAAkBjG,KAAgCnB,EAAOqF,aAAa,GAAGiC,WAdzDxH,IAAI,SAAAyH,GACpB,MAAO,CACLnH,KAAMmH,EAAQ,GACdtH,MAAOsH,EAAQ,GACfrH,IAAKqH,EAAQ,SAmBVH,GA+DT,OA9BoC,SAACI,GACnC,IAAMC,EAA2B,GAoBjC,OAnBAD,EAA+BtG,QAAQ,SAACwG,GACtC,IAAM5E,EAA+B,CACnC1C,KAAMsH,EAAa5H,IAAI,SAACC,GAAU,OAAOA,EAAKK,OAAQsC,KAAK,KAC3DK,KAAM,YACNC,KAAM,CAIJjC,QAAS2G,EAAa,GAAG3G,QACzBpB,MAAO+H,EACPzH,MAAOyH,EAAa,GAAGzH,OAIzBgD,aAAcvD,EAAuBgI,EAAc,SAErDD,EAAyBtG,KAAK2B,KAGzB2E,EArB2B,EAXRR,EAsC8BI,EAtCpBH,EAsCqCF,EAAQhG,QAAQ,GAAG0F,eArCrFO,EAASnH,IAAI,SAAA6H,GAClB,OAAOA,EAAM7H,IAAI,SAAAC,GAIf,OAFAA,EAAKgB,QAtBuB,SAAChB,EAAM6H,GACvC,IAAMnG,EAAamG,EAAgBlG,KAAK,SAAAC,GAAO,IACvCkG,EAAWlG,EAAImG,KACflG,EAASD,EAAIoG,GAEnB,OAAShI,EAAKE,QAAU4H,GAAc9H,EAAKG,MAAQ0B,IALM,YAQvD,IAAAH,EAGK,MAXkD,KAAAoB,OAc5CpB,EAAWV,SAQPiH,CAA0BjI,EAAMmH,GAExCnH,SCoCAY,MA3Ff,SAA0ChB,EAAOiB,GAM/C,OAuBF,SAA6BC,GAAmB,IAC1CC,EAAiBD,EAAkB,GAAGE,QACpCC,EAAU,GACZC,EAAY,CAAEtB,MAAO,GAAIS,KAAM,GAAIW,QAAS,IA2BhD,OA1BAF,EAAkBK,QAAQ,SAACnB,GAErBe,IAAmBf,EAAKgB,gBACnBhB,EAAKgB,QACZE,EAAUtB,MAAMwB,KAAKpB,GACrBkB,EAAUb,MAAQL,EAAKK,KAAO,IAC9Ba,EAAUF,QAAUD,IAKpBA,EAAiBf,EAAKgB,QAEtBE,EAAUb,KAAOa,EAAUb,KAAKiB,OAEhCL,EAAQG,KAAKF,IAEbA,EAAY,CAAEtB,MAAO,GAAIS,KAAM,GAAIW,QAAS,UAElCpB,MAAMwB,KAAKpB,GACrBkB,EAAUb,MAAQL,EAAKK,KAAO,OAIlCY,EAAQG,KAAKF,GAEND,EAvDQM,CAWjB,SAA8B3B,EAAOiB,GACnC,OAAOjB,EAAMG,IAAI,SAACC,GAGhB,OAFAA,EAAKgB,QA0DT,SAA4BhB,EAAMa,GAEhC,IAAMa,EAAab,EAASc,KAAK,SAACC,GAChC,OAAS5B,EAAKE,OAAS0B,EAAI1B,OAAWF,EAAKG,KAAOyB,EAAIzB,MAHd,YAMtC,IAAAuB,EAGK,MAGAA,EAAWV,QAtEHkB,CAAmBlC,EAAMa,GAEjCb,IAjBiBmC,CAAqBvC,EAAOiB,KCsEzCqH,EA5CiB,SAACC,GAAmC,IAC5DlH,EAAU,GACZsB,EAAsB,KAEpBD,EAAW6F,EAA+BvI,MAqChD,OAnCIuI,EAA+BC,aACjC7F,EAAsB4F,EAA+BC,aAGlD7F,EAGiB3B,EAAiC0B,EAAU6F,EAA+BC,YAnCnE,SAAAxI,GAAS,IAChCqB,EAAU,GACZC,EAAY,CAAEtB,MAAO,GAAIS,KAAM,IAiBnC,OAfAT,EAAMuB,QAAQ,SAAAnB,GAER,QAAQ0C,KAAK1C,EAAKK,OACpBa,EAAUtB,MAAMwB,KAAKpB,GACrBkB,EAAUb,KAAKe,KAAKpB,EAAKK,MACzBa,EAAUb,KAAOa,EAAUb,KAAKsC,KAAK,KACrC1B,EAAQG,KAAKF,GAEbA,EAAY,CAAEtB,MAAO,GAAIS,KAAM,MAE/Ba,EAAUtB,MAAMwB,KAAKpB,GACrBkB,EAAUb,KAAKe,KAAKpB,EAAKK,SAItBY,EAce2B,CAAuBN,IAK3BnB,QAAQ,SAACD,EAAWvD,GAGpC,GAAIuD,EAAUtB,MAAM,GAAI,CACtB,IAAIiD,EAAY,OAAAC,OAAWnF,GACvB4E,IACFM,EAAe3B,EAAUF,SAG3B,IAAM+B,EAA+B,CACnC1C,KAAMa,EAAUb,KAChB2C,KAAM,YACNC,KAAM,CACJjC,QAAS6B,EACTjD,MAAOsB,EAAUtB,MACjBM,MAAOgB,EAAUtB,MAAM,GAAGM,OAI5BgD,aAAcvD,EAAuBuB,EAAUtB,MAAO,SAExDqB,EAAQG,KAAK2B,MAIV9B,GClEHoH,EAAU,SAAAC,GAAI,OAAIA,EAAK/C,OAAO,SAACgD,EAAGC,GAAJ,OAAUD,EAAEzF,OAAO2F,MAAMC,QAAQF,GAAKH,EAAQG,GAAKA,IAAI,KAuB5EG,EAjBS,SAACC,GAAW,IAC5B1F,EAAe0F,EAAO7I,IAAI,SAAA8I,GAAK,OAAIA,EAAM3F,eACzC4F,EAAmBT,EAAQnF,GAE3B6F,EAAY,GAUlB,OARAD,EAAiB3H,QAAQ,SAAC8B,GACxB8F,EAAU9F,EAAK/D,KAAO,CACpB8D,KAAM,OACNgG,WAAY,UACZ/F,UAIG8F,GCoBME,UApCQ,SAACC,EAAgBC,GACtC,IAAIP,EACJ,OAAQO,GACR,IAAK,WAGH,MAAO,CAAEP,OAFTA,EAASxG,EAAgB8G,GAERH,UAAWJ,EAAgBC,IAC9C,IAAK,YAGH,MAAO,CAAEA,OAFTA,EAASzF,EAAiB+F,GAETH,UAAWJ,EAAgBC,IAC9C,IAAK,eAGH,MAAO,CAAEA,OAFTA,EAAS7E,EAAoBmF,GAEZH,UAAWJ,EAAgBC,IAC9C,IAAK,MAGH,MAAO,CAAEA,OAFTA,EAAS5B,EAAWkC,GAEHH,UAAWJ,EAAgBC,IAC9C,IAAK,UACH,OAAOM,EAET,IAAK,mBAGH,MAAO,CAAEN,OAFTA,EAASpC,EAAwB0C,GAEhBH,UAAWJ,EAAgBC,IAC9C,IAAK,mBAGH,MAAO,CAAEA,OAFTA,EAASV,EAAwBgB,GAEhBH,UAAWJ,EAAgBC,IAC9C,QAEEQ,QAAQC,MAAM","file":"sttJsonAdapter.js","sourcesContent":[" \t// The module cache\n \tvar installedModules = {};\n\n \t// The require function\n \tfunction __webpack_require__(moduleId) {\n\n \t\t// Check if module is in cache\n \t\tif(installedModules[moduleId]) {\n \t\t\treturn installedModules[moduleId].exports;\n \t\t}\n \t\t// Create a new module (and put it into the cache)\n \t\tvar module = installedModules[moduleId] = {\n \t\t\ti: moduleId,\n \t\t\tl: false,\n \t\t\texports: {}\n \t\t};\n\n \t\t// Execute the module function\n \t\tmodules[moduleId].call(module.exports, module, module.exports, __webpack_require__);\n\n \t\t// Flag the module as loaded\n \t\tmodule.l = true;\n\n \t\t// Return the exports of the module\n \t\treturn module.exports;\n \t}\n\n\n \t// expose the modules object (__webpack_modules__)\n \t__webpack_require__.m = modules;\n\n \t// expose the module cache\n \t__webpack_require__.c = installedModules;\n\n \t// define getter function for harmony exports\n \t__webpack_require__.d = function(exports, name, getter) {\n \t\tif(!__webpack_require__.o(exports, name)) {\n \t\t\tObject.defineProperty(exports, name, { enumerable: true, get: getter });\n \t\t}\n \t};\n\n \t// define __esModule on exports\n \t__webpack_require__.r = function(exports) {\n \t\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n \t\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n \t\t}\n \t\tObject.defineProperty(exports, '__esModule', { value: true });\n \t};\n\n \t// create a fake namespace object\n \t// mode & 1: value is a module id, require it\n \t// mode & 2: merge all properties of value into the ns\n \t// mode & 4: return value when already ns object\n \t// mode & 8|1: behave like require\n \t__webpack_require__.t = function(value, mode) {\n \t\tif(mode & 1) value = __webpack_require__(value);\n \t\tif(mode & 8) return value;\n \t\tif((mode & 4) && typeof value === 'object' && value && value.__esModule) return value;\n \t\tvar ns = Object.create(null);\n \t\t__webpack_require__.r(ns);\n \t\tObject.defineProperty(ns, 'default', { enumerable: true, value: value });\n \t\tif(mode & 2 && typeof value != 'string') for(var key in value) __webpack_require__.d(ns, key, function(key) { return value[key]; }.bind(null, key));\n \t\treturn ns;\n \t};\n\n \t// getDefaultExport function for compatibility with non-harmony modules\n \t__webpack_require__.n = function(module) {\n \t\tvar getter = module && module.__esModule ?\n \t\t\tfunction getDefault() { return module['default']; } :\n \t\t\tfunction getModuleExports() { return module; };\n \t\t__webpack_require__.d(getter, 'a', getter);\n \t\treturn getter;\n \t};\n\n \t// Object.prototype.hasOwnProperty.call\n \t__webpack_require__.o = function(object, property) { return Object.prototype.hasOwnProperty.call(object, property); };\n\n \t// __webpack_public_path__\n \t__webpack_require__.p = \"\";\n\n\n \t// Load entry module and return exports\n \treturn __webpack_require__(__webpack_require__.s = 51);\n","/**\n * Helper function to generate draft.js entities,\n * see unit test for example data structure\n * it adds offset and length to recognise word in draftjs\n */\n\n/**\n* @param {json} words - List of words\n* @param {string} wordAttributeName - eg 'punct' or 'text' or etc.\n* attribute for the word object containing the text. eg word ={ punct:'helo', ... }\n* or eg word ={ text:'helo', ... }\n*/\nconst generateEntitiesRanges = (words, wordAttributeName) => {\n let position = 0;\n\n return words.map((word) => {\n const result = {\n start: word.start,\n end: word.end,\n confidence: word.confidence,\n text: word[wordAttributeName],\n offset: position,\n length: word[wordAttributeName].length,\n key: Math.random()\n .toString(36)\n .substring(6),\n };\n // increase position counter - to determine word offset in paragraph\n position = position + word[wordAttributeName].length + 1;\n\n return result;\n });\n};\n\nexport default generateEntitiesRanges;\n","/**\nedge cases\n- more segments then words - not an issue if you start by matching words with segment\nand handle edge case where it doesn't find a match\n- more words then segments - orphan words\n */\nfunction groupWordsInParagraphsBySpeakers(words, segments) {\n // add speakers to each word\n const wordsWithSpeakers = addSpeakerToEachWord(words, segments.segments);\n // group words by speakers sequentially\n const result = groupWordsBySpeaker(wordsWithSpeakers);\n\n return result;\n};\n\n/**\n* Add speakers to each words\n* if it doesn't have add unknown attribute `U_UKN`\n* @param {*} words\n* @param {*} segments\n*/\nfunction addSpeakerToEachWord(words, segments) {\n const tmpWordsWithSpeakers = [];\n words.forEach((word) => {\n const tmpSpeakerSegment = findSegmentForWord(word, segments);\n\n word.speaker = formatSpeakerName(tmpSpeakerSegment.speaker);\n tmpWordsWithSpeakers.push(word);\n });\n\n return tmpWordsWithSpeakers;\n}\n\n/**\n * Groups Words by speaker attribute\n * @param {array} wordsWithSpeakers - same as kaldi words list but with a `speaker` label attribute on each word\n * @return {array} - list of paragraph objcts, with words, text and sepaker attributes.\n * where words is an array and the other two are strings.\n */\nfunction groupWordsBySpeaker(wordsWithSpeakers) {\n let currentSpeaker = wordsWithSpeakers[0].speaker;\n const results = [ ];\n let paragraph = { words: [], text: '', speaker: '' };\n wordsWithSpeakers.forEach((word) => {\n // if current speaker same as word speaker add words to paragraph\n if (currentSpeaker === word.speaker) {\n paragraph.words.push(word);\n paragraph.text += word.punct + ' ';\n paragraph.speaker = currentSpeaker;\n }\n // if it's not same speaker\n else {\n // update current speaker\n currentSpeaker = word.speaker;\n // remove spacing in text\n paragraph.text = paragraph.text.trim();\n //save previous paragraph\n results.push(paragraph);\n // reset paragraph\n paragraph = { words: [], text: '', speaker: 'U_UKN' };\n // add words attributes to new\n paragraph.words.push(word);\n paragraph.text += word.punct + ' ';\n }\n });\n // add last paragraph\n results.push(paragraph);\n\n return results;\n}\n\n/**\n* Helper functions\n*/\n\n/**\n* given word start and end time attributes\n* looks for segment range that contains that word\n* if it doesn't find any it returns a segment with `UKN`\n* speaker attributes.\n* @param {object} word - word object\n* @param {array} segments - list of segments objects\n* @return {object} - a single segment whose range contains the word\n*/\nfunction findSegmentForWord(word, segments) {\n\n const tmpSegment = segments.find((seg) => {\n const segEnd = seg.start + seg.duration;\n\n return ((word.start >= seg.start) && (word.end <= segEnd));\n });\n // if find doesn't find any matches it returns an undefined\n if (tmpSegment === undefined) {\n // covering edge case orphan word not belonging to any segments\n // adding UKN speaker label\n return {\n '@type': 'Segment',\n // keeping both speaker id and gender as this is used later\n // to format speaker label combining the two\n speaker: { '@id': 'UKN', gender: 'U' }\n };\n } else {\n // find returns the first element that matches the criteria\n return tmpSegment;\n }\n}\n\n/**\n* formats kaldi speaker object into a string\n* Combining Gender and speaker Id\n* @param {object} speaker - BBC kaldi speaker object\n* @return {string} -\n*/\nfunction formatSpeakerName(speaker) {\n return speaker.gender + '_' + speaker['@id'];\n}\n\nexport default groupWordsInParagraphsBySpeakers;","/**\n * Convert BBC Kaldi json to draftJs\n * see `sample` folder for example of input and output as well as `example-usage.js`\n *\n */\n\nimport generateEntitiesRanges from '../generate-entities-ranges/index.js';\nimport groupWordsInParagraphsBySpeakers from './group-words-by-speakers.js';\n/**\n * groups words list from kaldi transcript based on punctuation.\n * @todo To be more accurate, should introduce an honorifics library to do the splitting of the words.\n * @param {array} words - array of words opbjects from kaldi transcript\n */\n\nconst groupWordsInParagraphs = words => {\n const results = [];\n let paragraph = { words: [], text: [] };\n\n words.forEach(word => {\n // if word contains punctuation\n if (/[.?!]/.test(word.punct)) {\n paragraph.words.push(word);\n paragraph.text.push(word.punct);\n paragraph.text = paragraph.text.join(' ');\n results.push(paragraph);\n // reset paragraph\n paragraph = { words: [], text: [] };\n } else {\n paragraph.words.push(word);\n paragraph.text.push(word.punct);\n }\n });\n\n return results;\n};\n\nconst bbcKaldiToDraft = bbcKaldiJson => {\n const results = [];\n let tmpWords;\n let speakerSegmentation = null;\n let wordsByParagraphs = [];\n\n // BBC Octo Labs API Response wraps Kaldi response around retval,\n // while kaldi contains word attribute at root\n if (bbcKaldiJson.retval !== undefined) {\n tmpWords = bbcKaldiJson.retval.words;\n if (bbcKaldiJson.retval.segmentation !== undefined) {\n speakerSegmentation = bbcKaldiJson.retval.segmentation;\n }\n } else {\n tmpWords = bbcKaldiJson.words;\n if (bbcKaldiJson.segmentation !== undefined) {\n speakerSegmentation = bbcKaldiJson.segmentation;\n }\n }\n\n if (speakerSegmentation === null) {\n wordsByParagraphs = groupWordsInParagraphs(tmpWords);\n } else {\n wordsByParagraphs = groupWordsInParagraphsBySpeakers(tmpWords, speakerSegmentation);\n }\n\n wordsByParagraphs.forEach((paragraph, i) => {\n // if paragraph contain words\n // eg sometimes the speaker segmentation might not contain words :man-shrugging:\n if (paragraph.words[0] !== undefined) {\n let speakerLabel = `TBC ${ i }`;\n if (speakerSegmentation !== null) {\n speakerLabel = paragraph.speaker;\n }\n\n const draftJsContentBlockParagraph = {\n text: paragraph.text,\n type: 'paragraph',\n data: {\n speaker: speakerLabel,\n words: paragraph.words,\n start: paragraph.words[0].start\n },\n // the entities as ranges are each word in the space-joined text,\n // so it needs to be compute for each the offset from the beginning of the paragraph and the length\n entityRanges: generateEntitiesRanges(paragraph.words, 'punct') // wordAttributeName\n };\n results.push(draftJsContentBlockParagraph);\n }\n });\n\n return results;\n};\n\nexport default bbcKaldiToDraft;\n","/**\n * Convert autoEdit2 Json to draftJS\n * see `sample` folder for example of input and output as well as `example-usage.js`\n */\n\nimport generateEntitiesRanges from '../generate-entities-ranges/index';\n\n/**\n * groups words list from autoEdit transcript based on punctuation.\n * @todo To be more accurate, should introduce an honorifics library to do the splitting of the words.\n * @param {array} words - array of words objects from autoEdit transcript\n */\n\nconst groupWordsInParagraphs = (autoEditText) => {\n const results = [];\n let paragraph = { words: [], text: [] };\n\n autoEditText.forEach((autoEditparagraph) => {\n autoEditparagraph.paragraph.forEach((autoEditLine) => {\n autoEditLine.line.forEach((word) => {\n // adjusting time reference attributes from\n // `startTime` `endTime` to `start` `end`\n // for word object\n const tmpWord = {\n text: word.text,\n start: word.startTime,\n end: word.endTime,\n };\n // if word contains punctuation\n if (/[.?!]/.test(word.text)) {\n paragraph.words.push(tmpWord);\n paragraph.text.push(word.text);\n results.push(paragraph);\n // reset paragraph\n paragraph = { words: [], text: [] };\n } else {\n paragraph.words.push(tmpWord);\n paragraph.text.push(word.text);\n }\n });\n });\n });\n\n return results;\n};\n\nconst autoEdit2ToDraft = (autoEdit2Json) => {\n const results = [];\n const tmpWords = autoEdit2Json.text;\n const wordsByParagraphs = groupWordsInParagraphs(tmpWords);\n\n wordsByParagraphs.forEach((paragraph, i) => {\n const draftJsContentBlockParagraph = {\n text: paragraph.text.join(' '),\n type: 'paragraph',\n data: {\n speaker: `TBC ${ i }`,\n words: paragraph.words,\n start: paragraph.words[0].start\n },\n // the entities as ranges are each word in the space-joined text,\n // so it needs to be compute for each the offset from the beginning of the paragraph and the length\n entityRanges: generateEntitiesRanges(paragraph.words, 'text'),\n };\n // console.log(JSON.stringify(draftJsContentBlockParagraph,null,2))\n results.push(draftJsContentBlockParagraph);\n });\n\n // console.log(JSON.stringify(results,null,2))\n return results;\n};\n\nexport default autoEdit2ToDraft;\n","/**\n * Convert Speechmatics Json to DraftJs\n * see `sample` folder for example of input and output as well as `example-usage.js`\n */\n\nimport generateEntitiesRanges from '../generate-entities-ranges/index.js';\n\n/**\n * groups words list from speechmatics based on punctuation.\n * @todo To be more accurate, should introduce an honorifics library to do the splitting of the words.\n * @todo As this function is also used in the bbc-kaldi adapter, should it be refactored into its own file?\n * @param {array} words - array of words objects from speechmatics transcript\n */\nconst groupWordsInParagraphs = (words) => {\n const results = [];\n let paragraph = { words: [], text: [] };\n\n words.forEach((word) => {\n // if word contains punctuation\n if (/[.?!]/.test(word.punct)) {\n paragraph.words.push(word);\n paragraph.text.push(word.punct);\n results.push(paragraph);\n // reset paragraph\n paragraph = { words: [], text: [] };\n } else {\n paragraph.words.push(word);\n paragraph.text.push(word.punct);\n }\n });\n\n return results;\n};\n\n/**\n * Determines the speaker of a paragraph by comparing the start time of the paragraph with\n * the speaker times.\n * @param {float} start - Starting point of paragraph\n * @param {array} speakers - list of all speakers with start and end time\n */\nconst getSpeaker = (start, speakers) => {\n for (var speakerIdx in speakers) {\n const speaker = speakers[speakerIdx];\n if (start >= speaker.start & start < speaker.end) {\n return speaker.name;\n }\n }\n\n return 'UNK';\n};\n\n/**\n * Speechmatics treats punctuation as own words. This function merges punctuations with\n * the pevious word and adjusts the total duration of the word.\n * @param {array} words - array of words objects from speechmatics transcript\n */\nconst curatePunctuation = (words) => {\n const curatedWords = [];\n words.forEach((word) => {\n if (/[.?!]/.test(word.name)) {\n curatedWords[curatedWords.length - 1].name = curatedWords[curatedWords.length - 1].name + word.name;\n curatedWords[curatedWords.length - 1].duration = (parseFloat(curatedWords[curatedWords.length - 1].duration) + parseFloat(word.duration)).toString();\n } else {\n curatedWords.push(word);\n }\n }\n );\n\n return curatedWords;\n};\n\nconst speechmaticsToDraft = (speechmaticsJson) => {\n const results = [];\n\n let tmpWords;\n tmpWords = curatePunctuation(speechmaticsJson.words);\n tmpWords = tmpWords.map((element, index) => {\n return ({\n start: element.time,\n end: (parseFloat(element.time) + parseFloat(element.duration)).toString(),\n confidence: element.confidence,\n word: element.name.toLowerCase().replace(/[.?!]/g, ''),\n punct: element.name,\n index: index,\n });\n });\n\n let tmpSpeakers;\n tmpSpeakers = speechmaticsJson.speakers;\n tmpSpeakers = tmpSpeakers.map((element) => {\n return ({\n start: element.time,\n end: (parseFloat(element.time) + parseFloat(element.duration)).toString(),\n name: element.name,\n });\n });\n\n const wordsByParagraphs = groupWordsInParagraphs(tmpWords);\n\n wordsByParagraphs.forEach((paragraph) => {\n const paragraphStart = paragraph.words[0].start;\n const draftJsContentBlockParagraph = {\n text: paragraph.text.join(' '),\n type: 'paragraph',\n data: {\n speaker: getSpeaker(paragraphStart, tmpSpeakers),\n words: paragraph.words,\n start: paragraphStart\n },\n // the entities as ranges are each word in the space-joined text,\n // so it needs to be compute for each the offset from the beginning of the paragraph and the length\n entityRanges: generateEntitiesRanges(paragraph.words, 'punct'), // wordAttributeName\n };\n results.push(draftJsContentBlockParagraph);\n });\n\n return results;\n};\n\nexport default speechmaticsToDraft;\n","export const groupWordsBySpeakerLabel = (words) => {\n const groupedWords = [];\n let currentSpeaker = '';\n words.forEach((word) => {\n if (word.speaker_label === currentSpeaker) {\n groupedWords[groupedWords.length - 1].words.push(word);\n } else {\n currentSpeaker = word.speaker_label;\n // start new speaker block\n groupedWords.push({\n speaker: word.speaker_label,\n words: [ word ] });\n }\n });\n\n return groupedWords;\n};\n\nexport const findSpeakerForWord = (word, segments) => {\n const startTime = parseFloat(word.start_time);\n const endTime = parseFloat(word.end_time);\n const firstMatchingSegment = segments.find((seg) => {\n return startTime >= parseFloat(seg.start_time) && endTime <= parseFloat(seg.end_time);\n });\n if (firstMatchingSegment === undefined) {\n return 'UKN';\n } else {\n return firstMatchingSegment.speaker_label.replace('spk_', '');\n }\n};\n\nconst addSpeakerLabelToWords = (words, segments) => {\n return words.map(w => Object.assign(w, { 'speaker_label': findSpeakerForWord(w, segments) }));\n};\n\nexport const groupWordsBySpeaker = (words, speakerLabels) => {\n const wordsWithSpeakers = addSpeakerLabelToWords(words, speakerLabels.segments);\n\n return groupWordsBySpeakerLabel(wordsWithSpeakers);\n};","/**\n * Converts AWS Transcribe Json to DraftJs\n * see `sample` folder for example of input and output as well as `example-usage.js`\n */\n\nimport generateEntitiesRanges from '../generate-entities-ranges/index.js';\nimport { groupWordsBySpeaker } from './group-words-by-speakers';\n\nexport const stripLeadingSpace = word => {\n return word.replace(/^\\s/, '');\n};\n\n/**\n * @param {json} words - List of words\n * @param {string} wordAttributeName - eg 'punct' or 'text' or etc.\n * attribute for the word object containing the text. eg word ={ punct:'helo', ... }\n * or eg word ={ text:'helo', ... }\n */\nexport const getBestAlternativeForWord = word => {\n if (/punctuation/.test(word.type)) {\n return Object.assign(word.alternatives[0], { confidence: 1 }); //Transcribe doesn't provide a confidence for punctuation\n }\n const wordWithHighestConfidence = word.alternatives.reduce(function(\n prev,\n current\n ) {\n return parseFloat(prev.confidence) > parseFloat(current.confidence)\n ? prev\n : current;\n });\n\n return wordWithHighestConfidence;\n};\n\n/**\n * Normalizes words so they can be used in\n * the generic generateEntitiesRanges() method\n **/\nconst normalizeWord = currentWord => {\n const bestAlternative = getBestAlternativeForWord(currentWord);\n\n return {\n start: parseFloat(currentWord.start_time),\n end: parseFloat(currentWord.end_time),\n text: bestAlternative.content,\n confidence: parseFloat(bestAlternative.confidence)\n };\n};\n\nexport const appendPunctuationToPreviousWord = (punctuation, previousWord) => {\n const punctuationContent = punctuation.alternatives[0].content;\n\n return {\n ...previousWord,\n alternatives: previousWord.alternatives.map(w => ({\n ...w,\n content: w.content + stripLeadingSpace(punctuationContent)\n }))\n };\n};\n\nexport const mapPunctuationItemsToWords = words => {\n const itemsToRemove = [];\n const dirtyArray = words.map((word, index) => {\n let previousWord = {};\n if (word.type === 'punctuation') {\n itemsToRemove.push(index - 1);\n previousWord = words[index - 1];\n\n return appendPunctuationToPreviousWord(word, previousWord);\n } else {\n return word;\n }\n });\n\n return dirtyArray.filter((item, index) => {\n return !itemsToRemove.includes(index);\n });\n};\n\n/**\n * groups words list from amazon transcribe transcript based on punctuation.\n * @todo To be more accurate, should introduce an honorifics library to do the splitting of the words.\n * @param {array} words - array of words objects from kaldi transcript\n */\nconst groupWordsInParagraphs = words => {\n const results = [];\n let paragraph = {\n words: [],\n text: []\n };\n words.forEach((word) => {\n const content = getBestAlternativeForWord(word).content;\n const normalizedWord = normalizeWord(word);\n if (/[.?!]/.test(content)) {\n paragraph.words.push(normalizedWord);\n paragraph.text.push(content);\n results.push(paragraph);\n // reset paragraph\n paragraph = { words: [], text: [] };\n } else {\n paragraph.words.push(normalizedWord);\n paragraph.text.push(content);\n }\n });\n\n return results;\n};\n\nconst groupSpeakerWordsInParagraphs = (words, speakerLabels) => {\n const wordsBySpeaker = groupWordsBySpeaker(words, speakerLabels);\n\n return wordsBySpeaker.map((speakerGroup) => {\n return {\n words: speakerGroup.words.map(normalizeWord),\n text: speakerGroup.words.map((w) => getBestAlternativeForWord(w).content),\n speaker: speakerGroup.speaker\n };\n });\n};\n\nconst amazonTranscribeToDraft = amazonTranscribeJson => {\n const results = [];\n const tmpWords = amazonTranscribeJson.results.items;\n const speakerLabels = amazonTranscribeJson.results.speaker_labels;\n const wordsWithRemappedPunctuation = mapPunctuationItemsToWords(tmpWords);\n const speakerSegmentation = typeof(speakerLabels) != 'undefined';\n\n const wordsByParagraphs = speakerSegmentation ?\n groupSpeakerWordsInParagraphs(wordsWithRemappedPunctuation, speakerLabels) :\n groupWordsInParagraphs(\n wordsWithRemappedPunctuation\n );\n\n wordsByParagraphs.forEach((paragraph, i) => {\n const draftJsContentBlockParagraph = {\n text: paragraph.text.join(' '),\n type: 'paragraph',\n data: {\n speaker: paragraph.speaker ? `Speaker ${ paragraph.speaker }` : `TBC ${ i }`,\n words: paragraph.words,\n start: parseFloat(paragraph.words[0].start)\n },\n // the entities as ranges are each word in the space-joined text,\n // so it needs to be compute for each the offset from the beginning of the paragraph and the length\n entityRanges: generateEntitiesRanges(paragraph.words, 'text') // wordAttributeName\n };\n results.push(draftJsContentBlockParagraph);\n });\n\n return results;\n};\n\nexport default amazonTranscribeToDraft;\n","/**\n * Convert IBM json to draftJS\n * see `sample` folder for example of input and output as well as `example-usage.js`\n *\n */\nimport generateEntitiesRanges from '../generate-entities-ranges/index.js';\n\nconst ibmToDraft = ibmJson => {\n // helper function to normalise IBM words at line level\n const normalizeTimeStampsToWords = timestamps => {\n return timestamps.map(ibmWord => {\n return {\n text: ibmWord[0],\n start: ibmWord[1],\n end: ibmWord[2]\n };\n });\n };\n\n //\n const normalizeIBMWordsList = ibmResults => {\n const normalisedResults = [];\n ibmResults.forEach(result => {\n // nested array to keep paragraph segmentation same as IBM lines\n normalisedResults.push(normalizeTimeStampsToWords(result.alternatives[0].timestamps));\n // TODO: can be revisited - as separate PR by flattening the array like this\n // normalisedResults = normalisedResults.concact(normalizeTimeStampsToWords(result.alternatives[0].timestamps));\n // addSpeakersToWords function would need adjusting as would be dealing with a 1D array instead of 2D\n // if edge case, like in example file, that there's one speaker recognised through all of speaker segemtnation info\n // could break into paragraph when is over a minute? at end of IBM line?\n // or punctuation, altho IBM does not seem to provide punctuation?\n });\n\n return normalisedResults;\n };\n\n // TODO: could be separate file\n const findSpeakerSegmentForWord = (word, speakerSegments) => {\n const tmpSegment = speakerSegments.find(seg => {\n const segStart = seg.from;\n const segEnd = seg.to;\n\n return ((word.start === segStart) && (word.end === segEnd));\n });\n // if find doesn't find any matches it returns an undefined\n if (tmpSegment === undefined) {\n // covering edge case orphan word not belonging to any segments\n // adding UKN speaker label\n return 'UKN';\n } else {\n // find returns the first element that matches the criteria\n return `S_${ tmpSegment.speaker }`;\n }\n };\n // add speakers to words\n const addSpeakersToWords = (ibmWords, ibmSpeakers) => {\n return ibmWords.map(lines => {\n return lines.map(word => {\n\n word.speaker = findSpeakerSegmentForWord(word, ibmSpeakers);\n\n return word;\n });\n });\n };\n\n const ibmNormalisedWordsToDraftJs = (ibmNormalisedWordsWithSpeakers) => {\n const draftJsParagraphsResults = [];\n ibmNormalisedWordsWithSpeakers.forEach((ibmParagraph) => {\n const draftJsContentBlockParagraph = {\n text: ibmParagraph.map((word) => {return word.text;}).join(' '),\n type: 'paragraph',\n data: {\n // Assuming each paragraph in IBM line is the same\n // for context it just seems like the IBM data structure gives you word level speakers,\n // but also gives you \"lines\" so assuming each word in a line has the same speaker.\n speaker: ibmParagraph[0].speaker,\n words: ibmParagraph,\n start: ibmParagraph[0].start\n },\n // the entities as ranges are each word in the space-joined text,\n // so it needs to be compute for each the offset from the beginning of the paragraph and the length\n entityRanges: generateEntitiesRanges(ibmParagraph, 'text'), // wordAttributeName\n };\n draftJsParagraphsResults.push(draftJsContentBlockParagraph);\n });\n\n return draftJsParagraphsResults;\n };\n\n const normalisedWords = normalizeIBMWordsList(ibmJson.results[0].results);\n // TODO: nested array of words, to keep some sort of paragraphs, in case there's only one speaker\n // can be refactored/optimised later\n const ibmNormalisedWordsWithSpeakers = addSpeakersToWords(normalisedWords, ibmJson.results[0].speaker_labels);\n const ibmDratJs = ibmNormalisedWordsToDraftJs(ibmNormalisedWordsWithSpeakers);\n\n return ibmDratJs;\n};\n\nexport default ibmToDraft;\n","/**\nedge cases\n- more segments then words - not an issue if you start by matching words with segment\nand handle edge case where it doesn't find a match\n- more words then segments - orphan words\n */\nfunction groupWordsInParagraphsBySpeakers(words, segments) {\n // add speakers to each word\n const wordsWithSpeakers = addSpeakerToEachWord(words, segments);\n // group words by speakers sequentially\n const result = groupWordsBySpeaker(wordsWithSpeakers);\n\n return result;\n};\n\n/**\n* Add speakers to each words\n* if it doesn't have add unknown attribute `U_UKN`\n* @param {*} words\n* @param {*} segments\n*/\nfunction addSpeakerToEachWord(words, segments) {\n return words.map((word) => {\n word.speaker = findSegmentForWord(word, segments);\n\n return word;\n });\n}\n\n/**\n * Groups Words by speaker attribute\n * @param {array} wordsWithSpeakers - same as kaldi words list but with a `speaker` label attribute on each word\n * @return {array} - list of paragraph objcts, with words, text and sepaker attributes.\n * where words is an array and the other two are strings.\n */\nfunction groupWordsBySpeaker(wordsWithSpeakers) {\n let currentSpeaker = wordsWithSpeakers[0].speaker;\n const results = [ ];\n let paragraph = { words: [], text: '', speaker: '' };\n wordsWithSpeakers.forEach((word) => {\n // if current speaker same as word speaker add words to paragraph\n if (currentSpeaker === word.speaker) {\n delete word.speaker;\n paragraph.words.push(word);\n paragraph.text += word.text + ' ';\n paragraph.speaker = currentSpeaker;\n }\n // if it's not same speaker\n else {\n // update current speaker\n currentSpeaker = word.speaker;\n // remove spacing in text\n paragraph.text = paragraph.text.trim();\n //save previous paragraph\n results.push(paragraph);\n // reset paragraph\n paragraph = { words: [], text: '', speaker: 'U_UKN' };\n // add words attributes to new\n paragraph.words.push(word);\n paragraph.text += word.text + ' ';\n }\n });\n // add last paragraph\n results.push(paragraph);\n\n return results;\n}\n\n/**\n* Helper functions\n*/\n\n/**\n* given word start and end time attributes\n* looks for segment range that contains that word\n* if it doesn't find any it returns a segment with `UKN`\n* speaker attributes.\n* @param {object} word - word object\n* @param {array} segments - list of segments objects\n* @return {object} - a single segment whose range contains the word\n*/\nfunction findSegmentForWord(word, segments) {\n\n const tmpSegment = segments.find((seg) => {\n return ((word.start >= seg.start) && (word.end <= seg.end));\n });\n // if find doesn't find any matches it returns an undefined\n if (tmpSegment === undefined) {\n // covering edge case orphan word not belonging to any segments\n // adding UKN speaker label\n return 'UKN';\n } else {\n // find returns the first element that matches the criteria\n return tmpSegment.speaker;\n }\n}\n\nexport default groupWordsInParagraphsBySpeakers;","/**\n * Convert Digital Paper Edit transcript json format to DraftJS\n * More details see\n * https://github.com/bbc/digital-paper-edit\n */\nimport generateEntitiesRanges from '../generate-entities-ranges/index.js';\nimport groupWordsInParagraphsBySpeakers from './group-words-by-speakers.js';\n/**\n * groups words list from kaldi transcript based on punctuation.\n * @todo To be more accurate, should introduce an honorifics library to do the splitting of the words.\n * @param {array} words - array of words opbjects from kaldi transcript\n */\nconst groupWordsInParagraphs = words => {\n const results = [];\n let paragraph = { words: [], text: [] };\n\n words.forEach(word => {\n // if word contains punctuation\n if (/[.?!]/.test(word.text)) {\n paragraph.words.push(word);\n paragraph.text.push(word.text);\n paragraph.text = paragraph.text.join(' ');\n results.push(paragraph);\n // reset paragraph\n paragraph = { words: [], text: [] };\n } else {\n paragraph.words.push(word);\n paragraph.text.push(word.text);\n }\n });\n\n return results;\n};\n\nconst digitalPaperEditToDraft = (digitalPaperEditTranscriptJson) => {\n const results = [];\n let speakerSegmentation = null;\n let wordsByParagraphs = [];\n const tmpWords = digitalPaperEditTranscriptJson.words;\n\n if (digitalPaperEditTranscriptJson.paragraphs) {\n speakerSegmentation = digitalPaperEditTranscriptJson.paragraphs;\n }\n\n if (!speakerSegmentation) {\n wordsByParagraphs = groupWordsInParagraphs(tmpWords);\n } else {\n wordsByParagraphs = groupWordsInParagraphsBySpeakers(tmpWords, digitalPaperEditTranscriptJson.paragraphs );\n }\n\n wordsByParagraphs.forEach((paragraph, i) => {\n // if paragraph contain words\n // eg sometimes the speaker segmentation might not contain words :man-shrugging:\n if (paragraph.words[0]) {\n let speakerLabel = `TBC ${ i }`;\n if (speakerSegmentation) {\n speakerLabel = paragraph.speaker;\n }\n\n const draftJsContentBlockParagraph = {\n text: paragraph.text,\n type: 'paragraph',\n data: {\n speaker: speakerLabel,\n words: paragraph.words,\n start: paragraph.words[0].start\n },\n // the entities as ranges are each word in the space-joined text,\n // so it needs to be compute for each the offset from the beginning of the paragraph and the length\n entityRanges: generateEntitiesRanges(paragraph.words, 'text') // wordAttributeName\n };\n results.push(draftJsContentBlockParagraph);\n }\n });\n\n return results;\n};\n\nexport default digitalPaperEditToDraft;\n","/**\n * Helper function to generate draft.js entityMap from draftJS blocks,\n */\n\n/**\n * helper function to flatten a list.\n * converts nested arrays into one dimensional array\n * @param {array} list\n */\nconst flatten = list => list.reduce((a, b) => a.concat(Array.isArray(b) ? flatten(b) : b), []);\n\n/**\n * helper function to create createEntityMap\n * @param {*} blocks - draftJs blocks\n */\nconst createEntityMap = (blocks) => {\n const entityRanges = blocks.map(block => block.entityRanges);\n const flatEntityRanges = flatten(entityRanges);\n\n const entityMap = {};\n\n flatEntityRanges.forEach((data) => {\n entityMap[data.key] = {\n type: 'WORD',\n mutability: 'MUTABLE',\n data,\n };\n });\n\n return entityMap;\n};\n\nexport default createEntityMap;","import bbcKaldiToDraft from './bbc-kaldi/index';\nimport autoEdit2ToDraft from './autoEdit2/index';\nimport speechmaticsToDraft from './speechmatics/index';\nimport amazonTranscribeToDraft from './amazon-transcribe/index';\nimport ibmToDraft from './ibm/index';\nimport digitalPaperEditToDraft from './digital-paper-edit/index';\nimport createEntityMap from './create-entity-map/index';\n\n/**\n * Adapters for STT conversion\n * @param {json} transcriptData - A json transcript with some word accurate timecode\n * @param {string} sttJsonType - the type of transcript supported by the available adapters\n */\nconst sttJsonAdapter = (transcriptData, sttJsonType) => {\n let blocks;\n switch (sttJsonType) {\n case 'bbckaldi':\n blocks = bbcKaldiToDraft(transcriptData);\n\n return { blocks, entityMap: createEntityMap(blocks) };\n case 'autoedit2':\n blocks = autoEdit2ToDraft(transcriptData);\n\n return { blocks, entityMap: createEntityMap(blocks) };\n case 'speechmatics':\n blocks = speechmaticsToDraft(transcriptData);\n\n return { blocks, entityMap: createEntityMap(blocks) };\n case 'ibm':\n blocks = ibmToDraft(transcriptData);\n\n return { blocks, entityMap: createEntityMap(blocks) };\n case 'draftjs':\n return transcriptData; // (typeof transcriptData === 'string')? JSON.parse(transcriptData): transcriptData;\n\n case 'amazontranscribe':\n blocks = amazonTranscribeToDraft(transcriptData);\n\n return { blocks, entityMap: createEntityMap(blocks) };\n case 'digitalpaperedit':\n blocks = digitalPaperEditToDraft(transcriptData);\n\n return { blocks, entityMap: createEntityMap(blocks) };\n default:\n // code block\n console.error('Did not recognize the stt engine.');\n }\n};\n\nexport default sttJsonAdapter;\n"],"sourceRoot":""}
\No newline at end of file