UNPKG

56.6 kBSource Map (JSON)View Raw
1{"version":3,"sources":["webpack:///webpack/bootstrap","webpack:///./packages/stt-adapters/generate-entities-ranges/index.js","webpack:///./packages/stt-adapters/bbc-kaldi/group-words-by-speakers.js","webpack:///./packages/stt-adapters/bbc-kaldi/index.js","webpack:///./packages/stt-adapters/autoEdit2/index.js","webpack:///./packages/stt-adapters/speechmatics/index.js","webpack:///./packages/stt-adapters/amazon-transcribe/group-words-by-speakers.js","webpack:///./packages/stt-adapters/amazon-transcribe/index.js","webpack:///./packages/stt-adapters/ibm/index.js","webpack:///./packages/stt-adapters/digital-paper-edit/index.js","webpack:///./packages/stt-adapters/create-entity-map/index.js","webpack:///./packages/stt-adapters/google-stt/index.js","webpack:///./packages/stt-adapters/index.js","webpack:///./packages/stt-adapters/digital-paper-edit/group-words-by-speakers.js"],"names":["installedModules","__webpack_require__","moduleId","exports","module","i","l","modules","call","m","c","d","name","getter","o","Object","defineProperty","enumerable","get","r","Symbol","toStringTag","value","t","mode","__esModule","ns","create","key","bind","n","object","property","prototype","hasOwnProperty","p","s","generateEntitiesRanges","words","wordAttributeName","position","map","word","result","start","end","confidence","text","offset","length","Math","random","toString","substring","groupWordsInParagraphsBySpeakers","segments","wordsWithSpeakers","currentSpeaker","speaker","results","paragraph","forEach","push","punct","trim","groupWordsBySpeaker","tmpWordsWithSpeakers","tmpSpeakerSegment","tmpSegment","find","seg","segEnd","duration","gender","findSegmentForWord","addSpeakerToEachWord","bbcKaldiToDraft","bbcKaldiJson","tmpWords","speakerSegmentation","retval","segmentation","test","join","groupWordsInParagraphs","speakerLabel","draftJsContentBlockParagraph","type","data","entityRanges","autoEdit2ToDraft","autoEdit2Json","autoEditText","autoEditparagraph","autoEditLine","line","tmpWord","startTime","endTime","getSpeaker","speakers","speakerIdx","segmentStart","parseFloat","speechmaticsToDraft","speechmaticsJson","curatedWords","maxParagraphWords","newSpeaker","oldSpeaker","sentenceEnd","element","index","time","toLowerCase","replace","paragraphStart","findSpeakerForWord","start_time","end_time","firstMatchingSegment","speaker_label","speakerLabels","groupedWords","groupWordsBySpeakerLabel","w","assign","addSpeakerLabelToWords","getBestAlternativeForWord","alternatives","reduce","prev","current","normalizeWord","currentWord","bestAlternative","content","mapPunctuationItemsToWords","itemsToRemove","punctuation","previousWord","punctuationContent","appendPunctuationToPreviousWord","filter","item","includes","amazonTranscribeToDraft","amazonTranscribeJson","items","speaker_labels","wordsWithRemappedPunctuation","speakerGroup","groupSpeakerWordsInParagraphs","normalizedWord","ibmToDraft","ibmJson","ibmWords","ibmSpeakers","ibmResults","normalisedResults","normalisedWords","timestamps","ibmWord","ibmNormalisedWordsWithSpeakers","draftJsParagraphsResults","ibmParagraph","lines","speakerSegments","segStart","from","to","findSpeakerSegmentForWord","digitalPaperEditToDraft","digitalPaperEditTranscriptJson","paragraphs","flatten","list","a","b","concat","Array","isArray","createEntityMap","blocks","block","flatEntityRanges","entityMap","mutability","computeTimeInSeconds","startSecond","nanoSecond","seconds","sentences","sentence","getBestAlternativeSentence","transcript","nanos","gcpSttToDraft","gcpSttJson","sttJsonAdapter","transcriptData","sttJsonType","console","error","currentSegment","currentSegmentIndex","previousSegmentIndex","indexOf","addWordsToSpeakersParagraphs"],"mappings":"2BACE,IAAIA,EAAmB,GAGvB,SAASC,EAAoBC,GAG5B,GAAGF,EAAiBE,GACnB,OAAOF,EAAiBE,GAAUC,QAGnC,IAAIC,EAASJ,EAAiBE,GAAY,CACzCG,EAAGH,EACHI,GAAG,EACHH,QAAS,IAUV,OANAI,EAAQL,GAAUM,KAAKJ,EAAOD,QAASC,EAAQA,EAAOD,QAASF,GAG/DG,EAAOE,GAAI,EAGJF,EAAOD,QA0Df,OArDAF,EAAoBQ,EAAIF,EAGxBN,EAAoBS,EAAIV,EAGxBC,EAAoBU,EAAI,SAASR,EAASS,EAAMC,GAC3CZ,EAAoBa,EAAEX,EAASS,IAClCG,OAAOC,eAAeb,EAASS,EAAM,CAAEK,YAAY,EAAMC,IAAKL,KAKhEZ,EAAoBkB,EAAI,SAAShB,GACX,oBAAXiB,QAA0BA,OAAOC,aAC1CN,OAAOC,eAAeb,EAASiB,OAAOC,YAAa,CAAEC,MAAO,WAE7DP,OAAOC,eAAeb,EAAS,aAAc,CAAEmB,OAAO,KAQvDrB,EAAoBsB,EAAI,SAASD,EAAOE,GAEvC,GADU,EAAPA,IAAUF,EAAQrB,EAAoBqB,IAC/B,EAAPE,EAAU,OAAOF,EACpB,GAAW,EAAPE,GAA8B,iBAAVF,GAAsBA,GAASA,EAAMG,WAAY,OAAOH,EAChF,IAAII,EAAKX,OAAOY,OAAO,MAGvB,GAFA1B,EAAoBkB,EAAEO,GACtBX,OAAOC,eAAeU,EAAI,UAAW,CAAET,YAAY,EAAMK,MAAOA,IACtD,EAAPE,GAA4B,iBAATF,EAAmB,IAAI,IAAIM,KAAON,EAAOrB,EAAoBU,EAAEe,EAAIE,EAAK,SAASA,GAAO,OAAON,EAAMM,IAAQC,KAAK,KAAMD,IAC9I,OAAOF,GAIRzB,EAAoB6B,EAAI,SAAS1B,GAChC,IAAIS,EAAST,GAAUA,EAAOqB,WAC7B,WAAwB,OAAOrB,EAAgB,SAC/C,WAA8B,OAAOA,GAEtC,OADAH,EAAoBU,EAAEE,EAAQ,IAAKA,GAC5BA,GAIRZ,EAAoBa,EAAI,SAASiB,EAAQC,GAAY,OAAOjB,OAAOkB,UAAUC,eAAe1B,KAAKuB,EAAQC,IAGzG/B,EAAoBkC,EAAI,GAIjBlC,EAAoBA,EAAoBmC,EAAI,I,kCChDtCC,IAtBgB,SAACC,EAAOC,GACrC,IAAIC,EAAW,EAEf,OAAOF,EAAMG,KAAI,SAACC,GAChB,IAAMC,EAAS,CACbC,MAAOF,EAAKE,MACZC,IAAKH,EAAKG,IACVC,WAAYJ,EAAKI,WACjBC,KAAML,EAAKH,GACXS,OAAQR,EACRS,OAAQP,EAAKH,GAAmBU,OAChCrB,IAAKsB,KAAKC,SACPC,SAAS,IACTC,UAAU,IAKf,OAFAb,EAAWA,EAAWE,EAAKH,GAAmBU,OAAS,EAEhDN,O,mDCuFIW,MA/Gf,SAA0ChB,EAAOiB,GAM/C,OA2BF,SAA6BC,GAAoB,IAC3CC,EAAiBD,EAAkB,GAAGE,QACpCC,EAAU,GACZC,EAAY,CAAEtB,MAAO,GAAIS,KAAM,GAAIW,QAAS,IA0BhD,OAzBAF,EAAkBK,SAAQ,SAACnB,GAErBe,IAAmBf,EAAKgB,SAC1BE,EAAUtB,MAAMwB,KAAKpB,GACrBkB,EAAUb,MAAQL,EAAKqB,MAAQ,IAC/BH,EAAUF,QAAUD,IAKpBA,EAAiBf,EAAKgB,QAEtBE,EAAUb,KAAOa,EAAUb,KAAKiB,OAEhCL,EAAQG,KAAKF,IAEbA,EAAY,CAAEtB,MAAO,GAAIS,KAAM,GAAIW,QAAS,UAElCpB,MAAMwB,KAAKpB,GACrBkB,EAAUb,MAAQL,EAAKqB,MAAQ,QAInCJ,EAAQG,KAAKF,GAEND,EA1DQM,CAWjB,SAA8B3B,EAAOiB,GACnC,IAAMW,EAAuB,GAQ7B,OAPA5B,EAAMuB,SAAQ,SAACnB,GACb,IAyFuBgB,EAzFjBS,EA4DV,SAA4BzB,EAAMa,GAEhC,IAAMa,EAAab,EAASc,MAAK,SAACC,GAChC,IAAMC,EAASD,EAAI1B,MAAQ0B,EAAIE,SAE/B,OAAS9B,EAAKE,OAAS0B,EAAI1B,OAAWF,EAAKG,KAAO0B,KALV,YAQtC,IAAAH,EAGK,CACL,QAAS,UAGTV,QAAS,CAAE,MAAO,MAAOe,OAAQ,MAI5BL,EA/EmBM,CAAmBhC,EAAMa,GAEnDb,EAAKgB,SAuFkBA,EAvFUS,EAAkBT,SAwFtCe,OAAS,IAAMf,EAAQ,OAvFpCQ,EAAqBJ,KAAKpB,MAGrBwB,EAtBmBS,CAAqBrC,EAAOiB,EAASA,YCkFlDqB,EAtDS,SAAAC,GAAiB,IAEnCC,EADEnB,EAAU,GAEZoB,EAAsB,KAgD1B,YA3CI,IAAAF,EAAaG,QAMfF,EAAWD,EAAavC,WACpB,IAAAuC,EAAaI,eACfF,EAAsBF,EAAaI,gBAPrCH,EAAWD,EAAaG,OAAO1C,WAC3B,IAAAuC,EAAaG,OAAOC,eACtBF,EAAsBF,EAAaG,OAAOC,gBASlB,OAAxBF,EA1CyB,SAAAzC,GAAU,IACjCqB,EAAU,GACZC,EAAY,CAAEtB,MAAO,GAAIS,KAAM,IAiBnC,OAfAT,EAAMuB,SAAQ,SAAAnB,GAER,QAAQwC,KAAKxC,EAAKqB,QACpBH,EAAUtB,MAAMwB,KAAKpB,GACrBkB,EAAUb,KAAKe,KAAKpB,EAAKqB,OACzBH,EAAUb,KAAOa,EAAUb,KAAKoC,KAAK,KACrCxB,EAAQG,KAAKF,GAEbA,EAAY,CAAEtB,MAAO,GAAIS,KAAM,MAE/Ba,EAAUtB,MAAMwB,KAAKpB,GACrBkB,EAAUb,KAAKe,KAAKpB,EAAKqB,WAItBJ,EAwBeyB,CAAuBN,GAEvBxB,EAAiCwB,EAAUC,IAG/ClB,SAAQ,SAACD,EAAWvD,GAGpC,QAAI,IAAAuD,EAAUtB,MAAM,GAAkB,CACpC,IAAI+C,EAAY,cAAWhF,GACC,OAAxB0E,IACFM,EAAezB,EAAUF,SAG3B,IAAM4B,EAA+B,CACnCvC,KAAMa,EAAUb,KAChBwC,KAAM,YACNC,KAAM,CACJ9B,QAAS2B,EACT/C,MAAOsB,EAAUtB,MACjBM,MAAOgB,EAAUtB,MAAM,GAAGM,OAI5B6C,aAAcpD,YAAuBuB,EAAUtB,MAAO,UAExDqB,EAAQG,KAAKwB,OAIV3B,GCfM+B,EA1BU,SAACC,GAAmB,IACrChC,EAAU,GAsBhB,OAxD6B,SAACiC,GAAkB,IAC1CjC,EAAU,GACZC,EAAY,CAAEtB,MAAO,GAAIS,KAAM,IA4BnC,OA1BA6C,EAAa/B,SAAQ,SAACgC,GACpBA,EAAkBjC,UAAUC,SAAQ,SAACiC,GACnCA,EAAaC,KAAKlC,SAAQ,SAACnB,GAIzB,IAAMsD,EAAU,CACdjD,KAAML,EAAKK,KACXH,MAAOF,EAAKuD,UACZpD,IAAKH,EAAKwD,SAGR,QAAQhB,KAAKxC,EAAKK,OACpBa,EAAUtB,MAAMwB,KAAKkC,GACrBpC,EAAUb,KAAKe,KAAKpB,EAAKK,MACzBY,EAAQG,KAAKF,GAEbA,EAAY,CAAEtB,MAAO,GAAIS,KAAM,MAE/Ba,EAAUtB,MAAMwB,KAAKkC,GACrBpC,EAAUb,KAAKe,KAAKpB,EAAKK,gBAM1BY,EAMmByB,CADTO,EAAc5C,MAGbc,SAAQ,SAACD,EAAWvD,GACpC,IAAMiF,EAA+B,CACnCvC,KAAMa,EAAUb,KAAKoC,KAAK,KAC1BI,KAAM,YACNC,KAAM,CACJ9B,QAAQ,OAAD,OAAUrD,GACjBiC,MAAOsB,EAAUtB,MACjBM,MAAOgB,EAAUtB,MAAM,GAAGM,OAI5B6C,aAAcpD,YAAuBuB,EAAUtB,MAAO,SAGxDqB,EAAQG,KAAKwB,MAIR3B,GCxDHwC,EAAa,SAACvD,EAAOwD,GACzB,IAAK,IAAIC,KAAcD,EAAU,CAAC,IAC1B1C,EAAU0C,EAASC,GACnBC,EAAeC,WAAW3D,GAChC,GAAI0D,GAAgB5C,EAAQd,MAAQ0D,EAAe5C,EAAQb,IACzD,OAAOa,EAAQ9C,KAInB,MAAO,OAyGM4F,EAhDa,SAACC,GAAsB,IAfxBnE,EACnBoE,EAeA/C,EAAU,GA4ChB,OA5DyBrB,EAmBImE,EAAiBnE,MAlBxCoE,EAAe,GACrBpE,EAAMuB,SAAQ,SAACnB,GACT,QAAQwC,KAAKxC,EAAK9B,OACpB8F,EAAaA,EAAazD,OAAS,GAAGrC,KAAO8F,EAAaA,EAAazD,OAAS,GAAGrC,KAAO8B,EAAK9B,KAC/F8F,EAAaA,EAAazD,OAAS,GAAGuB,UAAY+B,WAAWG,EAAaA,EAAazD,OAAS,GAAGuB,UAAY+B,WAAW7D,EAAK8B,WAAWpB,YAE1IsD,EAAa5C,KAAKpB,MAxCO,SAACJ,EAAO8D,EAAUO,GAAuB,IAIlEC,EAHEjD,EAAU,GACZC,EAAY,CAAEtB,MAAO,GAAIS,KAAM,GAAIW,QAAS,IAC5CmD,EAAaV,EAAW7D,EAAM,GAAGM,MAAOwD,GAExCU,GAAW,EAoBf,OAlBAxE,EAAMuB,SAAQ,SAACnB,KACbkE,EAAaT,EAAWzD,EAAKE,MAAOwD,MAEjBS,GAAejD,EAAUtB,MAAMW,OAAS0D,GAAqBG,KAC9ElD,EAAUF,QAAUmD,EACpBlD,EAAQG,KAAKF,GACbiD,EAAaD,EAEbhD,EAAY,CAAEtB,MAAO,GAAIS,KAAM,KAEjCa,EAAUtB,MAAMwB,KAAKpB,GACrBkB,EAAUb,KAAKe,KAAKpB,EAAKqB,OACzB+C,IAAc,QAAQ5B,KAAKxC,EAAKqB,UAGlCH,EAAUF,QAAUmD,EACpBlD,EAAQG,KAAKF,GAEND,EAiDmByB,CA7BnBsB,EAQajE,KAAI,SAACsE,EAASC,GAChC,MAAQ,CACNpE,MAAOmE,EAAQE,KACfpE,KAAM0D,WAAWQ,EAAQE,MAAQV,WAAWQ,EAAQvC,WAAWpB,WAC/DN,WAAYiE,EAAQjE,WACpBJ,KAAMqE,EAAQnG,KAAKsG,cAAcC,QAAQ,SAAU,IACnDpD,MAAOgD,EAAQnG,KACfoG,MAAOA,MAKGP,EAAiBL,SACL3D,KAAI,SAACsE,GAC7B,MAAQ,CACNnE,MAAO2D,WAAWQ,EAAQE,MAC1BpE,IAAM0D,WAAWQ,EAAQE,MAAQV,WAAWQ,EAAQvC,UACpD5D,KAAMmG,EAAQnG,SAIsD,KAEtDiD,SAAQ,SAACD,GAAe,IAClCwD,EAAiBxD,EAAUtB,MAAM,GAAGM,MACpC0C,EAA+B,CACnCvC,KAAMa,EAAUb,KAAKoC,KAAK,KAC1BI,KAAM,YACNC,KAAM,CACJ9B,QAASE,EAAUF,QACnBpB,MAAOsB,EAAUtB,MACjBM,MAAOwE,GAIT3B,aAAcpD,YAAuBuB,EAAUtB,MAAO,UAExDqB,EAAQG,KAAKwB,MAGR3B,GC1GI0D,EAAqB,SAAC3E,EAAMa,GAAc,IAC/C0C,EAAYM,WAAW7D,EAAK4E,YAC5BpB,EAAUK,WAAW7D,EAAK6E,UAC1BC,EAAuBjE,EAASc,MAAK,SAACC,GAC1C,OAAO2B,GAAaM,WAAWjC,EAAIgD,aAAepB,GAAWK,WAAWjC,EAAIiD,aAJ1B,YAMhD,IAAAC,EACK,MAEAA,EAAqBC,cAAcN,QAAQ,OAAQ,KAQjDlD,EAAsB,SAAC3B,EAAOoF,GAGzC,OAtCsC,SAACpF,GAAW,IAC5CqF,EAAe,GACjBlE,EAAiB,GAarB,OAZAnB,EAAMuB,SAAQ,SAACnB,GACTA,EAAK+E,gBAAkBhE,EACzBkE,EAAaA,EAAa1E,OAAS,GAAGX,MAAMwB,KAAKpB,IAEjDe,EAAiBf,EAAK+E,cAEtBE,EAAa7D,KAAK,CAChBJ,QAAShB,EAAK+E,cACdnF,MAAO,CAAEI,SAIRiF,EAuBAC,CAPsB,SAACtF,EAAOiB,GACrC,OAAOjB,EAAMG,KAAI,SAAAoF,GAAC,OAAI9G,OAAO+G,OAAOD,EAAG,CAAE,cAAiBR,EAAmBQ,EAAGtE,QAItDwE,CAAuBzF,EAAOoF,EAAcnE,Y,oqBC5BjE,IAUMyE,EAA4B,SAAAtF,GACvC,MAAI,cAAcwC,KAAKxC,EAAK6C,MACnBxE,OAAO+G,OAAOpF,EAAKuF,aAAa,GAAI,CAAEnF,WAAY,IAEzBJ,EAAKuF,aAAaC,QAAO,SACzDC,EACAC,GAEA,OAAO7B,WAAW4B,EAAKrF,YAAcyD,WAAW6B,EAAQtF,YACpDqF,EACAC,MAUFC,EAAgB,SAAAC,GACpB,IAAMC,EAAkBP,EAA0BM,GAElD,MAAO,CACL1F,MAAO2D,WAAW+B,EAAYhB,YAC9BzE,IAAK0D,WAAW+B,EAAYf,UAC5BxE,KAAMwF,EAAgBC,QACtB1F,WAAYyD,WAAWgC,EAAgBzF,cAgB9B2F,EAA6B,SAAAnG,GAAU,IAC5CoG,EAAgB,GAatB,OAZmBpG,EAAMG,KAAI,SAACC,EAAMsE,GAAU,MAE1B,gBAAdtE,EAAK6C,MACPmD,EAAc5E,KAAKkD,EAAQ,GAjBc,SAAC2B,EAAaC,GAC3D,IAAMC,EAAqBF,EAAYV,aAAa,GAAGO,QAEvD,OAAO,EAAP,GACKI,EADL,CAEEX,aAAcW,EAAaX,aAAaxF,KAAI,SAAAoF,GAAC,YACxCA,EADwC,CAE3CW,QAASX,EAAEW,SAhDgB9F,EAgDYmG,EA/CpCnG,EAAKyE,QAAQ,MAAO,OADI,IAAAzE,OA6DpBoG,CAAgCpG,EAFxBJ,EAAM0E,EAAQ,KAItBtE,KAIOqG,QAAO,SAACC,EAAMhC,GAC9B,OAAQ0B,EAAcO,SAASjC,OA6EpBkC,EAhCiB,SAAAC,GAAyB,IACjDxF,EAAU,GACVmB,EAAWqE,EAAqBxF,QAAQyF,MACxC1B,EAAgByB,EAAqBxF,QAAQ0F,eAC7CC,EAA+Bb,EAA2B3D,GAyBhE,YAxBqD,IAAlB4C,EAjBC,SAACpF,EAAOoF,GAG5C,OAFuBzD,EAAoB3B,EAAOoF,GAE5BjF,KAAI,SAAC8G,GACzB,MAAO,CACLjH,MAAOiH,EAAajH,MAAMG,IAAI4F,GAC9BtF,KAAMwG,EAAajH,MAAMG,KAAI,SAACoF,GAAD,OAAOG,EAA0BH,GAAGW,WACjE9E,QAAS6F,EAAa7F,YAaxB8F,CAA8BF,EAA8B5B,GA5CjC,SAAApF,GAAU,IACjCqB,EAAU,GACZC,EAAY,CACdtB,MAAO,GACPS,KAAM,IAiBR,OAfAT,EAAMuB,SAAQ,SAACnB,GAAU,IACjB8F,EAAUR,EAA0BtF,GAAM8F,QAC1CiB,EAAiBpB,EAAc3F,GACjC,QAAQwC,KAAKsD,IACf5E,EAAUtB,MAAMwB,KAAK2F,GACrB7F,EAAUb,KAAKe,KAAK0E,GACpB7E,EAAQG,KAAKF,GAEbA,EAAY,CAAEtB,MAAO,GAAIS,KAAM,MAE/Ba,EAAUtB,MAAMwB,KAAK2F,GACrB7F,EAAUb,KAAKe,KAAK0E,OAIjB7E,EAwBLyB,CACEkE,IAGczF,SAAQ,SAACD,EAAWvD,GACpC,IAAMiF,EAA+B,CACnCvC,KAAMa,EAAUb,KAAKoC,KAAK,KAC1BI,KAAM,YACNC,KAAM,CACJ9B,QAASE,EAAUF,QAAV,kBAAgCE,EAAUF,SAA1C,cAA+DrD,GACxEiC,MAAOsB,EAAUtB,MACjBM,MAAO2D,WAAW3C,EAAUtB,MAAM,GAAGM,QAIvC6C,aAAcpD,YAAuBuB,EAAUtB,MAAO,SAExDqB,EAAQG,KAAKwB,MAGR3B,GCnDM+F,EA5FI,SAAAC,GAAW,IAgDAC,EAAUC,EAnCRC,EACtBC,EAqEFC,GAtEwBF,EAsEgBH,EAAQhG,QAAQ,GAAGA,QArEzDoG,EAAoB,GAC1BD,EAAWjG,SAAQ,SAAAlB,GAEjBoH,EAAkBjG,KAAgCnB,EAAOsF,aAAa,GAAGgC,WAdzDxH,KAAI,SAAAyH,GACpB,MAAO,CACLnH,KAAMmH,EAAQ,GACdtH,MAAOsH,EAAQ,GACfrH,IAAKqH,EAAQ,WAmBVH,GA+DT,OA9BoC,SAACI,GACnC,IAAMC,EAA2B,GAoBjC,OAnBAD,EAA+BtG,SAAQ,SAACwG,GACtC,IAAM/E,EAA+B,CACnCvC,KAAMsH,EAAa5H,KAAI,SAACC,GAAU,OAAOA,EAAKK,QAAQoC,KAAK,KAC3DI,KAAM,YACNC,KAAM,CAIJ9B,QAAS2G,EAAa,GAAG3G,QACzBpB,MAAO+H,EACPzH,MAAOyH,EAAa,GAAGzH,OAIzB6C,aAAcpD,YAAuBgI,EAAc,SAErDD,EAAyBtG,KAAKwB,MAGzB8E,EArB2B,EAXRR,EAsC8BI,EAtCpBH,EAsCqCF,EAAQhG,QAAQ,GAAG0F,eArCrFO,EAASnH,KAAI,SAAA6H,GAClB,OAAOA,EAAM7H,KAAI,SAAAC,GAIf,OAFAA,EAAKgB,QAtBuB,SAAChB,EAAM6H,GACvC,IAAMnG,EAAamG,EAAgBlG,MAAK,SAAAC,GAAQ,IACxCkG,EAAWlG,EAAImG,KACflG,EAASD,EAAIoG,GAEnB,OAAShI,EAAKE,QAAU4H,GAAc9H,EAAKG,MAAQ0B,KALM,YAQvD,IAAAH,EAGK,MAXkD,YAc5CA,EAAWV,SAQPiH,CAA0BjI,EAAMmH,GAExCnH,W,QCiBAkI,EA5CiB,SAACC,GAAoC,IAC7DlH,EAAU,GACZoB,EAAsB,KAEpBD,EAAW+F,EAA+BvI,MAqChD,OAnCIuI,EAA+BC,aACjC/F,EAAsB8F,EAA+BC,aAGlD/F,EAGiBzB,kBAAiCwB,EAAU+F,EAA+BC,YAnCnE,SAAAxI,GAAU,IACjCqB,EAAU,GACZC,EAAY,CAAEtB,MAAO,GAAIS,KAAM,IAiBnC,OAfAT,EAAMuB,SAAQ,SAAAnB,GAER,QAAQwC,KAAKxC,EAAKK,OACpBa,EAAUtB,MAAMwB,KAAKpB,GACrBkB,EAAUb,KAAKe,KAAKpB,EAAKK,MACzBa,EAAUb,KAAOa,EAAUb,KAAKoC,KAAK,KACrCxB,EAAQG,KAAKF,GAEbA,EAAY,CAAEtB,MAAO,GAAIS,KAAM,MAE/Ba,EAAUtB,MAAMwB,KAAKpB,GACrBkB,EAAUb,KAAKe,KAAKpB,EAAKK,UAItBY,EAceyB,CAAuBN,IAK3BjB,SAAQ,SAACD,EAAWvD,GAGpC,GAAIuD,EAAUtB,MAAM,GAAI,CACtB,IAAI+C,EAAY,cAAWhF,GACvB0E,IACFM,EAAezB,EAAUF,SAG3B,IAAM4B,EAA+B,CACnCvC,KAAMa,EAAUb,KAChBwC,KAAM,YACNC,KAAM,CACJ9B,QAAS2B,EACT/C,MAAOsB,EAAUtB,MACjBM,MAAOgB,EAAUtB,MAAM,GAAGM,OAI5B6C,aAAcpD,YAAuBuB,EAAUtB,MAAO,SAExDqB,EAAQG,KAAKwB,OAIV3B,GClEHoH,EAAU,SAAAC,GAAI,OAAIA,EAAK9C,QAAO,SAAC+C,EAAGC,GAAJ,OAAUD,EAAEE,OAAOC,MAAMC,QAAQH,GAAKH,EAAQG,GAAKA,KAAI,KAuB5EI,EAjBS,SAACC,GAAY,IAC7B9F,EAAe8F,EAAO9I,KAAI,SAAA+I,GAAK,OAAIA,EAAM/F,gBACzCgG,EAAmBV,EAAQtF,GAE3BiG,EAAY,GAUlB,OARAD,EAAiB5H,SAAQ,SAAC2B,GACxBkG,EAAUlG,EAAK5D,KAAO,CACpB2D,KAAM,OACNoG,WAAY,UACZnG,WAIGkG,GCYHE,EAAuB,SAACC,EAAaC,GAEzC,IAAIC,EAAUxF,WAAWsF,GAMzB,YAJI,IAAAC,IACFC,GAAoBxF,WAAWuF,EAvCf,MA0CXC,GAqBH3G,EAAyB,SAAA4G,GAAc,IACrCrI,EAAU,GACZC,EAAY,CACdtB,MAAO,GACPS,KAAM,IAcR,OAXAiJ,EAAUnI,SAAQ,SAACoI,GACjB,IAAM1D,EAhEgC,SAAAyD,GACxC,OAAsC,IAAlCA,EAAU/D,aAAahF,OAClB+I,EAAU,GAGmBA,EAAU/D,aAAaC,QAAO,SAClEC,EACAC,GAEA,OAAO7B,WAAW4B,EAAKrF,YAAcyD,WAAW6B,EAAQtF,YACpDqF,EACAC,KAqDoB8D,CAA2BD,GACnDrI,EAAUb,KAAKe,KAAqCyE,EAAgB4D,WA/C1DnI,QAiDVuE,EAAgBjG,MAAMuB,SAAQ,SAACnB,GAzBb,IAAC4F,EAAaxF,EA0B9Bc,EAAUtB,MAAMwB,MA1BCwE,EA0BkB5F,EA1BLI,EA0BWyF,EAAgBzF,WAxBtD,CACLF,MAAOgJ,EAAqBtD,EAAYrC,UAAU8F,QAASzD,EAAYrC,UAAUmG,OACjFvJ,IAAK+I,EAAqBtD,EAAYpC,QAAQ6F,QAASzD,EAAYpC,QAAQkG,OAC3ErJ,KAAMuF,EAAY5F,KAClBI,WAAYA,QAsBZa,EAAQG,KAAKF,GACbA,EAAY,CAAEtB,MAAO,GAAIS,KAAM,OAG1BY,GA6BM0I,EA1BO,SAAAC,GAAe,IAC7B3I,EAAU,GAsBhB,OAlB0ByB,EAAuBkH,EAAW3I,SAE1CE,SAAQ,SAACD,EAAWvD,GACpC,IAAMiF,EAA+B,CACnCvC,KAAMa,EAAUb,KAAKoC,KAAK,KAC1BI,KAAM,YACNC,KAAM,CACJ9B,QAASE,EAAUF,QAAV,kBAAgCE,EAAUF,SAA1C,cAA+DrD,GACxEiC,MAAOsB,EAAUtB,MACjBM,MAAO2D,WAAW3C,EAAUtB,MAAM,GAAGM,QAIvC6C,aAAcpD,YAAuBuB,EAAUtB,MAAO,SAExDqB,EAAQG,KAAKwB,MAGR3B,GClHT,gDAwDe4I,UA1CQ,SAACC,EAAgBC,GACtC,IAAIlB,EACJ,OAAQkB,GACR,IAAK,WAGH,MAAO,CAAElB,OAFTA,EAAS3G,EAAgB4H,GAERd,UAAWJ,EAAgBC,IAC9C,IAAK,YAGH,MAAO,CAAEA,OAFTA,EAAS7F,EAAiB8G,GAETd,UAAWJ,EAAgBC,IAC9C,IAAK,eAGH,MAAO,CAAEA,OAFTA,EAAS/E,EAAoBgG,GAEZd,UAAWJ,EAAgBC,IAC9C,IAAK,MAGH,MAAO,CAAEA,OAFTA,EAAS7B,EAAW8C,GAEHd,UAAWJ,EAAgBC,IAC9C,IAAK,UACH,OAAOiB,EAET,IAAK,mBAGH,MAAO,CAAEjB,OAFTA,EAASrC,EAAwBsD,GAEhBd,UAAWJ,EAAgBC,IAC9C,IAAK,mBAGH,MAAO,CAAEA,OAFTA,EAASX,EAAwB4B,GAEhBd,UAAWJ,EAAgBC,IAE9C,IAAK,aAGH,MAAO,CAAEA,OAFTA,EAASc,EAAcG,GAENd,UAAWJ,EAAgBC,IAE9C,QAEEmB,QAAQC,MAAM,wC,gCCpDlB,OA4IerJ,UA9Df,SAA0ChB,EAAOiB,GAG/C,OAGF,SAAuCjB,EAAOiB,GAAW,IACjDI,EAAU,GACZiJ,EAAiB,MACjBC,EAAsB,EACtBC,EAAuB,EACvBlJ,EAAY,CAAEtB,MAAO,GAAIS,KAAM,GAAIW,QAAS,IAwBhD,OAvBApB,EAAMuB,SAAQ,SAACnB,IACbkK,EAsCJ,SAA4BlK,EAAMa,GAQhC,OANmBA,EAASc,MAAK,SAACC,GAChC,GAAK5B,EAAKE,OAAS0B,EAAI1B,OAAWF,EAAKG,KAAOyB,EAAIzB,IAChD,OAAOyB,KA1CQI,CAAmBhC,EAAMa,OAGxCsJ,EAAsBtJ,EAASwJ,QAAQH,MACXE,GAC1BlJ,EAAUtB,MAAMwB,KAAKpB,GACrBkB,EAAUb,MAAQL,EAAKK,KAAO,IAC9Ba,EAAUF,QAAUkJ,EAAelJ,UAGnCoJ,EAAuBD,EACvBjJ,EAAUb,KAAKiB,OACfL,EAAQG,KAAKF,IACbA,EAAY,CAAEtB,MAAO,GAAIS,KAAM,GAAIW,QAAS,KAClCpB,MAAMwB,KAAKpB,GACrBkB,EAAUb,MAAQL,EAAKK,KAAO,IAC9Ba,EAAUF,QAAUkJ,EAAelJ,aAIzCC,EAAQG,KAAKF,GAEND,EAlCQqJ,CAA6B1K,EAAOiB","file":"sttJsonAdapter.js","sourcesContent":[" \t// The module cache\n \tvar installedModules = {};\n\n \t// The require function\n \tfunction __webpack_require__(moduleId) {\n\n \t\t// Check if module is in cache\n \t\tif(installedModules[moduleId]) {\n \t\t\treturn installedModules[moduleId].exports;\n \t\t}\n \t\t// Create a new module (and put it into the cache)\n \t\tvar module = installedModules[moduleId] = {\n \t\t\ti: moduleId,\n \t\t\tl: false,\n \t\t\texports: {}\n \t\t};\n\n \t\t// Execute the module function\n \t\tmodules[moduleId].call(module.exports, module, module.exports, __webpack_require__);\n\n \t\t// Flag the module as loaded\n \t\tmodule.l = true;\n\n \t\t// Return the exports of the module\n \t\treturn module.exports;\n \t}\n\n\n \t// expose the modules object (__webpack_modules__)\n \t__webpack_require__.m = modules;\n\n \t// expose the module cache\n \t__webpack_require__.c = installedModules;\n\n \t// define getter function for harmony exports\n \t__webpack_require__.d = function(exports, name, getter) {\n \t\tif(!__webpack_require__.o(exports, name)) {\n \t\t\tObject.defineProperty(exports, name, { enumerable: true, get: getter });\n \t\t}\n \t};\n\n \t// define __esModule on exports\n \t__webpack_require__.r = function(exports) {\n \t\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n \t\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n \t\t}\n \t\tObject.defineProperty(exports, '__esModule', { value: true });\n \t};\n\n \t// create a fake namespace object\n \t// mode & 1: value is a module id, require it\n \t// mode & 2: merge all properties of value into the ns\n \t// mode & 4: return value when already ns object\n \t// mode & 8|1: behave like require\n \t__webpack_require__.t = function(value, mode) {\n \t\tif(mode & 1) value = __webpack_require__(value);\n \t\tif(mode & 8) return value;\n \t\tif((mode & 4) && typeof value === 'object' && value && value.__esModule) return value;\n \t\tvar ns = Object.create(null);\n \t\t__webpack_require__.r(ns);\n \t\tObject.defineProperty(ns, 'default', { enumerable: true, value: value });\n \t\tif(mode & 2 && typeof value != 'string') for(var key in value) __webpack_require__.d(ns, key, function(key) { return value[key]; }.bind(null, key));\n \t\treturn ns;\n \t};\n\n \t// getDefaultExport function for compatibility with non-harmony modules\n \t__webpack_require__.n = function(module) {\n \t\tvar getter = module && module.__esModule ?\n \t\t\tfunction getDefault() { return module['default']; } :\n \t\t\tfunction getModuleExports() { return module; };\n \t\t__webpack_require__.d(getter, 'a', getter);\n \t\treturn getter;\n \t};\n\n \t// Object.prototype.hasOwnProperty.call\n \t__webpack_require__.o = function(object, property) { return Object.prototype.hasOwnProperty.call(object, property); };\n\n \t// __webpack_public_path__\n \t__webpack_require__.p = \"\";\n\n\n \t// Load entry module and return exports\n \treturn __webpack_require__(__webpack_require__.s = 36);\n","/**\n * Helper function to generate draft.js entities,\n * see unit test for example data structure\n * it adds offset and length to recognise word in draftjs\n */\n\n/**\n* @param {json} words - List of words\n* @param {string} wordAttributeName - eg 'punct' or 'text' or etc.\n* attribute for the word object containing the text. eg word ={ punct:'helo', ... }\n* or eg word ={ text:'helo', ... }\n*/\nconst generateEntitiesRanges = (words, wordAttributeName) => {\n let position = 0;\n\n return words.map((word) => {\n const result = {\n start: word.start,\n end: word.end,\n confidence: word.confidence,\n text: word[wordAttributeName],\n offset: position,\n length: word[wordAttributeName].length,\n key: Math.random()\n .toString(36)\n .substring(6),\n };\n // increase position counter - to determine word offset in paragraph\n position = position + word[wordAttributeName].length + 1;\n\n return result;\n });\n};\n\nexport default generateEntitiesRanges;\n","/**\nedge cases\n- more segments then words - not an issue if you start by matching words with segment\nand handle edge case where it doesn't find a match\n- more words then segments - orphan words\n */\nfunction groupWordsInParagraphsBySpeakers(words, segments) {\n // add speakers to each word\n const wordsWithSpeakers = addSpeakerToEachWord(words, segments.segments);\n // group words by speakers sequentially\n const result = groupWordsBySpeaker(wordsWithSpeakers);\n\n return result;\n};\n\n/**\n* Add speakers to each words\n* if it doesn't have add unknown attribute `U_UKN`\n* @param {*} words\n* @param {*} segments\n*/\nfunction addSpeakerToEachWord(words, segments) {\n const tmpWordsWithSpeakers = [];\n words.forEach((word) => {\n const tmpSpeakerSegment = findSegmentForWord(word, segments);\n\n word.speaker = formatSpeakerName(tmpSpeakerSegment.speaker);\n tmpWordsWithSpeakers.push(word);\n });\n\n return tmpWordsWithSpeakers;\n}\n\n/**\n * Groups Words by speaker attribute\n * @param {array} wordsWithSpeakers - same as kaldi words list but with a `speaker` label attribute on each word\n * @return {array} - list of paragraph objcts, with words, text and sepaker attributes.\n * where words is an array and the other two are strings.\n */\nfunction groupWordsBySpeaker(wordsWithSpeakers) {\n let currentSpeaker = wordsWithSpeakers[0].speaker;\n const results = [ ];\n let paragraph = { words: [], text: '', speaker: '' };\n wordsWithSpeakers.forEach((word) => {\n // if current speaker same as word speaker add words to paragraph\n if (currentSpeaker === word.speaker) {\n paragraph.words.push(word);\n paragraph.text += word.punct + ' ';\n paragraph.speaker = currentSpeaker;\n }\n // if it's not same speaker\n else {\n // update current speaker\n currentSpeaker = word.speaker;\n // remove spacing in text\n paragraph.text = paragraph.text.trim();\n //save previous paragraph\n results.push(paragraph);\n // reset paragraph\n paragraph = { words: [], text: '', speaker: 'U_UKN' };\n // add words attributes to new\n paragraph.words.push(word);\n paragraph.text += word.punct + ' ';\n }\n });\n // add last paragraph\n results.push(paragraph);\n\n return results;\n}\n\n/**\n* Helper functions\n*/\n\n/**\n* given word start and end time attributes\n* looks for segment range that contains that word\n* if it doesn't find any it returns a segment with `UKN`\n* speaker attributes.\n* @param {object} word - word object\n* @param {array} segments - list of segments objects\n* @return {object} - a single segment whose range contains the word\n*/\nfunction findSegmentForWord(word, segments) {\n\n const tmpSegment = segments.find((seg) => {\n const segEnd = seg.start + seg.duration;\n\n return ((word.start >= seg.start) && (word.end <= segEnd));\n });\n // if find doesn't find any matches it returns an undefined\n if (tmpSegment === undefined) {\n // covering edge case orphan word not belonging to any segments\n // adding UKN speaker label\n return {\n '@type': 'Segment',\n // keeping both speaker id and gender as this is used later\n // to format speaker label combining the two\n speaker: { '@id': 'UKN', gender: 'U' }\n };\n } else {\n // find returns the first element that matches the criteria\n return tmpSegment;\n }\n}\n\n/**\n* formats kaldi speaker object into a string\n* Combining Gender and speaker Id\n* @param {object} speaker - BBC kaldi speaker object\n* @return {string} -\n*/\nfunction formatSpeakerName(speaker) {\n return speaker.gender + '_' + speaker['@id'];\n}\n\nexport default groupWordsInParagraphsBySpeakers;","/**\n * Convert BBC Kaldi json to draftJs\n * see `sample` folder for example of input and output as well as `example-usage.js`\n *\n */\n\nimport generateEntitiesRanges from '../generate-entities-ranges/index.js';\nimport groupWordsInParagraphsBySpeakers from './group-words-by-speakers.js';\n/**\n * groups words list from kaldi transcript based on punctuation.\n * @todo To be more accurate, should introduce an honorifics library to do the splitting of the words.\n * @param {array} words - array of words opbjects from kaldi transcript\n */\n\nconst groupWordsInParagraphs = words => {\n const results = [];\n let paragraph = { words: [], text: [] };\n\n words.forEach(word => {\n // if word contains punctuation\n if (/[.?!]/.test(word.punct)) {\n paragraph.words.push(word);\n paragraph.text.push(word.punct);\n paragraph.text = paragraph.text.join(' ');\n results.push(paragraph);\n // reset paragraph\n paragraph = { words: [], text: [] };\n } else {\n paragraph.words.push(word);\n paragraph.text.push(word.punct);\n }\n });\n\n return results;\n};\n\nconst bbcKaldiToDraft = bbcKaldiJson => {\n const results = [];\n let tmpWords;\n let speakerSegmentation = null;\n let wordsByParagraphs = [];\n\n // BBC Octo Labs API Response wraps Kaldi response around retval,\n // while kaldi contains word attribute at root\n if (bbcKaldiJson.retval !== undefined) {\n tmpWords = bbcKaldiJson.retval.words;\n if (bbcKaldiJson.retval.segmentation !== undefined) {\n speakerSegmentation = bbcKaldiJson.retval.segmentation;\n }\n } else {\n tmpWords = bbcKaldiJson.words;\n if (bbcKaldiJson.segmentation !== undefined) {\n speakerSegmentation = bbcKaldiJson.segmentation;\n }\n }\n\n if (speakerSegmentation === null) {\n wordsByParagraphs = groupWordsInParagraphs(tmpWords);\n } else {\n wordsByParagraphs = groupWordsInParagraphsBySpeakers(tmpWords, speakerSegmentation);\n }\n\n wordsByParagraphs.forEach((paragraph, i) => {\n // if paragraph contain words\n // eg sometimes the speaker segmentation might not contain words :man-shrugging:\n if (paragraph.words[0] !== undefined) {\n let speakerLabel = `TBC ${ i }`;\n if (speakerSegmentation !== null) {\n speakerLabel = paragraph.speaker;\n }\n\n const draftJsContentBlockParagraph = {\n text: paragraph.text,\n type: 'paragraph',\n data: {\n speaker: speakerLabel,\n words: paragraph.words,\n start: paragraph.words[0].start\n },\n // the entities as ranges are each word in the space-joined text,\n // so it needs to be compute for each the offset from the beginning of the paragraph and the length\n entityRanges: generateEntitiesRanges(paragraph.words, 'punct') // wordAttributeName\n };\n results.push(draftJsContentBlockParagraph);\n }\n });\n\n return results;\n};\n\nexport default bbcKaldiToDraft;\n","/**\n * Convert autoEdit2 Json to draftJS\n * see `sample` folder for example of input and output as well as `example-usage.js`\n */\n\nimport generateEntitiesRanges from '../generate-entities-ranges/index';\n\n/**\n * groups words list from autoEdit transcript based on punctuation.\n * @todo To be more accurate, should introduce an honorifics library to do the splitting of the words.\n * @param {array} words - array of words objects from autoEdit transcript\n */\n\nconst groupWordsInParagraphs = (autoEditText) => {\n const results = [];\n let paragraph = { words: [], text: [] };\n\n autoEditText.forEach((autoEditparagraph) => {\n autoEditparagraph.paragraph.forEach((autoEditLine) => {\n autoEditLine.line.forEach((word) => {\n // adjusting time reference attributes from\n // `startTime` `endTime` to `start` `end`\n // for word object\n const tmpWord = {\n text: word.text,\n start: word.startTime,\n end: word.endTime,\n };\n // if word contains punctuation\n if (/[.?!]/.test(word.text)) {\n paragraph.words.push(tmpWord);\n paragraph.text.push(word.text);\n results.push(paragraph);\n // reset paragraph\n paragraph = { words: [], text: [] };\n } else {\n paragraph.words.push(tmpWord);\n paragraph.text.push(word.text);\n }\n });\n });\n });\n\n return results;\n};\n\nconst autoEdit2ToDraft = (autoEdit2Json) => {\n const results = [];\n const tmpWords = autoEdit2Json.text;\n const wordsByParagraphs = groupWordsInParagraphs(tmpWords);\n\n wordsByParagraphs.forEach((paragraph, i) => {\n const draftJsContentBlockParagraph = {\n text: paragraph.text.join(' '),\n type: 'paragraph',\n data: {\n speaker: `TBC ${ i }`,\n words: paragraph.words,\n start: paragraph.words[0].start\n },\n // the entities as ranges are each word in the space-joined text,\n // so it needs to be compute for each the offset from the beginning of the paragraph and the length\n entityRanges: generateEntitiesRanges(paragraph.words, 'text'),\n };\n // console.log(JSON.stringify(draftJsContentBlockParagraph,null,2))\n results.push(draftJsContentBlockParagraph);\n });\n\n // console.log(JSON.stringify(results,null,2))\n return results;\n};\n\nexport default autoEdit2ToDraft;\n","/**\n * Convert Speechmatics Json to DraftJs\n * see `sample` folder for example of input and output as well as `example-usage.js`\n */\n\nimport generateEntitiesRanges from '../generate-entities-ranges/index.js';\n\n/**\n * Determines the speaker of a paragraph by comparing the start time of the paragraph with\n * the speaker times.\n * @param {float} start - Starting point of paragraph\n * @param {array} speakers - list of all speakers with start and end time\n */\nconst getSpeaker = (start, speakers) => {\n for (var speakerIdx in speakers) {\n const speaker = speakers[speakerIdx];\n const segmentStart = parseFloat(start);\n if (segmentStart >= speaker.start & segmentStart < speaker.end) {\n return speaker.name;\n }\n }\n\n return 'UNK';\n};\n\n/**\n * groups words list from speechmatics based on speaker change and paragraph length.\n * @param {array} words - array of words objects from speechmatics transcript\n * @param {array} speakers - array of speaker objects from speechmatics transcript\n * @param {int} words - number of words which trigger a paragraph break\n */\nconst groupWordsInParagraphs = (words, speakers, maxParagraphWords) => {\n const results = [];\n let paragraph = { words: [], text: [], speaker: '' };\n let oldSpeaker = getSpeaker(words[0].start, speakers);\n let newSpeaker;\n let sentenceEnd = false;\n\n words.forEach((word) => {\n newSpeaker = getSpeaker(word.start, speakers);\n // if speaker changes\n if (newSpeaker !== oldSpeaker || (paragraph.words.length > maxParagraphWords && sentenceEnd)) {\n paragraph.speaker = oldSpeaker;\n results.push(paragraph);\n oldSpeaker = newSpeaker;\n // reset paragraph\n paragraph = { words: [], text: [] };\n }\n paragraph.words.push(word);\n paragraph.text.push(word.punct);\n sentenceEnd = /[.?!]/.test(word.punct) ? true : false;\n });\n\n paragraph.speaker = oldSpeaker;\n results.push(paragraph);\n\n return results;\n};\n\n/**\n * Speechmatics treats punctuation as own words. This function merges punctuations with\n * the pevious word and adjusts the total duration of the word.\n * @param {array} words - array of words objects from speechmatics transcript\n */\nconst curatePunctuation = (words) => {\n const curatedWords = [];\n words.forEach((word) => {\n if (/[.?!]/.test(word.name)) {\n curatedWords[curatedWords.length - 1].name = curatedWords[curatedWords.length - 1].name + word.name;\n curatedWords[curatedWords.length - 1].duration = (parseFloat(curatedWords[curatedWords.length - 1].duration) + parseFloat(word.duration)).toString();\n } else {\n curatedWords.push(word);\n }\n }\n );\n\n return curatedWords;\n};\n\nconst speechmaticsToDraft = (speechmaticsJson) => {\n const results = [];\n\n let tmpWords;\n tmpWords = curatePunctuation(speechmaticsJson.words);\n tmpWords = tmpWords.map((element, index) => {\n return ({\n start: element.time,\n end: (parseFloat(element.time) + parseFloat(element.duration)).toString(),\n confidence: element.confidence,\n word: element.name.toLowerCase().replace(/[.?!]/g, ''),\n punct: element.name,\n index: index,\n });\n });\n\n let tmpSpeakers;\n tmpSpeakers = speechmaticsJson.speakers;\n tmpSpeakers = tmpSpeakers.map((element) => {\n return ({\n start: parseFloat(element.time),\n end: (parseFloat(element.time) + parseFloat(element.duration)),\n name: element.name,\n });\n });\n\n const wordsByParagraphs = groupWordsInParagraphs(tmpWords, tmpSpeakers, 150);\n\n wordsByParagraphs.forEach((paragraph) => {\n const paragraphStart = paragraph.words[0].start;\n const draftJsContentBlockParagraph = {\n text: paragraph.text.join(' '),\n type: 'paragraph',\n data: {\n speaker: paragraph.speaker,\n words: paragraph.words,\n start: paragraphStart\n },\n // the entities as ranges are each word in the space-joined text,\n // so it needs to be compute for each the offset from the beginning of the paragraph and the length\n entityRanges: generateEntitiesRanges(paragraph.words, 'punct'), // wordAttributeName\n };\n results.push(draftJsContentBlockParagraph);\n });\n\n return results;\n};\n\nexport default speechmaticsToDraft;\n","export const groupWordsBySpeakerLabel = (words) => {\n const groupedWords = [];\n let currentSpeaker = '';\n words.forEach((word) => {\n if (word.speaker_label === currentSpeaker) {\n groupedWords[groupedWords.length - 1].words.push(word);\n } else {\n currentSpeaker = word.speaker_label;\n // start new speaker block\n groupedWords.push({\n speaker: word.speaker_label,\n words: [ word ] });\n }\n });\n\n return groupedWords;\n};\n\nexport const findSpeakerForWord = (word, segments) => {\n const startTime = parseFloat(word.start_time);\n const endTime = parseFloat(word.end_time);\n const firstMatchingSegment = segments.find((seg) => {\n return startTime >= parseFloat(seg.start_time) && endTime <= parseFloat(seg.end_time);\n });\n if (firstMatchingSegment === undefined) {\n return 'UKN';\n } else {\n return firstMatchingSegment.speaker_label.replace('spk_', '');\n }\n};\n\nconst addSpeakerLabelToWords = (words, segments) => {\n return words.map(w => Object.assign(w, { 'speaker_label': findSpeakerForWord(w, segments) }));\n};\n\nexport const groupWordsBySpeaker = (words, speakerLabels) => {\n const wordsWithSpeakers = addSpeakerLabelToWords(words, speakerLabels.segments);\n\n return groupWordsBySpeakerLabel(wordsWithSpeakers);\n};","/**\n * Converts AWS Transcribe Json to DraftJs\n * see `sample` folder for example of input and output as well as `example-usage.js`\n */\n\nimport generateEntitiesRanges from '../generate-entities-ranges/index.js';\nimport { groupWordsBySpeaker } from './group-words-by-speakers';\n\nexport const stripLeadingSpace = word => {\n return word.replace(/^\\s/, '');\n};\n\n/**\n * @param {json} words - List of words\n * @param {string} wordAttributeName - eg 'punct' or 'text' or etc.\n * attribute for the word object containing the text. eg word ={ punct:'helo', ... }\n * or eg word ={ text:'helo', ... }\n */\nexport const getBestAlternativeForWord = word => {\n if (/punctuation/.test(word.type)) {\n return Object.assign(word.alternatives[0], { confidence: 1 }); //Transcribe doesn't provide a confidence for punctuation\n }\n const wordWithHighestConfidence = word.alternatives.reduce(function(\n prev,\n current\n ) {\n return parseFloat(prev.confidence) > parseFloat(current.confidence)\n ? prev\n : current;\n });\n\n return wordWithHighestConfidence;\n};\n\n/**\n * Normalizes words so they can be used in\n * the generic generateEntitiesRanges() method\n **/\nconst normalizeWord = currentWord => {\n const bestAlternative = getBestAlternativeForWord(currentWord);\n\n return {\n start: parseFloat(currentWord.start_time),\n end: parseFloat(currentWord.end_time),\n text: bestAlternative.content,\n confidence: parseFloat(bestAlternative.confidence)\n };\n};\n\nexport const appendPunctuationToPreviousWord = (punctuation, previousWord) => {\n const punctuationContent = punctuation.alternatives[0].content;\n\n return {\n ...previousWord,\n alternatives: previousWord.alternatives.map(w => ({\n ...w,\n content: w.content + stripLeadingSpace(punctuationContent)\n }))\n };\n};\n\nexport const mapPunctuationItemsToWords = words => {\n const itemsToRemove = [];\n const dirtyArray = words.map((word, index) => {\n let previousWord = {};\n if (word.type === 'punctuation') {\n itemsToRemove.push(index - 1);\n previousWord = words[index - 1];\n\n return appendPunctuationToPreviousWord(word, previousWord);\n } else {\n return word;\n }\n });\n\n return dirtyArray.filter((item, index) => {\n return !itemsToRemove.includes(index);\n });\n};\n\n/**\n * groups words list from amazon transcribe transcript based on punctuation.\n * @todo To be more accurate, should introduce an honorifics library to do the splitting of the words.\n * @param {array} words - array of words objects from kaldi transcript\n */\nconst groupWordsInParagraphs = words => {\n const results = [];\n let paragraph = {\n words: [],\n text: []\n };\n words.forEach((word) => {\n const content = getBestAlternativeForWord(word).content;\n const normalizedWord = normalizeWord(word);\n if (/[.?!]/.test(content)) {\n paragraph.words.push(normalizedWord);\n paragraph.text.push(content);\n results.push(paragraph);\n // reset paragraph\n paragraph = { words: [], text: [] };\n } else {\n paragraph.words.push(normalizedWord);\n paragraph.text.push(content);\n }\n });\n\n return results;\n};\n\nconst groupSpeakerWordsInParagraphs = (words, speakerLabels) => {\n const wordsBySpeaker = groupWordsBySpeaker(words, speakerLabels);\n\n return wordsBySpeaker.map((speakerGroup) => {\n return {\n words: speakerGroup.words.map(normalizeWord),\n text: speakerGroup.words.map((w) => getBestAlternativeForWord(w).content),\n speaker: speakerGroup.speaker\n };\n });\n};\n\nconst amazonTranscribeToDraft = amazonTranscribeJson => {\n const results = [];\n const tmpWords = amazonTranscribeJson.results.items;\n const speakerLabels = amazonTranscribeJson.results.speaker_labels;\n const wordsWithRemappedPunctuation = mapPunctuationItemsToWords(tmpWords);\n const speakerSegmentation = typeof(speakerLabels) != 'undefined';\n\n const wordsByParagraphs = speakerSegmentation ?\n groupSpeakerWordsInParagraphs(wordsWithRemappedPunctuation, speakerLabels) :\n groupWordsInParagraphs(\n wordsWithRemappedPunctuation\n );\n\n wordsByParagraphs.forEach((paragraph, i) => {\n const draftJsContentBlockParagraph = {\n text: paragraph.text.join(' '),\n type: 'paragraph',\n data: {\n speaker: paragraph.speaker ? `Speaker ${ paragraph.speaker }` : `TBC ${ i }`,\n words: paragraph.words,\n start: parseFloat(paragraph.words[0].start)\n },\n // the entities as ranges are each word in the space-joined text,\n // so it needs to be compute for each the offset from the beginning of the paragraph and the length\n entityRanges: generateEntitiesRanges(paragraph.words, 'text') // wordAttributeName\n };\n results.push(draftJsContentBlockParagraph);\n });\n\n return results;\n};\n\nexport default amazonTranscribeToDraft;\n","/**\n * Convert IBM json to draftJS\n * see `sample` folder for example of input and output as well as `example-usage.js`\n *\n */\nimport generateEntitiesRanges from '../generate-entities-ranges/index.js';\n\nconst ibmToDraft = ibmJson => {\n // helper function to normalise IBM words at line level\n const normalizeTimeStampsToWords = timestamps => {\n return timestamps.map(ibmWord => {\n return {\n text: ibmWord[0],\n start: ibmWord[1],\n end: ibmWord[2]\n };\n });\n };\n\n //\n const normalizeIBMWordsList = ibmResults => {\n const normalisedResults = [];\n ibmResults.forEach(result => {\n // nested array to keep paragraph segmentation same as IBM lines\n normalisedResults.push(normalizeTimeStampsToWords(result.alternatives[0].timestamps));\n // TODO: can be revisited - as separate PR by flattening the array like this\n // normalisedResults = normalisedResults.concact(normalizeTimeStampsToWords(result.alternatives[0].timestamps));\n // addSpeakersToWords function would need adjusting as would be dealing with a 1D array instead of 2D\n // if edge case, like in example file, that there's one speaker recognised through all of speaker segemtnation info\n // could break into paragraph when is over a minute? at end of IBM line?\n // or punctuation, altho IBM does not seem to provide punctuation?\n });\n\n return normalisedResults;\n };\n\n // TODO: could be separate file\n const findSpeakerSegmentForWord = (word, speakerSegments) => {\n const tmpSegment = speakerSegments.find(seg => {\n const segStart = seg.from;\n const segEnd = seg.to;\n\n return ((word.start === segStart) && (word.end === segEnd));\n });\n // if find doesn't find any matches it returns an undefined\n if (tmpSegment === undefined) {\n // covering edge case orphan word not belonging to any segments\n // adding UKN speaker label\n return 'UKN';\n } else {\n // find returns the first element that matches the criteria\n return `S_${ tmpSegment.speaker }`;\n }\n };\n // add speakers to words\n const addSpeakersToWords = (ibmWords, ibmSpeakers) => {\n return ibmWords.map(lines => {\n return lines.map(word => {\n\n word.speaker = findSpeakerSegmentForWord(word, ibmSpeakers);\n\n return word;\n });\n });\n };\n\n const ibmNormalisedWordsToDraftJs = (ibmNormalisedWordsWithSpeakers) => {\n const draftJsParagraphsResults = [];\n ibmNormalisedWordsWithSpeakers.forEach((ibmParagraph) => {\n const draftJsContentBlockParagraph = {\n text: ibmParagraph.map((word) => {return word.text;}).join(' '),\n type: 'paragraph',\n data: {\n // Assuming each paragraph in IBM line is the same\n // for context it just seems like the IBM data structure gives you word level speakers,\n // but also gives you \"lines\" so assuming each word in a line has the same speaker.\n speaker: ibmParagraph[0].speaker,\n words: ibmParagraph,\n start: ibmParagraph[0].start\n },\n // the entities as ranges are each word in the space-joined text,\n // so it needs to be compute for each the offset from the beginning of the paragraph and the length\n entityRanges: generateEntitiesRanges(ibmParagraph, 'text'), // wordAttributeName\n };\n draftJsParagraphsResults.push(draftJsContentBlockParagraph);\n });\n\n return draftJsParagraphsResults;\n };\n\n const normalisedWords = normalizeIBMWordsList(ibmJson.results[0].results);\n // TODO: nested array of words, to keep some sort of paragraphs, in case there's only one speaker\n // can be refactored/optimised later\n const ibmNormalisedWordsWithSpeakers = addSpeakersToWords(normalisedWords, ibmJson.results[0].speaker_labels);\n const ibmDratJs = ibmNormalisedWordsToDraftJs(ibmNormalisedWordsWithSpeakers);\n\n return ibmDratJs;\n};\n\nexport default ibmToDraft;\n","/**\n * Convert Digital Paper Edit transcript json format to DraftJS\n * More details see\n * https://github.com/bbc/digital-paper-edit\n */\nimport generateEntitiesRanges from '../generate-entities-ranges/index.js';\nimport groupWordsInParagraphsBySpeakers from './group-words-by-speakers.js';\n/**\n * groups words list from kaldi transcript based on punctuation.\n * @todo To be more accurate, should introduce an honorifics library to do the splitting of the words.\n * @param {array} words - array of words opbjects from kaldi transcript\n */\nconst groupWordsInParagraphs = words => {\n const results = [];\n let paragraph = { words: [], text: [] };\n\n words.forEach(word => {\n // if word contains punctuation\n if (/[.?!]/.test(word.text)) {\n paragraph.words.push(word);\n paragraph.text.push(word.text);\n paragraph.text = paragraph.text.join(' ');\n results.push(paragraph);\n // reset paragraph\n paragraph = { words: [], text: [] };\n } else {\n paragraph.words.push(word);\n paragraph.text.push(word.text);\n }\n });\n\n return results;\n};\n\nconst digitalPaperEditToDraft = (digitalPaperEditTranscriptJson) => {\n const results = [];\n let speakerSegmentation = null;\n let wordsByParagraphs = [];\n const tmpWords = digitalPaperEditTranscriptJson.words;\n\n if (digitalPaperEditTranscriptJson.paragraphs) {\n speakerSegmentation = digitalPaperEditTranscriptJson.paragraphs;\n }\n\n if (!speakerSegmentation) {\n wordsByParagraphs = groupWordsInParagraphs(tmpWords);\n } else {\n wordsByParagraphs = groupWordsInParagraphsBySpeakers(tmpWords, digitalPaperEditTranscriptJson.paragraphs );\n }\n\n wordsByParagraphs.forEach((paragraph, i) => {\n // if paragraph contain words\n // eg sometimes the speaker segmentation might not contain words :man-shrugging:\n if (paragraph.words[0]) {\n let speakerLabel = `TBC ${ i }`;\n if (speakerSegmentation) {\n speakerLabel = paragraph.speaker;\n }\n\n const draftJsContentBlockParagraph = {\n text: paragraph.text,\n type: 'paragraph',\n data: {\n speaker: speakerLabel,\n words: paragraph.words,\n start: paragraph.words[0].start\n },\n // the entities as ranges are each word in the space-joined text,\n // so it needs to be compute for each the offset from the beginning of the paragraph and the length\n entityRanges: generateEntitiesRanges(paragraph.words, 'text') // wordAttributeName\n };\n results.push(draftJsContentBlockParagraph);\n }\n });\n\n return results;\n};\n\nexport default digitalPaperEditToDraft;\n","/**\n * Helper function to generate draft.js entityMap from draftJS blocks,\n */\n\n/**\n * helper function to flatten a list.\n * converts nested arrays into one dimensional array\n * @param {array} list\n */\nconst flatten = list => list.reduce((a, b) => a.concat(Array.isArray(b) ? flatten(b) : b), []);\n\n/**\n * helper function to create createEntityMap\n * @param {*} blocks - draftJs blocks\n */\nconst createEntityMap = (blocks) => {\n const entityRanges = blocks.map(block => block.entityRanges);\n const flatEntityRanges = flatten(entityRanges);\n\n const entityMap = {};\n\n flatEntityRanges.forEach((data) => {\n entityMap[data.key] = {\n type: 'WORD',\n mutability: 'MUTABLE',\n data,\n };\n });\n\n return entityMap;\n};\n\nexport default createEntityMap;","/**\n * Converts GCP Speech to Text Json to DraftJs\n * see `sample` folder for example of input and output as well as `example-usage.js`\n */\n\nimport generateEntitiesRanges from '../generate-entities-ranges/index.js';\n\nconst NANO_SECOND = 1000000000;\n\n/**\n * attribute for the sentences object containing the text. eg sentences ={ punct:'helo', ... }\n * or eg sentences ={ text:'hello', ... }\n * @param sentences\n */\nexport const getBestAlternativeSentence = sentences => {\n if (sentences.alternatives.length === 0) {\n return sentences[0];\n }\n\n const sentenceWithHighestConfidence = sentences.alternatives.reduce(function(\n prev,\n current\n ) {\n return parseFloat(prev.confidence) > parseFloat(current.confidence)\n ? prev\n : current;\n });\n\n return sentenceWithHighestConfidence;\n};\n\nexport const trimLeadingAndTailingWhiteSpace = text => {\n return text.trim();\n};\n\n/**\n * GCP does not provide a nanosecond attribute if the word starts at 0 nanosecond\n * @param startSecond\n * @param nanoSecond\n * @returns {number}\n */\nconst computeTimeInSeconds = (startSecond, nanoSecond) => {\n\n let seconds = parseFloat(startSecond);\n\n if (nanoSecond !== undefined) {\n seconds = seconds + parseFloat(nanoSecond / NANO_SECOND);\n }\n\n return seconds;\n};\n\n/**\n * Normalizes words so they can be used in\n * the generic generateEntitiesRanges() method\n **/\nconst normalizeWord = (currentWord, confidence) => {\n\n return {\n start: computeTimeInSeconds(currentWord.startTime.seconds, currentWord.startTime.nanos),\n end: computeTimeInSeconds(currentWord.endTime.seconds, currentWord.endTime.nanos),\n text: currentWord.word,\n confidence: confidence\n };\n};\n\n/**\n * groups words list from GCP Speech to Text response.\n * @param {array} sentences - array of sentence objects from GCP STT\n */\nconst groupWordsInParagraphs = sentences => {\n const results = [];\n let paragraph = {\n words: [],\n text: []\n };\n\n sentences.forEach((sentence) => {\n const bestAlternative = getBestAlternativeSentence(sentence);\n paragraph.text.push(trimLeadingAndTailingWhiteSpace(bestAlternative.transcript));\n\n bestAlternative.words.forEach((word) => {\n paragraph.words.push(normalizeWord(word, bestAlternative.confidence));\n });\n results.push(paragraph);\n paragraph = { words: [], text: [] };\n });\n\n return results;\n};\n\nconst gcpSttToDraft = gcpSttJson => {\n const results = [];\n // const speakerLabels = gcpSttJson.results[0]['alternatives'][0]['words'][0]['speakerTag']\n // let speakerSegmentation = typeof(speakerLabels) != 'undefined';\n\n const wordsByParagraphs = groupWordsInParagraphs(gcpSttJson.results);\n\n wordsByParagraphs.forEach((paragraph, i) => {\n const draftJsContentBlockParagraph = {\n text: paragraph.text.join(' '),\n type: 'paragraph',\n data: {\n speaker: paragraph.speaker ? `Speaker ${ paragraph.speaker }` : `TBC ${ i }`,\n words: paragraph.words,\n start: parseFloat(paragraph.words[0].start)\n },\n // the entities as ranges are each word in the space-joined text,\n // so it needs to be compute for each the offset from the beginning of the paragraph and the length\n entityRanges: generateEntitiesRanges(paragraph.words, 'text') // wordAttributeName\n };\n results.push(draftJsContentBlockParagraph);\n });\n\n return results;\n};\n\nexport default gcpSttToDraft;\n","import bbcKaldiToDraft from './bbc-kaldi/index';\nimport autoEdit2ToDraft from './autoEdit2/index';\nimport speechmaticsToDraft from './speechmatics/index';\nimport amazonTranscribeToDraft from './amazon-transcribe/index';\nimport ibmToDraft from './ibm/index';\nimport digitalPaperEditToDraft from './digital-paper-edit/index';\nimport createEntityMap from './create-entity-map/index';\nimport gcpSttToDraft from './google-stt/index';\n\n/**\n * Adapters for STT conversion\n * @param {json} transcriptData - A json transcript with some word accurate timecode\n * @param {string} sttJsonType - the type of transcript supported by the available adapters\n */\nconst sttJsonAdapter = (transcriptData, sttJsonType) => {\n let blocks;\n switch (sttJsonType) {\n case 'bbckaldi':\n blocks = bbcKaldiToDraft(transcriptData);\n\n return { blocks, entityMap: createEntityMap(blocks) };\n case 'autoedit2':\n blocks = autoEdit2ToDraft(transcriptData);\n\n return { blocks, entityMap: createEntityMap(blocks) };\n case 'speechmatics':\n blocks = speechmaticsToDraft(transcriptData);\n\n return { blocks, entityMap: createEntityMap(blocks) };\n case 'ibm':\n blocks = ibmToDraft(transcriptData);\n\n return { blocks, entityMap: createEntityMap(blocks) };\n case 'draftjs':\n return transcriptData; // (typeof transcriptData === 'string')? JSON.parse(transcriptData): transcriptData;\n\n case 'amazontranscribe':\n blocks = amazonTranscribeToDraft(transcriptData);\n\n return { blocks, entityMap: createEntityMap(blocks) };\n case 'digitalpaperedit':\n blocks = digitalPaperEditToDraft(transcriptData);\n\n return { blocks, entityMap: createEntityMap(blocks) };\n\n case 'google-stt':\n blocks = gcpSttToDraft(transcriptData);\n\n return { blocks, entityMap: createEntityMap(blocks) };\n\n default:\n // code block\n console.error('Did not recognize the stt engine.');\n }\n};\n\nexport default sttJsonAdapter;\nexport { createEntityMap };","/**\n edge cases\n- more segments then words - not an issue if you start by matching words with segment\nand handle edge case where it doesn't find a match\n- more words then segments - orphan words?\n*\n* Takes in list of words and list of paragraphs (paragraphs have speakers info associated with it)\n```js\n{\n \"words\": [\n {\n \"id\": 0,\n \"start\": 13.02,\n \"end\": 13.17,\n \"text\": \"There\"\n },\n {\n \"id\": 1,\n \"start\": 13.17,\n \"end\": 13.38,\n \"text\": \"is\"\n },\n ...\n ],\n \"paragraphs\": [\n {\n \"id\": 0,\n \"start\": 13.02,\n \"end\": 13.86,\n \"speaker\": \"TBC 00\"\n },\n {\n \"id\": 1,\n \"start\": 13.86,\n \"end\": 19.58,\n \"speaker\": \"TBC 1\"\n },\n ...\n ]\n}\n```\n* and returns a list of words grouped into paragraphs, with words, text and speaker attribute\n```js\n[\n {\n \"words\": [\n {\n \"id\": 0,\n \"start\": 13.02,\n \"end\": 13.17,\n \"text\": \"There\"\n },\n {\n \"id\": 1,\n \"start\": 13.17,\n \"end\": 13.38,\n \"text\": \"is\"\n },\n {\n \"id\": 2,\n \"start\": 13.38,\n \"end\": 13.44,\n \"text\": \"a\"\n },\n {\n \"id\": 3,\n \"start\": 13.44,\n \"end\": 13.86,\n \"text\": \"day.\"\n }\n ],\n \"text\": \"There is a day.\",\n \"speaker\": \"TBC 00\"\n },\n ...\n]\n```\n */\nfunction groupWordsInParagraphsBySpeakers(words, segments) {\n const result = addWordsToSpeakersParagraphs(words, segments);\n\n return result;\n};\n\nfunction addWordsToSpeakersParagraphs (words, segments) {\n const results = [];\n let currentSegment = 'UKN';\n let currentSegmentIndex = 0;\n let previousSegmentIndex = 0;\n let paragraph = { words: [], text: '', speaker: '' };\n words.forEach((word) => {\n currentSegment = findSegmentForWord(word, segments);\n // if a segment exists for the word\n if (currentSegment) {\n currentSegmentIndex = segments.indexOf(currentSegment);\n if (currentSegmentIndex === previousSegmentIndex) {\n paragraph.words.push(word);\n paragraph.text += word.text + ' ';\n paragraph.speaker = currentSegment.speaker;\n }\n else {\n previousSegmentIndex = currentSegmentIndex;\n paragraph.text.trim();\n results.push(paragraph);\n paragraph = { words: [], text: '', speaker: '' };\n paragraph.words.push(word);\n paragraph.text += word.text + ' ';\n paragraph.speaker = currentSegment.speaker;\n }\n }\n });\n results.push(paragraph);\n\n return results;\n}\n\n/**\n* Helper functions\n*/\n\n/**\n* given word start and end time attributes\n* looks for segment range that contains that word\n* if it doesn't find any it returns a segment with `UKN`\n* speaker attributes.\n* @param {object} word - word object\n* @param {array} segments - list of segments objects\n* @return {object} - a single segment whose range contains the word\n*/\nfunction findSegmentForWord(word, segments) {\n\n const tmpSegment = segments.find((seg) => {\n if ((word.start >= seg.start) && (word.end <= seg.end)) {\n return seg;\n }\n });\n\n return tmpSegment;\n}\n\nexport default groupWordsInParagraphsBySpeakers;"],"sourceRoot":""}
\No newline at end of file