/**
 * affectimo
 * v0.1.3
 *
 * Analyse the affect (sentiment / valence) and intensity (arousal) of a string.
 *
 * Help me make this better:
 * https://github.com/phugh/affectimo
 *
 * Based on this paper:
 * Sedoc J., Preotiuc-Pietro D. & Ungar, L. (2017). Predicting Emotional Word Ratings using Distributional Representations and Signed Clustering. Proceedings of the 14th Conference of the European Chapter of the Association for Computational Linguistics, EACL.
 *
 * Using the affect/intensity lexicon data from http://www.wwbp.org/lexica.html
 * Used under the Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported licence
 *
 * (C) 2017 P. Hughes
 * Licence : Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported
 * http://creativecommons.org/licenses/by-nc-sa/3.0/
 *
 * Usage example:
 * const affectimo = require('affectimo');
 * const text = "A big long string of text...";
 * let ai = affectimo(text);
 * console.log(ai)
 *
 * Affect range: 1 = very negative, 5 = neutral, 9 = very positive
 * Intensity range: 1 = neutral/objective to 9 = very high
 *
 * @param {string} str input string
 * @return {Object} object with 'AFFECT' and 'INTENSITY' keys
 */
|
32 |
|
33 |
|
34 | ;(function () {
|
// `root` is whatever `this` the enclosing wrapper function was invoked with.
const root = this
// Remember the value already stored at root.affectimo so that
// affectimo.noConflict() can put it back later.
const previous = root.affectimo

const hasRequire = typeof require !== 'undefined'

// Start with any dependencies that are already attached to the root object.
let tokenizer = root.tokenizer
let lexicon = root.lexicon

// Load whichever dependency is still missing. The check looks at the
// dependencies themselves (not at an unrelated global), so a missing
// tokenizer or lexicon is reported here rather than failing later at call time.
if (typeof tokenizer === 'undefined' || typeof lexicon === 'undefined') {
  if (!hasRequire) {
    throw new Error('affectimo requires happynodetokenizer and ./data/lexicon.json')
  }
  if (typeof tokenizer === 'undefined') tokenizer = require('happynodetokenizer')
  if (typeof lexicon === 'undefined') lexicon = require('./data/lexicon.json')
}
|
49 |
|
// Return every index at which `el` occurs in the array (compared with ===),
// in ascending order. Callers that only need a count can read `.length`
// of the result.
//
// The method is installed as a NON-enumerable property so that it does not
// show up in `for...in` loops over arrays elsewhere in the program.
Object.defineProperty(Array.prototype, 'indexesOf', {
  configurable: true,
  writable: true,
  enumerable: false,
  value: function (el) {
    const idxs = []
    // walk forwards and append, so the result is already sorted
    for (let i = 0; i < this.length; i++) {
      if (this[i] === el) {
        idxs.push(i)
      }
    }
    return idxs
  }
})
|
62 |
|
/**
 * Collect the lexicon words that occur in the token array.
 *
 * The result has one key per lexicon category. Each value is an array of
 * [word, weight] pairs in lexicon order. When a word occurs more than once
 * in the input, `word` is an array holding one copy per occurrence;
 * otherwise it is the plain string. A category with no matching words maps
 * to an empty array.
 *
 * @function getMatches
 * @param {Array} arr token array
 * @return {Object} object of matches
 */
const getMatches = (arr) => {
  // count every token once, so each lexicon word below is a single lookup
  // instead of a fresh scan of the whole token array
  const counts = new Map()
  for (const token of arr) {
    counts.set(token, (counts.get(token) || 0) + 1)
  }

  const matches = {}
  // loop through the lexicon categories
  for (const cat of Object.keys(lexicon)) {
    const data = lexicon[cat]
    const match = []
    // loop through words in category
    for (const word of Object.keys(data)) {
      const reps = counts.get(word) // number of times the word appears in the input
      if (!reps) continue
      const weight = data[word]
      // repeated words are grouped as an array with one copy per appearance
      const item = reps > 1 ? [new Array(reps).fill(word), weight] : [word, weight]
      match.push(item)
    }
    matches[cat] = match
  }
  return matches
}
|
102 |
|
/**
 * Add up the weights of all matches and then add the intercept.
 *
 * Each match is read as `match[1]`, i.e. the weight slot of a
 * [word, weight] pair; the word slot is not inspected, so a word that was
 * grouped as several occurrences still contributes its weight once.
 *
 * @function calcLex
 * @param {Object} obj matches (array of [word, weight] pairs); null/undefined counts as no matches
 * @param {number} wc word count (accepted for interface compatibility; not used in the calculation)
 * @param {number} int intercept value
 * @return {number} lexical value
 */
const calcLex = (obj, wc, int) => {
  let lex = 0
  if (obj != null) {
    // own keys only, so inherited or prototype-added properties are never summed
    for (const key of Object.keys(obj)) {
      lex += Number(obj[key][1])
    }
  }
  // weights are summed first and the intercept is added last
  return lex + Number(int)
}
|
124 |
|
/**
 * Score a string for affect and intensity.
 *
 * The input is lower-cased, trimmed and tokenized; lexicon matches are then
 * summed per category and offset by that category's intercept.
 *
 * When there is no input, or the tokenizer yields no tokens (null or an
 * empty array), both values are the number 0. Otherwise both values are
 * strings formatted to two decimal places.
 *
 * @function affectimo
 * @param {string} str input string (other non-null values are converted with toString())
 * @return {Object} object of lexical values with 'AFFECT' and 'INTENSITY' keys
 */
const affectimo = (str) => {
  // intercept values passed to calcLex for each category
  const AFFECT_INTERCEPT = 5.037104721
  const INTENSITY_INTERCEPT = 2.399762631

  // make sure there is input before proceeding
  if (str == null) return {AFFECT: 0, INTENSITY: 0}
  // make sure we're working with a string, then trim whitespace and lowercase
  const text = (typeof str === 'string' ? str : str.toString()).toLowerCase().trim()
  // convert our string to tokens
  const tokens = tokenizer(text)
  // no tokens at all (null or empty) means there is nothing to score
  if (tokens == null || tokens.length === 0) return {AFFECT: 0, INTENSITY: 0}
  // get matches from array
  const matches = getMatches(tokens)
  // get wordcount
  const wordcount = tokens.length
  // calculate lexical usage
  return {
    AFFECT: calcLex(matches.AFFECT, wordcount, AFFECT_INTERCEPT).toFixed(2),
    INTENSITY: calcLex(matches.INTENSITY, wordcount, INTENSITY_INTERCEPT).toFixed(2)
  }
}
|
152 |
|
// Restore root.affectimo to the value it held before this module ran and
// hand back this module's function so the caller can keep it under another name.
affectimo.noConflict = function () {
  root.affectimo = previous
  return affectimo
}
|
157 |
|
// Publish the function: attach it to the root object when there is no
// `exports`, otherwise expose it through the module system.
if (typeof exports === 'undefined') {
  root.affectimo = affectimo
} else {
  // when a real `module.exports` is available, the function itself becomes the module
  if (typeof module !== 'undefined' && module.exports) {
    exports = module.exports = affectimo
  }
  exports.affectimo = affectimo
}
|
166 | }).call(this)
|