UNPKG

14 kBtext/coffeescriptView Raw
1adjacency_graphs = require('./adjacency_graphs')
2
3# on qwerty, 'g' has degree 6, being adjacent to 'ftyhbv'. '\' has degree 1.
4# this calculates the average over all keys.
5calc_average_degree = (graph) ->
6 average = 0
7 for key, neighbors of graph
8 average += (n for n in neighbors when n).length
9 average /= (k for k,v of graph).length
10 average
11
12BRUTEFORCE_CARDINALITY = 10
13MIN_GUESSES_BEFORE_GROWING_SEQUENCE = 10000
14MIN_SUBMATCH_GUESSES_SINGLE_CHAR = 10
15MIN_SUBMATCH_GUESSES_MULTI_CHAR = 50
16
17scoring =
18 nCk: (n, k) ->
19 # http://blog.plover.com/math/choose.html
20 return 0 if k > n
21 return 1 if k == 0
22 r = 1
23 for d in [1..k]
24 r *= n
25 r /= d
26 n -= 1
27 r
28
29 log10: (n) -> Math.log(n) / Math.log(10) # IE doesn't support Math.log10 :(
30 log2: (n) -> Math.log(n) / Math.log(2)
31
32 factorial: (n) ->
33 # unoptimized, called only on small n
34 return 1 if n < 2
35 f = 1
36 f *= i for i in [2..n]
37 f
38
39 # ------------------------------------------------------------------------------
40 # search --- most guessable match sequence -------------------------------------
41 # ------------------------------------------------------------------------------
42 #
43 # takes a sequence of overlapping matches, returns the non-overlapping sequence with
44 # minimum guesses. O(nml) dp alg for length-n password with m candidate matches
45 # and optimal length-l sequence.
46 #
47 # the optimal "minimum guesses" sublist is here defined to be the sublist that
48 # minimizes:
49 #
50 # l! * Product(m.guesses for m in sequence) + D^(l - 1)
51 #
52 # where l is the length of the sequence.
53 #
54 # the factorial term is the number of ways to order l patterns.
55 #
56 # the D^(l-1) term is another length penalty, roughly capturing the idea that an
57 # attacker will try lower-length sequences first before trying length-l sequences.
58 #
59 # for example, consider a sequence that is date-repeat-dictionary.
60 # - an attacker would need to try other date-repeat-dictionary combinations,
61 # hence the product term.
62 # - an attacker would need to try repeat-date-dictionary, dictionary-repeat-date,
63 # ..., hence the factorial term.
64 # - an attacker would also likely try length-1 (dictionary) and length-2 (dictionary-date)
65 # sequences before length-3. assuming at minimum D guesses per pattern type,
66 # D^(l-1) approximates Sum(D^i for i in [1..l-1]
67 #
68 # ------------------------------------------------------------------------------
69
70 most_guessable_match_sequence: (password, matches, _exclude_additive=false) ->
71
72 # at [k][l], the product of guesses of the optimal sequence of length-l
73 # covering password[0..k].
74 optimal_product = []
75
76 # at [k][l], the final match (match.j == k) in said optimal sequence of length l.
77 backpointers = []
78
79 max_l = 0 # max-length sequence ever recorded
80 optimal_l = null # length of current optimal sequence
81
82 make_bruteforce_match = (i, j) =>
83 match =
84 pattern: 'bruteforce'
85 token: password[i..j]
86 i: i
87 j: j
88 match
89
90 score = (guess_product, sequence_length) =>
91 result = @factorial(sequence_length) * guess_product
92 unless _exclude_additive
93 result += Math.pow MIN_GUESSES_BEFORE_GROWING_SEQUENCE, sequence_length - 1
94 result
95
96 for k in [0...password.length]
97 backpointers[k] = []
98 optimal_product[k] = []
99 optimal_score = Infinity
100 for prev_l in [0..max_l]
101 # for each new k, starting scenario to try to beat: bruteforce matches
102 # involving the lowest-possible l. three cases:
103 #
104 # 1. all-bruteforce match (for length-1 sequences.)
105 # 2. extending a previous bruteforce match
106 # (possible when optimal[k-1][l] ends in bf.)
107 # 3. starting a new single-char bruteforce match
108 # (possible when optimal[k-1][l] exists but does not end in bf.)
109 #
110 # otherwise: there is no bruteforce starting scenario that might be better
111 # than already-discovered lower-l sequences.
112 consider_bruteforce = true
113 bf_j = k
114 if prev_l == 0
115 bf_i = 0
116 new_l = 1
117 else if backpointers[k-1]?[prev_l]?.pattern == 'bruteforce'
118 bf_i = backpointers[k-1][prev_l].i
119 new_l = prev_l
120 else if backpointers[k-1]?[prev_l]?
121 bf_i = k
122 new_l = prev_l + 1
123 else
124 consider_bruteforce = false
125
126 if consider_bruteforce
127 bf_match = make_bruteforce_match bf_i, bf_j
128 prev_j = k - bf_match.token.length # end of preceeding match
129 candidate_product = @estimate_guesses bf_match, password
130 candidate_product *= optimal_product[prev_j][new_l - 1] if new_l > 1
131 candidate_score = score candidate_product, new_l
132 if candidate_score < optimal_score
133 optimal_score = candidate_score
134 optimal_product[k][new_l] = candidate_product
135 optimal_l = new_l
136 max_l = Math.max max_l, new_l
137 backpointers[k][new_l] = bf_match
138
139 # now try beating those bruteforce starting scenarios.
140 # for each match m ending at k, see if forming a (prev_l + 1) sequence
141 # ending at m is better than the current optimum.
142 for match in matches when match.j == k
143 [i, j] = [match.i, match.j]
144 if prev_l == 0
145 # if forming a len-1 sequence [match], match.i must fully cover [0..k]
146 continue unless i == 0
147 else
148 # it's only possible to form a new potentially-optimal sequence ending at
149 # match when there's an optimal length-prev_l sequence ending at match.i-1.
150 continue unless optimal_product[i-1]?[prev_l]?
151 candidate_product = @estimate_guesses match, password
152 candidate_product *= optimal_product[i-1][prev_l] if prev_l > 0
153 candidate_score = score candidate_product, prev_l + 1
154 if candidate_score < optimal_score
155 optimal_score = candidate_score
156 optimal_product[k][prev_l+1] = candidate_product
157 optimal_l = prev_l + 1
158 max_l = Math.max max_l, prev_l+1
159 backpointers[k][prev_l+1] = match
160
161 # walk backwards and decode the optimal sequence
162 match_sequence = []
163 l = optimal_l
164 k = password.length - 1
165 while k >= 0
166 match = backpointers[k][l]
167 match_sequence.push match
168 k = match.i - 1
169 l -= 1
170 match_sequence.reverse()
171
172 # corner: empty password
173 if password.length == 0
174 guesses = 1
175 else
176 guesses = optimal_score
177
178 # final result object
179 password: password
180 guesses: guesses
181 guesses_log10: @log10 guesses
182 sequence: match_sequence
183
184 # ------------------------------------------------------------------------------
185 # guess estimation -- one function per match pattern ---------------------------
186 # ------------------------------------------------------------------------------
187
188 estimate_guesses: (match, password) ->
189 return match.guesses if match.guesses? # a match's guess estimate doesn't change. cache it.
190 min_guesses = 1
191 if match.token.length < password.length
192 min_guesses = if match.token.length == 1
193 MIN_SUBMATCH_GUESSES_SINGLE_CHAR
194 else
195 MIN_SUBMATCH_GUESSES_MULTI_CHAR
196 estimation_functions =
197 bruteforce: @bruteforce_guesses
198 dictionary: @dictionary_guesses
199 spatial: @spatial_guesses
200 repeat: @repeat_guesses
201 sequence: @sequence_guesses
202 regex: @regex_guesses
203 date: @date_guesses
204 guesses = estimation_functions[match.pattern].call this, match
205 match.guesses = Math.max guesses, min_guesses
206 match.guesses_log10 = @log10 match.guesses
207 match.guesses
208
209 bruteforce_guesses: (match) ->
210 guesses = Math.pow BRUTEFORCE_CARDINALITY, match.token.length
211 # small detail: make bruteforce matches at minimum one guess bigger than smallest allowed
212 # submatch guesses, such that non-bruteforce submatches over the same [i..j] take precidence.
213 min_guesses = if match.token.length == 1
214 MIN_SUBMATCH_GUESSES_SINGLE_CHAR + 1
215 else
216 MIN_SUBMATCH_GUESSES_MULTI_CHAR + 1
217 Math.max guesses, min_guesses
218
219 repeat_guesses: (match) ->
220 match.base_guesses * match.repeat_count
221
222 sequence_guesses: (match) ->
223 first_chr = match.token.charAt(0)
224 # lower guesses for obvious starting points
225 if first_chr in ['a', 'A', 'z', 'Z', '0', '1', '9']
226 base_guesses = 4
227 else
228 if first_chr.match /\d/
229 base_guesses = 10 # digits
230 else
231 # could give a higher base for uppercase,
232 # assigning 26 to both upper and lower sequences is more conservative.
233 base_guesses = 26
234 if not match.ascending
235 # need to try a descending sequence in addition to every ascending sequence ->
236 # 2x guesses
237 base_guesses *= 2
238 base_guesses * match.token.length
239
240 MIN_YEAR_SPACE: 20
241 REFERENCE_YEAR: 2000
242 regex_guesses: (match) ->
243 char_class_bases =
244 alpha_lower: 26
245 alpha_upper: 26
246 alpha: 52
247 alphanumeric: 62
248 digits: 10
249 symbols: 33
250 if match.regex_name of char_class_bases
251 Math.pow(char_class_bases[match.regex_name], match.token.length)
252 else switch match.regex_name
253 when 'recent_year'
254 # conservative estimate of year space: num years from REFERENCE_YEAR.
255 # if year is close to REFERENCE_YEAR, estimate a year space of MIN_YEAR_SPACE.
256 year_space = Math.abs parseInt(match.regex_match[0]) - @REFERENCE_YEAR
257 year_space = Math.max year_space, @MIN_YEAR_SPACE
258 year_space
259
260 date_guesses: (match) ->
261 # base guesses: (year distance from REFERENCE_YEAR) * num_days * num_years
262 year_space = Math.max(Math.abs(match.year - @REFERENCE_YEAR), @MIN_YEAR_SPACE)
263 guesses = year_space * 31 * 12
264 # double for four-digit years
265 guesses *= 2 if match.has_full_year
266 # add factor of 4 for separator selection (one of ~4 choices)
267 guesses *= 4 if match.separator
268 guesses
269
270 KEYBOARD_AVERAGE_DEGREE: calc_average_degree(adjacency_graphs.qwerty)
271 # slightly different for keypad/mac keypad, but close enough
272 KEYPAD_AVERAGE_DEGREE: calc_average_degree(adjacency_graphs.keypad)
273
274 KEYBOARD_STARTING_POSITIONS: (k for k,v of adjacency_graphs.qwerty).length
275 KEYPAD_STARTING_POSITIONS: (k for k,v of adjacency_graphs.keypad).length
276
277 spatial_guesses: (match) ->
278 if match.graph in ['qwerty', 'dvorak']
279 s = @KEYBOARD_STARTING_POSITIONS
280 d = @KEYBOARD_AVERAGE_DEGREE
281 else
282 s = @KEYPAD_STARTING_POSITIONS
283 d = @KEYPAD_AVERAGE_DEGREE
284 guesses = 0
285 L = match.token.length
286 t = match.turns
287 # estimate the number of possible patterns w/ length L or less with t turns or less.
288 for i in [2..L]
289 possible_turns = Math.min(t, i - 1)
290 for j in [1..possible_turns]
291 guesses += @nCk(i - 1, j - 1) * s * Math.pow(d, j)
292 # add extra guesses for shifted keys. (% instead of 5, A instead of a.)
293 # math is similar to extra guesses of l33t substitutions in dictionary matches.
294 if match.shifted_count
295 S = match.shifted_count
296 U = match.token.length - match.shifted_count # unshifted count
297 if S == 0 or U == 0
298 guesses *= 2
299 else
300 shifted_variations = 0
301 shifted_variations += @nCk(S + U, i) for i in [1..Math.min(S, U)]
302 guesses *= shifted_variations
303 guesses
304
305 dictionary_guesses: (match) ->
306 match.base_guesses = match.rank # keep these as properties for display purposes
307 match.uppercase_variations = @uppercase_variations match
308 match.l33t_variations = @l33t_variations match
309 reversed_variations = match.reversed and 2 or 1
310 match.base_guesses * match.uppercase_variations * match.l33t_variations * reversed_variations
311
312 START_UPPER: /^[A-Z][^A-Z]+$/
313 END_UPPER: /^[^A-Z]+[A-Z]$/
314 ALL_UPPER: /^[^a-z]+$/
315 ALL_LOWER: /^[^A-Z]+$/
316
317 uppercase_variations: (match) ->
318 word = match.token
319 return 1 if word.match @ALL_LOWER
320 # a capitalized word is the most common capitalization scheme,
321 # so it only doubles the search space (uncapitalized + capitalized).
322 # allcaps and end-capitalized are common enough too, underestimate as 2x factor to be safe.
323 for regex in [@START_UPPER, @END_UPPER, @ALL_UPPER]
324 return 2 if word.match regex
325 # otherwise calculate the number of ways to capitalize U+L uppercase+lowercase letters
326 # with U uppercase letters or less. or, if there's more uppercase than lower (for eg. PASSwORD),
327 # the number of ways to lowercase U+L letters with L lowercase letters or less.
328 U = (chr for chr in word.split('') when chr.match /[A-Z]/).length
329 L = (chr for chr in word.split('') when chr.match /[a-z]/).length
330 variations = 0
331 variations += @nCk(U + L, i) for i in [1..Math.min(U, L)]
332 variations
333
334 l33t_variations: (match) ->
335 return 1 if not match.l33t
336 variations = 1
337 for subbed, unsubbed of match.sub
338 # lower-case match.token before calculating: capitalization shouldn't affect l33t calc.
339 chrs = match.token.toLowerCase().split('')
340 S = (chr for chr in chrs when chr == subbed).length # num of subbed chars
341 U = (chr for chr in chrs when chr == unsubbed).length # num of unsubbed chars
342 if S == 0 or U == 0
343 # for this sub, password is either fully subbed (444) or fully unsubbed (aaa)
344 # treat that as doubling the space (attacker needs to try fully subbed chars in addition to
345 # unsubbed.)
346 variations *= 2
347 else
348 # this case is similar to capitalization:
349 # with aa44a, U = 3, S = 2, attacker needs to try unsubbed + one sub + two subs
350 p = Math.min(U, S)
351 possibilities = 0
352 possibilities += @nCk(U + S, i) for i in [1..p]
353 variations *= possibilities
354 variations
355
356 # utilities --------------------------------------------------------------------
357
358module.exports = scoring