1 | frequency_lists = require('./frequency_lists')
|
2 | adjacency_graphs = require('./adjacency_graphs')
|
3 | scoring = require('./scoring')
|
4 |
|
5 | build_ranked_dict = (ordered_list) ->
|
6 | result = {}
|
7 | i = 1
|
8 | for word in ordered_list
|
9 | result[word] = i
|
10 | i += 1
|
11 | result
|
12 |
|
13 | RANKED_DICTIONARIES =
|
14 | passwords: build_ranked_dict frequency_lists.passwords
|
15 | english: build_ranked_dict frequency_lists.english
|
16 | surnames: build_ranked_dict frequency_lists.surnames
|
17 | male_names: build_ranked_dict frequency_lists.male_names
|
18 | female_names: build_ranked_dict frequency_lists.female_names
|
19 |
|
20 | GRAPHS =
|
21 | qwerty: adjacency_graphs.qwerty
|
22 | dvorak: adjacency_graphs.dvorak
|
23 | keypad: adjacency_graphs.keypad
|
24 | mac_keypad: adjacency_graphs.mac_keypad
|
25 |
|
26 | SEQUENCES =
|
27 | lower: 'abcdefghijklmnopqrstuvwxyz'
|
28 | upper: 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
|
29 | digits: '0123456789'
|
30 |
|
31 | L33T_TABLE =
|
32 | a: ['4', '@']
|
33 | b: ['8']
|
34 | c: ['(', '{', '[', '<']
|
35 | e: ['3']
|
36 | g: ['6', '9']
|
37 | i: ['1', '!', '|']
|
38 | l: ['1', '|', '7']
|
39 | o: ['0']
|
40 | s: ['$', '5']
|
41 | t: ['+', '7']
|
42 | x: ['%']
|
43 | z: ['2']
|
44 |
|
45 | REGEXEN =
|
46 | recent_year: /19\d\d|200\d|201\d/g
|
47 |
|
48 | DATE_MAX_YEAR = 2050
|
49 | DATE_MIN_YEAR = 1000
|
50 | DATE_SPLITS =
|
51 | 4:[
|
52 | [1, 2]
|
53 | [2, 3]
|
54 | ]
|
55 | 5:[
|
56 | [1, 3]
|
57 | [2, 3]
|
58 | ]
|
59 | 6:[
|
60 | [1, 2]
|
61 | [2, 4]
|
62 | [4, 5]
|
63 | ]
|
64 | 7:[
|
65 | [1, 3]
|
66 | [2, 3]
|
67 | [4, 5]
|
68 | [4, 6]
|
69 | ]
|
70 | 8:[
|
71 | [2, 4]
|
72 | [4, 6]
|
73 | ]
|
74 |
|
75 | matching =
|
76 | empty: (obj) -> (k for k of obj).length == 0
|
77 | extend: (lst, lst2) -> lst.push.apply lst, lst2
|
78 | translate: (string, chr_map) -> (chr_map[chr] or chr for chr in string.split('')).join('')
|
79 | mod: (n, m) -> ((n % m) + m) % m
|
80 | sorted: (matches) ->
|
81 |
|
82 | matches.sort (m1, m2) ->
|
83 | (m1.i - m2.i) or (m1.j - m2.j)
|
84 |
|
85 |
|
86 |
|
87 |
|
88 |
|
89 | omnimatch: (password) ->
|
90 | matches = []
|
91 | matchers = [
|
92 | @dictionary_match
|
93 | @reverse_dictionary_match
|
94 | @l33t_match
|
95 | @spatial_match
|
96 | @repeat_match
|
97 | @sequence_match
|
98 | @regex_match
|
99 | @date_match
|
100 | ]
|
101 | for matcher in matchers
|
102 | @extend matches, matcher.call(this, password)
|
103 | @sorted matches
|
104 |
|
105 |
|
106 |
|
107 |
|
108 |
|
109 | dictionary_match: (password, _ranked_dictionaries = RANKED_DICTIONARIES) ->
|
110 |
|
111 | matches = []
|
112 | len = password.length
|
113 | password_lower = password.toLowerCase()
|
114 | for dictionary_name, ranked_dict of _ranked_dictionaries
|
115 | for i in [0...len]
|
116 | for j in [i...len]
|
117 | if password_lower[i..j] of ranked_dict
|
118 | word = password_lower[i..j]
|
119 | rank = ranked_dict[word]
|
120 | matches.push
|
121 | pattern: 'dictionary'
|
122 | i: i
|
123 | j: j
|
124 | token: password[i..j]
|
125 | matched_word: word
|
126 | rank: rank
|
127 | dictionary_name: dictionary_name
|
128 | reversed: false
|
129 | l33t: false
|
130 | @sorted matches
|
131 |
|
132 | reverse_dictionary_match: (password, _ranked_dictionaries = RANKED_DICTIONARIES) ->
|
133 | reversed_password = password.split('').reverse().join('')
|
134 | matches = @dictionary_match reversed_password, _ranked_dictionaries
|
135 | for match in matches
|
136 | match.token = match.token.split('').reverse().join('')
|
137 | match.reversed = true
|
138 |
|
139 | [match.i, match.j] = [
|
140 | password.length - 1 - match.j
|
141 | password.length - 1 - match.i
|
142 | ]
|
143 | @sorted matches
|
144 |
|
145 | set_user_input_dictionary: (ordered_list) ->
|
146 | RANKED_DICTIONARIES['user_inputs'] = build_ranked_dict ordered_list.slice()
|
147 |
|
148 |
|
149 |
|
150 |
|
151 |
|
152 |
|
153 | relevant_l33t_subtable: (password, table) ->
|
154 | password_chars = {}
|
155 | for chr in password.split('')
|
156 | password_chars[chr] = true
|
157 | subtable = {}
|
158 | for letter, subs of table
|
159 | relevant_subs = (sub for sub in subs when sub of password_chars)
|
160 | if relevant_subs.length > 0
|
161 | subtable[letter] = relevant_subs
|
162 | subtable
|
163 |
|
164 |
|
165 | enumerate_l33t_subs: (table) ->
|
166 | keys = (k for k of table)
|
167 | subs = [[]]
|
168 |
|
169 | dedup = (subs) ->
|
170 | deduped = []
|
171 | members = {}
|
172 | for sub in subs
|
173 | assoc = ([k,v] for k,v in sub)
|
174 | assoc.sort()
|
175 | label = (k+','+v for k,v in assoc).join('-')
|
176 | unless label of members
|
177 | members[label] = true
|
178 | deduped.push sub
|
179 | deduped
|
180 |
|
181 | helper = (keys) ->
|
182 | return if not keys.length
|
183 | first_key = keys[0]
|
184 | rest_keys = keys[1..]
|
185 | next_subs = []
|
186 | for l33t_chr in table[first_key]
|
187 | for sub in subs
|
188 | dup_l33t_index = -1
|
189 | for i in [0...sub.length]
|
190 | if sub[i][0] == l33t_chr
|
191 | dup_l33t_index = i
|
192 | break
|
193 | if dup_l33t_index == -1
|
194 | sub_extension = sub.concat [[l33t_chr, first_key]]
|
195 | next_subs.push sub_extension
|
196 | else
|
197 | sub_alternative = sub.slice(0)
|
198 | sub_alternative.splice(dup_l33t_index, 1)
|
199 | sub_alternative.push [l33t_chr, first_key]
|
200 | next_subs.push sub
|
201 | next_subs.push sub_alternative
|
202 | subs = dedup next_subs
|
203 | helper(rest_keys)
|
204 |
|
205 | helper(keys)
|
206 | sub_dicts = []
|
207 | for sub in subs
|
208 | sub_dict = {}
|
209 | for [l33t_chr, chr] in sub
|
210 | sub_dict[l33t_chr] = chr
|
211 | sub_dicts.push sub_dict
|
212 | sub_dicts
|
213 |
|
214 | l33t_match: (password, _ranked_dictionaries = RANKED_DICTIONARIES, _l33t_table = L33T_TABLE) ->
|
215 | matches = []
|
216 | for sub in @enumerate_l33t_subs @relevant_l33t_subtable(password, _l33t_table)
|
217 | break if @empty sub
|
218 | subbed_password = @translate password, sub
|
219 | for match in @dictionary_match(subbed_password, _ranked_dictionaries)
|
220 | token = password[match.i..match.j]
|
221 | if token.toLowerCase() == match.matched_word
|
222 | continue
|
223 | match_sub = {}
|
224 | for subbed_chr, chr of sub when token.indexOf(subbed_chr) != -1
|
225 | match_sub[subbed_chr] = chr
|
226 | match.l33t = true
|
227 | match.token = token
|
228 | match.sub = match_sub
|
229 | match.sub_display = ("#{k} -> #{v}" for k,v of match_sub).join(', ')
|
230 | matches.push match
|
231 | @sorted matches.filter (match) ->
|
232 |
|
233 |
|
234 |
|
235 | match.token.length > 1
|
236 |
|
237 |
|
238 |
|
239 |
|
240 |
|
241 | spatial_match: (password, _graphs = GRAPHS) ->
|
242 | matches = []
|
243 | for graph_name, graph of _graphs
|
244 | @extend matches, @spatial_match_helper(password, graph, graph_name)
|
245 | @sorted matches
|
246 |
|
247 | SHIFTED_RX: /[~!@#$%^&*()_+QWERTYUIOP{}|ASDFGHJKL:"ZXCVBNM<>?]/
|
248 | spatial_match_helper: (password, graph, graph_name) ->
|
249 | matches = []
|
250 | i = 0
|
251 | while i < password.length - 1
|
252 | j = i + 1
|
253 | last_direction = null
|
254 | turns = 0
|
255 | if graph_name in ['qwerty', 'dvorak'] and @SHIFTED_RX.exec(password.charAt(i))
|
256 |
|
257 | shifted_count = 1
|
258 | else
|
259 | shifted_count = 0
|
260 | loop
|
261 | prev_char = password.charAt(j-1)
|
262 | found = false
|
263 | found_direction = -1
|
264 | cur_direction = -1
|
265 | adjacents = graph[prev_char] or []
|
266 |
|
267 | if j < password.length
|
268 | cur_char = password.charAt(j)
|
269 | for adj in adjacents
|
270 | cur_direction += 1
|
271 | if adj and adj.indexOf(cur_char) != -1
|
272 | found = true
|
273 | found_direction = cur_direction
|
274 | if adj.indexOf(cur_char) == 1
|
275 |
|
276 |
|
277 |
|
278 |
|
279 | shifted_count += 1
|
280 | if last_direction != found_direction
|
281 |
|
282 |
|
283 | turns += 1
|
284 | last_direction = found_direction
|
285 | break
|
286 |
|
287 | if found
|
288 | j += 1
|
289 |
|
290 | else
|
291 | if j - i > 2
|
292 | matches.push
|
293 | pattern: 'spatial'
|
294 | i: i
|
295 | j: j-1
|
296 | token: password[i...j]
|
297 | graph: graph_name
|
298 | turns: turns
|
299 | shifted_count: shifted_count
|
300 |
|
301 | i = j
|
302 | break
|
303 | matches
|
304 |
|
305 |
|
306 |
|
307 |
|
308 |
|
309 | repeat_match: (password) ->
|
310 | matches = []
|
311 | greedy = /(.+)\1+/g
|
312 | lazy = /(.+?)\1+/g
|
313 | lazy_anchored = /^(.+?)\1+$/
|
314 | lastIndex = 0
|
315 | while lastIndex < password.length
|
316 | greedy.lastIndex = lazy.lastIndex = lastIndex
|
317 | greedy_match = greedy.exec password
|
318 | lazy_match = lazy.exec password
|
319 | break unless greedy_match?
|
320 | if greedy_match[0].length > lazy_match[0].length
|
321 |
|
322 |
|
323 |
|
324 | match = greedy_match
|
325 |
|
326 |
|
327 |
|
328 |
|
329 | base_token = lazy_anchored.exec(match[0])[1]
|
330 | else
|
331 |
|
332 |
|
333 |
|
334 | match = lazy_match
|
335 | base_token = match[1]
|
336 | [i, j] = [match.index, match.index + match[0].length - 1]
|
337 |
|
338 | base_analysis = scoring.most_guessable_match_sequence(
|
339 | base_token
|
340 | @omnimatch base_token
|
341 | )
|
342 | base_matches = base_analysis.match_sequence
|
343 | base_guesses = base_analysis.guesses
|
344 | matches.push
|
345 | pattern: 'repeat'
|
346 | i: i
|
347 | j: j
|
348 | token: match[0]
|
349 | base_token: base_token
|
350 | base_guesses: base_guesses
|
351 | base_matches: base_matches
|
352 | repeat_count: match[0].length / base_token.length
|
353 | lastIndex = j + 1
|
354 | matches
|
355 |
|
356 | sequence_match: (password) ->
|
357 | matches = []
|
358 | for sequence_name, sequence of SEQUENCES
|
359 | for direction in [1, -1]
|
360 | i = 0
|
361 | while i < password.length
|
362 | unless password.charAt(i) in sequence
|
363 | i += 1
|
364 | continue
|
365 | j = i + 1
|
366 | sequence_position = sequence.indexOf password.charAt(i)
|
367 | while j < password.length
|
368 |
|
369 | next_sequence_position = @mod sequence_position + direction, sequence.length
|
370 | unless sequence.indexOf(password.charAt(j)) == next_sequence_position
|
371 | break
|
372 | j += 1
|
373 | sequence_position = next_sequence_position
|
374 | j -= 1
|
375 | if j - i + 1 > 1
|
376 | matches.push
|
377 | pattern: 'sequence'
|
378 | i: i
|
379 | j: j
|
380 | token: password[i..j]
|
381 | sequence_name: sequence_name
|
382 | sequence_space: sequence.length
|
383 | ascending: direction == 1
|
384 | i = j + 1
|
385 | @sorted matches
|
386 |
|
387 |
|
388 |
|
389 |
|
390 |
|
391 | regex_match: (password, _regexen = REGEXEN) ->
|
392 | matches = []
|
393 | for name, regex of _regexen
|
394 | regex.lastIndex = 0
|
395 | while rx_match = regex.exec password
|
396 | token = rx_match[0]
|
397 | matches.push
|
398 | pattern: 'regex'
|
399 | token: token
|
400 | i: rx_match.index
|
401 | j: rx_match.index + rx_match[0].length - 1
|
402 | regex_name: name
|
403 | regex_match: rx_match
|
404 | @sorted matches
|
405 |
|
406 |
|
407 |
|
408 |
|
409 |
|
410 | date_match: (password) ->
|
411 |
|
412 |
|
413 |
|
414 |
|
415 |
|
416 |
|
417 |
|
418 |
|
419 |
|
420 |
|
421 |
|
422 |
|
423 |
|
424 |
|
425 |
|
426 |
|
427 |
|
428 |
|
429 | matches = []
|
430 | maybe_date_no_separator = /^\d{4,8}$/
|
431 | maybe_date_with_separator = ///
|
432 | ^
|
433 | ( \d{1,4} )
|
434 | ( [\s/\\_.-] )
|
435 | ( \d{1,2} )
|
436 | \2
|
437 | ( \d{1,4} )
|
438 | $
|
439 | ///
|
440 |
|
441 |
|
442 | for i in [0..password.length - 4]
|
443 | for j in [i + 3..i + 7]
|
444 | break if j >= password.length
|
445 | token = password[i..j]
|
446 | continue unless maybe_date_no_separator.exec token
|
447 | candidates = []
|
448 | for [k,l] in DATE_SPLITS[token.length]
|
449 | dmy = @map_ints_to_dmy [
|
450 | parseInt token[0...k]
|
451 | parseInt token[k...l]
|
452 | parseInt token[l...]
|
453 | ]
|
454 | candidates.push dmy if dmy?
|
455 | continue unless candidates.length > 0
|
456 |
|
457 |
|
458 |
|
459 |
|
460 |
|
461 |
|
462 | best_candidate = candidates[0]
|
463 | metric = (candidate) -> Math.abs candidate.year - scoring.REFERENCE_YEAR
|
464 | min_distance = metric candidates[0]
|
465 | for candidate in candidates[1..]
|
466 | distance = metric candidate
|
467 | if distance < min_distance
|
468 | [best_candidate, min_distance] = [candidate, distance]
|
469 | matches.push
|
470 | pattern: 'date'
|
471 | token: token
|
472 | i: i
|
473 | j: j
|
474 | separator: ''
|
475 | year: best_candidate.year
|
476 | month: best_candidate.month
|
477 | day: best_candidate.day
|
478 |
|
479 |
|
480 | for i in [0..password.length - 6]
|
481 | for j in [i + 5..i + 9]
|
482 | break if j >= password.length
|
483 | token = password[i..j]
|
484 | rx_match = maybe_date_with_separator.exec token
|
485 | continue unless rx_match?
|
486 | dmy = @map_ints_to_dmy [
|
487 | parseInt rx_match[1]
|
488 | parseInt rx_match[3]
|
489 | parseInt rx_match[4]
|
490 | ]
|
491 | continue unless dmy?
|
492 | matches.push
|
493 | pattern: 'date'
|
494 | token: token
|
495 | i: i
|
496 | j: j
|
497 | separator: rx_match[2]
|
498 | year: dmy.year
|
499 | month: dmy.month
|
500 | day: dmy.day
|
501 |
|
502 |
|
503 |
|
504 |
|
505 |
|
506 |
|
507 |
|
508 |
|
509 | @sorted matches.filter (match) ->
|
510 | is_submatch = false
|
511 | for other_match in matches
|
512 | continue if match is other_match
|
513 | if other_match.i <= match.i and other_match.j >= match.j
|
514 | is_submatch = true
|
515 | break
|
516 | not is_submatch
|
517 |
|
518 | map_ints_to_dmy: (ints) ->
|
519 |
|
520 |
|
521 |
|
522 |
|
523 |
|
524 |
|
525 |
|
526 |
|
527 | return if ints[1] > 31 or ints[1] <= 0
|
528 | over_12 = 0
|
529 | over_31 = 0
|
530 | under_1 = 0
|
531 | for int in ints
|
532 | return if 99 < int < DATE_MIN_YEAR or int > DATE_MAX_YEAR
|
533 | over_31 += 1 if int > 31
|
534 | over_12 += 1 if int > 12
|
535 | under_1 += 1 if int <= 0
|
536 | return if over_31 >= 2 or over_12 == 3 or under_1 >= 2
|
537 |
|
538 |
|
539 | possible_year_splits = [
|
540 | [ints[2], ints[0..1]]
|
541 | [ints[0], ints[1..2]]
|
542 | ]
|
543 | for [y, rest] in possible_year_splits
|
544 | if DATE_MIN_YEAR <= y <= DATE_MAX_YEAR
|
545 | dm = @map_ints_to_dm rest
|
546 | if dm?
|
547 | return {
|
548 | year: y
|
549 | month: dm.month
|
550 | day: dm.day
|
551 | }
|
552 | else
|
553 |
|
554 |
|
555 |
|
556 | return
|
557 |
|
558 |
|
559 |
|
560 | for [y, rest] in possible_year_splits
|
561 | dm = @map_ints_to_dm rest
|
562 | if dm?
|
563 | y = @two_to_four_digit_year y
|
564 | return {
|
565 | year: y
|
566 | month: dm.month
|
567 | day: dm.day
|
568 | }
|
569 |
|
570 | map_ints_to_dm: (ints) ->
|
571 | for [d, m] in [ints, ints.slice().reverse()]
|
572 | if 1 <= d <= 31 and 1 <= m <= 12
|
573 | return {
|
574 | day: d
|
575 | month: m
|
576 | }
|
577 |
|
578 | two_to_four_digit_year: (year) ->
|
579 | if year > 99
|
580 | year
|
581 | else if year > 50
|
582 |
|
583 | year + scoring.REFERENCE_YEAR - 100
|
584 | else
|
585 |
|
586 | year + scoring.REFERENCE_YEAR
|
587 |
|
588 | module.exports = matching
|