UNPKG

zxcvbn/src/matching.coffee

Version:

20.4 kBtext/coffeescriptView Raw

1frequency_lists = require('./frequency_lists')
2adjacency_graphs = require('./adjacency_graphs')
3scoring = require('./scoring')
4
5build_ranked_dict = (ordered_list) ->
result = {}
i = 1 # rank starts at 1, not 0
for word in ordered_list
  result[word] = i
  i += 1
result
12
13RANKED_DICTIONARIES =
passwords:    build_ranked_dict frequency_lists.passwords
english:      build_ranked_dict frequency_lists.english
surnames:     build_ranked_dict frequency_lists.surnames
male_names:   build_ranked_dict frequency_lists.male_names
female_names: build_ranked_dict frequency_lists.female_names
19
20GRAPHS =
qwerty:     adjacency_graphs.qwerty
dvorak:     adjacency_graphs.dvorak
keypad:     adjacency_graphs.keypad
mac_keypad: adjacency_graphs.mac_keypad
25
26SEQUENCES =
lower: 'abcdefghijklmnopqrstuvwxyz'
upper: 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
digits: '0123456789'
30
31L33T_TABLE =
a: ['4', '@']
b: ['8']
c: ['(', '{', '[', '<']
e: ['3']
g: ['6', '9']
i: ['1', '!', '|']
l: ['1', '|', '7']
o: ['0']
s: ['$', '5']
t: ['+', '7']
x: ['%']
z: ['2']
44
45REGEXEN =
recent_year:  /19\d\d|200\d|201\d/g
47
48DATE_MAX_YEAR = 2050
49DATE_MIN_YEAR = 1000
50DATE_SPLITS =
4:[      # for length-4 strings, eg 1191 or 9111, two ways to split:
  [1, 2] # 1 1 91 (2nd split starts at index 1, 3rd at index 2)
  [2, 3] # 91 1 1
  ]
5:[
  [1, 3] # 1 11 91
  [2, 3] # 11 1 91
  ]
6:[
  [1, 2] # 1 1 1991
  [2, 4] # 11 11 91
  [4, 5] # 1991 1 1
  ]
7:[
  [1, 3] # 1 11 1991
  [2, 3] # 11 1 1991
  [4, 5] # 1991 1 11
  [4, 6] # 1991 11 1
  ]
8:[
  [2, 4] # 11 11 1991
  [4, 6] # 1991 11 11
  ]
74
75matching =
empty: (obj) -> (k for k of obj).length == 0
extend: (lst, lst2) -> lst.push.apply lst, lst2
translate: (string, chr_map) -> (chr_map[chr] or chr for chr in string.split('')).join('')
mod: (n, m) -> ((n % m) + m) % m # mod impl that works for negative numbers
sorted: (matches) ->
  # sort on i primary, j secondary
  matches.sort (m1, m2) ->
    (m1.i - m2.i) or (m1.j - m2.j)
84
# ------------------------------------------------------------------------------
# omnimatch -- combine everything ----------------------------------------------
# ------------------------------------------------------------------------------
88
omnimatch: (password) ->
  matches = []
  matchers = [
    @dictionary_match
    @reverse_dictionary_match
    @l33t_match
    @spatial_match
    @repeat_match
    @sequence_match
    @regex_match
    @date_match
  ]
  for matcher in matchers
    @extend matches, matcher.call(this, password)
  @sorted matches
104
#-------------------------------------------------------------------------------
# dictionary match (common passwords, english, last names, etc) ----------------
#-------------------------------------------------------------------------------
108
dictionary_match: (password, _ranked_dictionaries = RANKED_DICTIONARIES) ->
  # _ranked_dictionaries variable is for unit testing purposes
  matches = []
  len = password.length
  password_lower = password.toLowerCase()
  for dictionary_name, ranked_dict of _ranked_dictionaries
    for i in [0...len]
      for j in [i...len]
        if password_lower[i..j] of ranked_dict
          word = password_lower[i..j]
          rank = ranked_dict[word]
          matches.push
            pattern: 'dictionary'
            i: i
            j: j
            token: password[i..j]
            matched_word: word
            rank: rank
            dictionary_name: dictionary_name
            reversed: false
            l33t: false
  @sorted matches
131
reverse_dictionary_match: (password, _ranked_dictionaries = RANKED_DICTIONARIES) ->
  reversed_password = password.split('').reverse().join('')
  matches = @dictionary_match reversed_password, _ranked_dictionaries
  for match in matches
    match.token = match.token.split('').reverse().join('') # reverse back
    match.reversed = true
    # map coordinates back to original string
    [match.i, match.j] = [
      password.length - 1 - match.j
      password.length - 1 - match.i
    ]
  @sorted matches
144
set_user_input_dictionary: (ordered_list) ->
  RANKED_DICTIONARIES['user_inputs'] = build_ranked_dict ordered_list.slice()
147
#-------------------------------------------------------------------------------
# dictionary match with common l33t substitutions ------------------------------
#-------------------------------------------------------------------------------
151
# makes a pruned copy of l33t_table that only includes password's possible substitutions
relevant_l33t_subtable: (password, table) ->
  password_chars = {}
  for chr in password.split('')
    password_chars[chr] = true
  subtable = {}
  for letter, subs of table
    relevant_subs = (sub for sub in subs when sub of password_chars)
    if relevant_subs.length > 0
      subtable[letter] = relevant_subs
  subtable
163
# returns the list of possible 1337 replacement dictionaries for a given password
enumerate_l33t_subs: (table) ->
  keys = (k for k of table)
  subs = [[]]
168
  dedup = (subs) ->
    deduped = []
    members = {}
    for sub in subs
      assoc = ([k,v] for k,v in sub)
      assoc.sort()
      label = (k+','+v for k,v in assoc).join('-')
      unless label of members
        members[label] = true
        deduped.push sub
    deduped
180
  helper = (keys) ->
    return if not keys.length
    first_key = keys[0]
    rest_keys = keys[1..]
    next_subs = []
    for l33t_chr in table[first_key]
      for sub in subs
        dup_l33t_index = -1
        for i in [0...sub.length]
          if sub[i][0] == l33t_chr
            dup_l33t_index = i
            break
        if dup_l33t_index == -1
          sub_extension = sub.concat [[l33t_chr, first_key]]
          next_subs.push sub_extension
        else
          sub_alternative = sub.slice(0)
          sub_alternative.splice(dup_l33t_index, 1)
          sub_alternative.push [l33t_chr, first_key]
          next_subs.push sub
          next_subs.push sub_alternative
    subs = dedup next_subs
    helper(rest_keys)
204
  helper(keys)
  sub_dicts = [] # convert from assoc lists to dicts
  for sub in subs
    sub_dict = {}
    for [l33t_chr, chr] in sub
      sub_dict[l33t_chr] = chr
    sub_dicts.push sub_dict
  sub_dicts
213
l33t_match: (password, _ranked_dictionaries = RANKED_DICTIONARIES, _l33t_table = L33T_TABLE) ->
  matches = []
  for sub in @enumerate_l33t_subs @relevant_l33t_subtable(password, _l33t_table)
    break if @empty sub # corner case: password has no relevant subs.
    subbed_password = @translate password, sub
    for match in @dictionary_match(subbed_password, _ranked_dictionaries)
      token = password[match.i..match.j]
      if token.toLowerCase() == match.matched_word
        continue # only return the matches that contain an actual substitution
      match_sub = {} # subset of mappings in sub that are in use for this match
      for subbed_chr, chr of sub when token.indexOf(subbed_chr) != -1
        match_sub[subbed_chr] = chr
      match.l33t = true
      match.token = token
      match.sub = match_sub
      match.sub_display = ("#{k} -> #{v}" for k,v of match_sub).join(', ')
      matches.push match
  @sorted matches.filter (match) ->
    # filter single-character l33t matches to reduce noise.
    # otherwise '1' matches 'i', '4' matches 'a', both very common English words
    # with low dictionary rank.
    match.token.length > 1
236
# ------------------------------------------------------------------------------
# spatial match (qwerty/dvorak/keypad) -----------------------------------------
# ------------------------------------------------------------------------------
240
spatial_match: (password, _graphs = GRAPHS) ->
  matches = []
  for graph_name, graph of _graphs
    @extend matches, @spatial_match_helper(password, graph, graph_name)
  @sorted matches
246
SHIFTED_RX: /[~!@#$%^&*()_+QWERTYUIOP{}|ASDFGHJKL:"ZXCVBNM<>?]/
spatial_match_helper: (password, graph, graph_name) ->
  matches = []
  i = 0
  while i < password.length - 1
    j = i + 1
    last_direction = null
    turns = 0
    if graph_name in ['qwerty', 'dvorak'] and @SHIFTED_RX.exec(password.charAt(i))
      # initial character is shifted
      shifted_count = 1
    else
      shifted_count = 0
    loop
      prev_char = password.charAt(j-1)
      found = false
      found_direction = -1
      cur_direction = -1
      adjacents = graph[prev_char] or []
      # consider growing pattern by one character if j hasn't gone over the edge.
      if j < password.length
        cur_char = password.charAt(j)
        for adj in adjacents
          cur_direction += 1
          if adj and adj.indexOf(cur_char) != -1
            found = true
            found_direction = cur_direction
            if adj.indexOf(cur_char) == 1
              # index 1 in the adjacency means the key is shifted,
              # 0 means unshifted: A vs a, % vs 5, etc.
              # for example, 'q' is adjacent to the entry '2@'.
              # @ is shifted w/ index 1, 2 is unshifted.
              shifted_count += 1
            if last_direction != found_direction
              # adding a turn is correct even in the initial case when last_direction is null:
              # every spatial pattern starts with a turn.
              turns += 1
              last_direction = found_direction
            break
      # if the current pattern continued, extend j and try to grow again
      if found
        j += 1
      # otherwise push the pattern discovered so far, if any...
      else
        if j - i > 2 # don't consider length 1 or 2 chains.
          matches.push
            pattern: 'spatial'
            i: i
            j: j-1
            token: password[i...j]
            graph: graph_name
            turns: turns
            shifted_count: shifted_count
        # ...and then start a new search for the rest of the password.
        i = j
        break
  matches
304
#-------------------------------------------------------------------------------
# repeats (aaa, abcabcabc) and sequences (abcdef) ------------------------------
#-------------------------------------------------------------------------------
308
repeat_match: (password) ->
  matches = []
  greedy = /(.+)\1+/g
  lazy = /(.+?)\1+/g
  lazy_anchored = /^(.+?)\1+$/
  lastIndex = 0
  while lastIndex < password.length
    greedy.lastIndex = lazy.lastIndex = lastIndex
    greedy_match = greedy.exec password
    lazy_match = lazy.exec password
    break unless greedy_match?
    if greedy_match[0].length > lazy_match[0].length
      # greedy beats lazy for 'aabaab'
      #   greedy: [aabaab, aab]
      #   lazy:   [aa,     a]
      match = greedy_match
      # greedy's repeated string might itself be repeated, eg.
      # aabaab in aabaabaabaab.
      # run an anchored lazy match on greedy's repeated string
      # to find the shortest repeated string
      base_token = lazy_anchored.exec(match[0])[1]
    else
      # lazy beats greedy for 'aaaaa'
      #   greedy: [aaaa,  aa]
      #   lazy:   [aaaaa, a]
      match = lazy_match
      base_token = match[1]
    [i, j] = [match.index, match.index + match[0].length - 1]
    # recursively match and score the base string
    base_analysis = scoring.most_guessable_match_sequence(
      base_token
      @omnimatch base_token
    )
    base_matches = base_analysis.match_sequence
    base_guesses = base_analysis.guesses
    matches.push
      pattern: 'repeat'
      i: i
      j: j
      token: match[0]
      base_token: base_token
      base_guesses: base_guesses
      base_matches: base_matches
      repeat_count: match[0].length / base_token.length
    lastIndex = j + 1
  matches
355
sequence_match: (password) ->
  matches = []
  for sequence_name, sequence of SEQUENCES
    for direction in [1, -1]
      i = 0
      while i < password.length
        unless password.charAt(i) in sequence
          i += 1
          continue
        j = i + 1
        sequence_position = sequence.indexOf password.charAt(i)
        while j < password.length
          # mod by sequence length to allow sequences to wrap around: xyzabc
          next_sequence_position = @mod sequence_position + direction, sequence.length
          unless sequence.indexOf(password.charAt(j)) == next_sequence_position
            break
          j += 1
          sequence_position = next_sequence_position
        j -= 1
        if j - i + 1 > 1
          matches.push
            pattern: 'sequence'
            i: i
            j: j
            token: password[i..j]
            sequence_name: sequence_name
            sequence_space: sequence.length
            ascending: direction == 1
        i = j + 1
  @sorted matches
386
#-------------------------------------------------------------------------------
# regex matching ---------------------------------------------------------------
#-------------------------------------------------------------------------------
390
regex_match: (password, _regexen = REGEXEN) ->
  matches = []
  for name, regex of _regexen
    regex.lastIndex = 0 # keeps regex_match stateless
    while rx_match = regex.exec password
      token = rx_match[0]
      matches.push
        pattern: 'regex'
        token: token
        i: rx_match.index
        j: rx_match.index + rx_match[0].length - 1
        regex_name: name
        regex_match: rx_match
  @sorted matches
405
#-------------------------------------------------------------------------------
# date matching ----------------------------------------------------------------
#-------------------------------------------------------------------------------
409
date_match: (password) ->
  # a "date" is recognized as:
  #   any 3-tuple that starts or ends with a 2- or 4-digit year,
  #   with 2 or 0 separator chars (1.1.91 or 1191),
  #   maybe zero-padded (01-01-91 vs 1-1-91),
  #   a month between 1 and 12,
  #   a day between 1 and 31.
  #
  # note: this isn't true date parsing in that "feb 31st" is allowed,
  # this doesn't check for leap years, etc.
  #
  # recipe:
  # start with regex to find maybe-dates, then attempt to map the integers
  # onto month-day-year to filter the maybe-dates into dates.
  # finally, remove matches that are substrings of other matches to reduce noise.
  #
  # note: instead of using a lazy or greedy regex to find many dates over the full string,
  # this uses a ^...$ regex against every substring of the password -- less performant but leads
  # to every possible date match.
  matches = []
  maybe_date_no_separator = /^\d{4,8}$/
  maybe_date_with_separator = ///
    ^
    ( \d{1,4} )    # day, month, year
    ( [\s/\\_.-] ) # separator
    ( \d{1,2} )    # day, month
    \2             # same separator
    ( \d{1,4} )    # day, month, year
    $
  ///
440
  # dates without separators are between length 4 '1191' and 8 '11111991'
  for i in [0..password.length - 4]
    for j in [i + 3..i + 7]
      break if j >= password.length
      token = password[i..j]
      continue unless maybe_date_no_separator.exec token
      candidates = []
      for [k,l] in DATE_SPLITS[token.length]
        dmy = @map_ints_to_dmy [
          parseInt token[0...k]
          parseInt token[k...l]
          parseInt token[l...]
        ]
        candidates.push dmy if dmy?
      continue unless candidates.length > 0
      # at this point: different possible dmy mappings for the same i,j substring.
      # match the candidate date that likely takes the fewest guesses: a year closest to 2000.
      # (scoring.REFERENCE_YEAR).
      #
      # ie, considering '111504', prefer 11-15-04 to 1-1-1504
      # (interpreting '04' as 2004)
      best_candidate = candidates[0]
      metric = (candidate) -> Math.abs candidate.year - scoring.REFERENCE_YEAR
      min_distance = metric candidates[0]
      for candidate in candidates[1..]
        distance = metric candidate
        if distance < min_distance
          [best_candidate, min_distance] = [candidate, distance]
      matches.push
        pattern: 'date'
        token: token
        i: i
        j: j
        separator: ''
        year: best_candidate.year
        month: best_candidate.month
        day: best_candidate.day
478
  # dates with separators are between length 6 '1/1/91' and 10 '11/11/1991'
  for i in [0..password.length - 6]
    for j in [i + 5..i + 9]
      break if j >= password.length
      token = password[i..j]
      rx_match = maybe_date_with_separator.exec token
      continue unless rx_match?
      dmy = @map_ints_to_dmy [
        parseInt rx_match[1]
        parseInt rx_match[3]
        parseInt rx_match[4]
      ]
      continue unless dmy?
      matches.push
        pattern: 'date'
        token: token
        i: i
        j: j
        separator: rx_match[2]
        year: dmy.year
        month: dmy.month
        day: dmy.day
501
  # matches now contains all valid date strings in a way that is tricky to capture
  # with regexes only. while thorough, it will contain some unintuitive noise:
  #
  # '2015_06_04', in addition to matching 2015_06_04, will also contain
  # 5(!) other date matches: 15_06_04, 5_06_04, ..., even 2015 (matched as 5/1/2020)
  #
  # to reduce noise, remove date matches that are strict substrings of others
  @sorted matches.filter (match) ->
    is_submatch = false
    for other_match in matches
      continue if match is other_match
      if other_match.i <= match.i and other_match.j >= match.j
        is_submatch = true
        break
    not is_submatch
517
map_ints_to_dmy: (ints) ->
  # given a 3-tuple, discard if:
  #   middle int is over 31 (for all dmy formats, years are never allowed in the middle)
  #   middle int is zero
  #   any int is over the max allowable year
  #   any int is over two digits but under the min allowable year
  #   2 ints are over 31, the max allowable day
  #   2 ints are zero
  #   all ints are over 12, the max allowable month
  return if ints[1] > 31 or ints[1] <= 0
  over_12 = 0
  over_31 = 0
  under_1 = 0
  for int in ints
    return if 99 < int < DATE_MIN_YEAR or int > DATE_MAX_YEAR
    over_31 += 1 if int > 31
    over_12 += 1 if int > 12
    under_1 += 1 if int <= 0
  return if over_31 >= 2 or over_12 == 3 or under_1 >= 2
537
  # first look for a four digit year: yyyy + daymonth or daymonth + yyyy
  possible_year_splits = [
    [ints[2], ints[0..1]] # year last
    [ints[0], ints[1..2]] # year first
  ]
  for [y, rest] in possible_year_splits
    if DATE_MIN_YEAR <= y <= DATE_MAX_YEAR
      dm = @map_ints_to_dm rest
      if dm?
        return {
          year: y
          month: dm.month
          day: dm.day
        }
      else
        # for a candidate that includes a four-digit year,
        # when the remaining ints don't match to a day and month,
        # it is not a date.
        return
557
  # given no four-digit year, two digit years are the most flexible int to match, so
  # try to parse a day-month out of ints[0..1] or ints[1..0]
  for [y, rest] in possible_year_splits
    dm = @map_ints_to_dm rest
    if dm?
      y = @two_to_four_digit_year y
      return {
        year: y
        month: dm.month
        day: dm.day
      }
569
map_ints_to_dm: (ints) ->
  for [d, m] in [ints, ints.slice().reverse()]
    if 1 <= d <= 31 and 1 <= m <= 12
      return {
        day: d
        month: m
      }
577
two_to_four_digit_year: (year) ->
  if year > 99
    year
  else if year > 50
    # 87 -> 1987
    year + scoring.REFERENCE_YEAR - 100
  else
    # 15 -> 2015
    year + scoring.REFERENCE_YEAR
587
588module.exports = matching

1	`frequency_lists = require('./frequency_lists')`
2	`adjacency_graphs = require('./adjacency_graphs')`
3	`scoring = require('./scoring')`
4
5	`build_ranked_dict = (ordered_list) ->`
6	`result = {}`
7	`i = 1 # rank starts at 1, not 0`
8	`for word in ordered_list`
9	`result[word] = i`
10	`i += 1`
11	`result`
12
13	`RANKED_DICTIONARIES =`
14	`passwords: build_ranked_dict frequency_lists.passwords`
15	`english: build_ranked_dict frequency_lists.english`
16	`surnames: build_ranked_dict frequency_lists.surnames`
17	`male_names: build_ranked_dict frequency_lists.male_names`
18	`female_names: build_ranked_dict frequency_lists.female_names`
19
20	`GRAPHS =`
21	`qwerty: adjacency_graphs.qwerty`
22	`dvorak: adjacency_graphs.dvorak`
23	`keypad: adjacency_graphs.keypad`
24	`mac_keypad: adjacency_graphs.mac_keypad`
25
26	`SEQUENCES =`
27	`lower: 'abcdefghijklmnopqrstuvwxyz'`
28	`upper: 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'`
29	`digits: '0123456789'`
30
31	`L33T_TABLE =`
32	`a: ['4', '@']`
33	`b: ['8']`
34	`c: ['(', '{', '[', '<']`
35	`e: ['3']`
36	`g: ['6', '9']`
37	`i: ['1', '!', '\|']`
38	`l: ['1', '\|', '7']`
39	`o: ['0']`
40	`s: ['$', '5']`
41	`t: ['+', '7']`
42	`x: ['%']`
43	`z: ['2']`
44
45	`REGEXEN =`
46	`recent_year: /19\d\d\|200\d\|201\d/g`
47
48	`DATE_MAX_YEAR = 2050`
49	`DATE_MIN_YEAR = 1000`
50	`DATE_SPLITS =`
51	`4:[ # for length-4 strings, eg 1191 or 9111, two ways to split:`
52	`[1, 2] # 1 1 91 (2nd split starts at index 1, 3rd at index 2)`
53	`[2, 3] # 91 1 1`
54	`]`
55	`5:[`
56	`[1, 3] # 1 11 91`
57	`[2, 3] # 11 1 91`
58	`]`
59	`6:[`
60	`[1, 2] # 1 1 1991`
61	`[2, 4] # 11 11 91`
62	`[4, 5] # 1991 1 1`
63	`]`
64	`7:[`
65	`[1, 3] # 1 11 1991`
66	`[2, 3] # 11 1 1991`
67	`[4, 5] # 1991 1 11`
68	`[4, 6] # 1991 11 1`
69	`]`
70	`8:[`
71	`[2, 4] # 11 11 1991`
72	`[4, 6] # 1991 11 11`
73	`]`
74
75	`matching =`
76	`empty: (obj) -> (k for k of obj).length == 0`
77	`extend: (lst, lst2) -> lst.push.apply lst, lst2`
78	`translate: (string, chr_map) -> (chr_map[chr] or chr for chr in string.split('')).join('')`
79	`mod: (n, m) -> ((n % m) + m) % m # mod impl that works for negative numbers`
80	`sorted: (matches) ->`
81	`# sort on i primary, j secondary`
82	`matches.sort (m1, m2) ->`
83	`(m1.i - m2.i) or (m1.j - m2.j)`
84
85	`# ------------------------------------------------------------------------------`
86	`# omnimatch -- combine everything ----------------------------------------------`
87	`# ------------------------------------------------------------------------------`
88
89	`omnimatch: (password) ->`
90	`matches = []`
91	`matchers = [`
92	`@dictionary_match`
93	`@reverse_dictionary_match`
94	`@l33t_match`
95	`@spatial_match`
96	`@repeat_match`
97	`@sequence_match`
98	`@regex_match`
99	`@date_match`
100	`]`
101	`for matcher in matchers`
102	`@extend matches, matcher.call(this, password)`
103	`@sorted matches`
104
105	`#-------------------------------------------------------------------------------`
106	`# dictionary match (common passwords, english, last names, etc) ----------------`
107	`#-------------------------------------------------------------------------------`
108
109	`dictionary_match: (password, _ranked_dictionaries = RANKED_DICTIONARIES) ->`
110	`# _ranked_dictionaries variable is for unit testing purposes`
111	`matches = []`
112	`len = password.length`
113	`password_lower = password.toLowerCase()`
114	`for dictionary_name, ranked_dict of _ranked_dictionaries`
115	`for i in [0...len]`
116	`for j in [i...len]`
117	`if password_lower[i..j] of ranked_dict`
118	`word = password_lower[i..j]`
119	`rank = ranked_dict[word]`
120	`matches.push`
121	`pattern: 'dictionary'`
122	`i: i`
123	`j: j`
124	`token: password[i..j]`
125	`matched_word: word`
126	`rank: rank`
127	`dictionary_name: dictionary_name`
128	`reversed: false`
129	`l33t: false`
130	`@sorted matches`
131
132	`reverse_dictionary_match: (password, _ranked_dictionaries = RANKED_DICTIONARIES) ->`
133	`reversed_password = password.split('').reverse().join('')`
134	`matches = @dictionary_match reversed_password, _ranked_dictionaries`
135	`for match in matches`
136	`match.token = match.token.split('').reverse().join('') # reverse back`
137	`match.reversed = true`
138	`# map coordinates back to original string`
139	`[match.i, match.j] = [`
140	`password.length - 1 - match.j`
141	`password.length - 1 - match.i`
142	`]`
143	`@sorted matches`
144
145	`set_user_input_dictionary: (ordered_list) ->`
146	`RANKED_DICTIONARIES['user_inputs'] = build_ranked_dict ordered_list.slice()`
147
148	`#-------------------------------------------------------------------------------`
149	`# dictionary match with common l33t substitutions ------------------------------`
150	`#-------------------------------------------------------------------------------`
151
152	`# makes a pruned copy of l33t_table that only includes password's possible substitutions`
153	`relevant_l33t_subtable: (password, table) ->`
154	`password_chars = {}`
155	`for chr in password.split('')`
156	`password_chars[chr] = true`
157	`subtable = {}`
158	`for letter, subs of table`
159	`relevant_subs = (sub for sub in subs when sub of password_chars)`
160	`if relevant_subs.length > 0`
161	`subtable[letter] = relevant_subs`
162	`subtable`
163
164	`# returns the list of possible 1337 replacement dictionaries for a given password`
165	`enumerate_l33t_subs: (table) ->`
166	`keys = (k for k of table)`
167	`subs = [[]]`
168
169	`dedup = (subs) ->`
170	`deduped = []`
171	`members = {}`
172	`for sub in subs`
173	`assoc = ([k,v] for k,v in sub)`
174	`assoc.sort()`
175	`label = (k+','+v for k,v in assoc).join('-')`
176	`unless label of members`
177	`members[label] = true`
178	`deduped.push sub`
179	`deduped`
180
181	`helper = (keys) ->`
182	`return if not keys.length`
183	`first_key = keys[0]`
184	`rest_keys = keys[1..]`
185	`next_subs = []`
186	`for l33t_chr in table[first_key]`
187	`for sub in subs`
188	`dup_l33t_index = -1`
189	`for i in [0...sub.length]`
190	`if sub[i][0] == l33t_chr`
191	`dup_l33t_index = i`
192	`break`
193	`if dup_l33t_index == -1`
194	`sub_extension = sub.concat [[l33t_chr, first_key]]`
195	`next_subs.push sub_extension`
196	`else`
197	`sub_alternative = sub.slice(0)`
198	`sub_alternative.splice(dup_l33t_index, 1)`
199	`sub_alternative.push [l33t_chr, first_key]`
200	`next_subs.push sub`
201	`next_subs.push sub_alternative`
202	`subs = dedup next_subs`
203	`helper(rest_keys)`
204
205	`helper(keys)`
206	`sub_dicts = [] # convert from assoc lists to dicts`
207	`for sub in subs`
208	`sub_dict = {}`
209	`for [l33t_chr, chr] in sub`
210	`sub_dict[l33t_chr] = chr`
211	`sub_dicts.push sub_dict`
212	`sub_dicts`
213
214	`l33t_match: (password, _ranked_dictionaries = RANKED_DICTIONARIES, _l33t_table = L33T_TABLE) ->`
215	`matches = []`
216	`for sub in @enumerate_l33t_subs @relevant_l33t_subtable(password, _l33t_table)`
217	`break if @empty sub # corner case: password has no relevant subs.`
218	`subbed_password = @translate password, sub`
219	`for match in @dictionary_match(subbed_password, _ranked_dictionaries)`
220	`token = password[match.i..match.j]`
221	`if token.toLowerCase() == match.matched_word`
222	`continue # only return the matches that contain an actual substitution`
223	`match_sub = {} # subset of mappings in sub that are in use for this match`
224	`for subbed_chr, chr of sub when token.indexOf(subbed_chr) != -1`
225	`match_sub[subbed_chr] = chr`
226	`match.l33t = true`
227	`match.token = token`
228	`match.sub = match_sub`
229	`match.sub_display = ("#{k} -> #{v}" for k,v of match_sub).join(', ')`
230	`matches.push match`
231	`@sorted matches.filter (match) ->`
232	`# filter single-character l33t matches to reduce noise.`
233	`# otherwise '1' matches 'i', '4' matches 'a', both very common English words`
234	`# with low dictionary rank.`
235	`match.token.length > 1`
236
237	`# ------------------------------------------------------------------------------`
238	`# spatial match (qwerty/dvorak/keypad) -----------------------------------------`
239	`# ------------------------------------------------------------------------------`
240
241	`spatial_match: (password, _graphs = GRAPHS) ->`
242	`matches = []`
243	`for graph_name, graph of _graphs`
244	`@extend matches, @spatial_match_helper(password, graph, graph_name)`
245	`@sorted matches`
246
247	`SHIFTED_RX: /[~!@#$%^&*()_+QWERTYUIOP{}\|ASDFGHJKL:"ZXCVBNM<>?]/`
248	`spatial_match_helper: (password, graph, graph_name) ->`
249	`matches = []`
250	`i = 0`
251	`while i < password.length - 1`
252	`j = i + 1`
253	`last_direction = null`
254	`turns = 0`
255	`if graph_name in ['qwerty', 'dvorak'] and @SHIFTED_RX.exec(password.charAt(i))`
256	`# initial character is shifted`
257	`shifted_count = 1`
258	`else`
259	`shifted_count = 0`
260	`loop`
261	`prev_char = password.charAt(j-1)`
262	`found = false`
263	`found_direction = -1`
264	`cur_direction = -1`
265	`adjacents = graph[prev_char] or []`
266	`# consider growing pattern by one character if j hasn't gone over the edge.`
267	`if j < password.length`
268	`cur_char = password.charAt(j)`
269	`for adj in adjacents`
270	`cur_direction += 1`
271	`if adj and adj.indexOf(cur_char) != -1`
272	`found = true`
273	`found_direction = cur_direction`
274	`if adj.indexOf(cur_char) == 1`
275	`# index 1 in the adjacency means the key is shifted,`
276	`# 0 means unshifted: A vs a, % vs 5, etc.`
277	`# for example, 'q' is adjacent to the entry '2@'.`
278	`# @ is shifted w/ index 1, 2 is unshifted.`
279	`shifted_count += 1`
280	`if last_direction != found_direction`
281	`# adding a turn is correct even in the initial case when last_direction is null:`
282	`# every spatial pattern starts with a turn.`
283	`turns += 1`
284	`last_direction = found_direction`
285	`break`
286	`# if the current pattern continued, extend j and try to grow again`
287	`if found`
288	`j += 1`
289	`# otherwise push the pattern discovered so far, if any...`
290	`else`
291	`if j - i > 2 # don't consider length 1 or 2 chains.`
292	`matches.push`
293	`pattern: 'spatial'`
294	`i: i`
295	`j: j-1`
296	`token: password[i...j]`
297	`graph: graph_name`
298	`turns: turns`
299	`shifted_count: shifted_count`
300	`# ...and then start a new search for the rest of the password.`
301	`i = j`
302	`break`
303	`matches`
304
305	`#-------------------------------------------------------------------------------`
306	`# repeats (aaa, abcabcabc) and sequences (abcdef) ------------------------------`
307	`#-------------------------------------------------------------------------------`
308
309	`repeat_match: (password) ->`
310	`matches = []`
311	`greedy = /(.+)\1+/g`
312	`lazy = /(.+?)\1+/g`
313	`lazy_anchored = /^(.+?)\1+$/`
314	`lastIndex = 0`
315	`while lastIndex < password.length`
316	`greedy.lastIndex = lazy.lastIndex = lastIndex`
317	`greedy_match = greedy.exec password`
318	`lazy_match = lazy.exec password`
319	`break unless greedy_match?`
320	`if greedy_match[0].length > lazy_match[0].length`
321	`# greedy beats lazy for 'aabaab'`
322	`# greedy: [aabaab, aab]`
323	`# lazy: [aa, a]`
324	`match = greedy_match`
325	`# greedy's repeated string might itself be repeated, eg.`
326	`# aabaab in aabaabaabaab.`
327	`# run an anchored lazy match on greedy's repeated string`
328	`# to find the shortest repeated string`
329	`base_token = lazy_anchored.exec(match[0])[1]`
330	`else`
331	`# lazy beats greedy for 'aaaaa'`
332	`# greedy: [aaaa, aa]`
333	`# lazy: [aaaaa, a]`
334	`match = lazy_match`
335	`base_token = match[1]`
336	`[i, j] = [match.index, match.index + match[0].length - 1]`
337	`# recursively match and score the base string`
338	`base_analysis = scoring.most_guessable_match_sequence(`
339	`base_token`
340	`@omnimatch base_token`
341	`)`
342	`base_matches = base_analysis.match_sequence`
343	`base_guesses = base_analysis.guesses`
344	`matches.push`
345	`pattern: 'repeat'`
346	`i: i`
347	`j: j`
348	`token: match[0]`
349	`base_token: base_token`
350	`base_guesses: base_guesses`
351	`base_matches: base_matches`
352	`repeat_count: match[0].length / base_token.length`
353	`lastIndex = j + 1`
354	`matches`
355
356	`sequence_match: (password) ->`
357	`matches = []`
358	`for sequence_name, sequence of SEQUENCES`
359	`for direction in [1, -1]`
360	`i = 0`
361	`while i < password.length`
362	`unless password.charAt(i) in sequence`
363	`i += 1`
364	`continue`
365	`j = i + 1`
366	`sequence_position = sequence.indexOf password.charAt(i)`
367	`while j < password.length`
368	`# mod by sequence length to allow sequences to wrap around: xyzabc`
369	`next_sequence_position = @mod sequence_position + direction, sequence.length`
370	`unless sequence.indexOf(password.charAt(j)) == next_sequence_position`
371	`break`
372	`j += 1`
373	`sequence_position = next_sequence_position`
374	`j -= 1`
375	`if j - i + 1 > 1`
376	`matches.push`
377	`pattern: 'sequence'`
378	`i: i`
379	`j: j`
380	`token: password[i..j]`
381	`sequence_name: sequence_name`
382	`sequence_space: sequence.length`
383	`ascending: direction == 1`
384	`i = j + 1`
385	`@sorted matches`
386
387	`#-------------------------------------------------------------------------------`
388	`# regex matching ---------------------------------------------------------------`
389	`#-------------------------------------------------------------------------------`
390
391	`regex_match: (password, _regexen = REGEXEN) ->`
392	`matches = []`
393	`for name, regex of _regexen`
394	`regex.lastIndex = 0 # keeps regex_match stateless`
395	`while rx_match = regex.exec password`
396	`token = rx_match[0]`
397	`matches.push`
398	`pattern: 'regex'`
399	`token: token`
400	`i: rx_match.index`
401	`j: rx_match.index + rx_match[0].length - 1`
402	`regex_name: name`
403	`regex_match: rx_match`
404	`@sorted matches`
405
406	`#-------------------------------------------------------------------------------`
407	`# date matching ----------------------------------------------------------------`
408	`#-------------------------------------------------------------------------------`
409
410	`date_match: (password) ->`
411	`# a "date" is recognized as:`
412	`# any 3-tuple that starts or ends with a 2- or 4-digit year,`
413	`# with 2 or 0 separator chars (1.1.91 or 1191),`
414	`# maybe zero-padded (01-01-91 vs 1-1-91),`
415	`# a month between 1 and 12,`
416	`# a day between 1 and 31.`
417	`#`
418	`# note: this isn't true date parsing in that "feb 31st" is allowed,`
419	`# this doesn't check for leap years, etc.`
420	`#`
421	`# recipe:`
422	`# start with regex to find maybe-dates, then attempt to map the integers`
423	`# onto month-day-year to filter the maybe-dates into dates.`
424	`# finally, remove matches that are substrings of other matches to reduce noise.`
425	`#`
426	`# note: instead of using a lazy or greedy regex to find many dates over the full string,`
427	`# this uses a ^...$ regex against every substring of the password -- less performant but leads`
428	`# to every possible date match.`
429	`matches = []`
430	`maybe_date_no_separator = /^\d{4,8}$/`
431	`maybe_date_with_separator = ///`
432	`^`
433	`( \d{1,4} ) # day, month, year`
434	`( [\s/\\_.-] ) # separator`
435	`( \d{1,2} ) # day, month`
436	`\2 # same separator`
437	`( \d{1,4} ) # day, month, year`
438	`$`
439	`///`
440
441	`# dates without separators are between length 4 '1191' and 8 '11111991'`
442	`for i in [0..password.length - 4]`
443	`for j in [i + 3..i + 7]`
444	`break if j >= password.length`
445	`token = password[i..j]`
446	`continue unless maybe_date_no_separator.exec token`
447	`candidates = []`
448	`for [k,l] in DATE_SPLITS[token.length]`
449	`dmy = @map_ints_to_dmy [`
450	`parseInt token[0...k]`
451	`parseInt token[k...l]`
452	`parseInt token[l...]`
453	`]`
454	`candidates.push dmy if dmy?`
455	`continue unless candidates.length > 0`
456	`# at this point: different possible dmy mappings for the same i,j substring.`
457	`# match the candidate date that likely takes the fewest guesses: a year closest to 2000.`
458	`# (scoring.REFERENCE_YEAR).`
459	`#`
460	`# ie, considering '111504', prefer 11-15-04 to 1-1-1504`
461	`# (interpreting '04' as 2004)`
462	`best_candidate = candidates[0]`
463	`metric = (candidate) -> Math.abs candidate.year - scoring.REFERENCE_YEAR`
464	`min_distance = metric candidates[0]`
465	`for candidate in candidates[1..]`
466	`distance = metric candidate`
467	`if distance < min_distance`
468	`[best_candidate, min_distance] = [candidate, distance]`
469	`matches.push`
470	`pattern: 'date'`
471	`token: token`
472	`i: i`
473	`j: j`
474	`separator: ''`
475	`year: best_candidate.year`
476	`month: best_candidate.month`
477	`day: best_candidate.day`
478
479	`# dates with separators are between length 6 '1/1/91' and 10 '11/11/1991'`
480	`for i in [0..password.length - 6]`
481	`for j in [i + 5..i + 9]`
482	`break if j >= password.length`
483	`token = password[i..j]`
484	`rx_match = maybe_date_with_separator.exec token`
485	`continue unless rx_match?`
486	`dmy = @map_ints_to_dmy [`
487	`parseInt rx_match[1]`
488	`parseInt rx_match[3]`
489	`parseInt rx_match[4]`
490	`]`
491	`continue unless dmy?`
492	`matches.push`
493	`pattern: 'date'`
494	`token: token`
495	`i: i`
496	`j: j`
497	`separator: rx_match[2]`
498	`year: dmy.year`
499	`month: dmy.month`
500	`day: dmy.day`
501
502	`# matches now contains all valid date strings in a way that is tricky to capture`
503	`# with regexes only. while thorough, it will contain some unintuitive noise:`
504	`#`
505	`# '2015_06_04', in addition to matching 2015_06_04, will also contain`
506	`# 5(!) other date matches: 15_06_04, 5_06_04, ..., even 2015 (matched as 5/1/2020)`
507	`#`
508	`# to reduce noise, remove date matches that are strict substrings of others`
509	`@sorted matches.filter (match) ->`
510	`is_submatch = false`
511	`for other_match in matches`
512	`continue if match is other_match`
513	`if other_match.i <= match.i and other_match.j >= match.j`
514	`is_submatch = true`
515	`break`
516	`not is_submatch`
517
518	`map_ints_to_dmy: (ints) ->`
519	`# given a 3-tuple, discard if:`
520	`# middle int is over 31 (for all dmy formats, years are never allowed in the middle)`
521	`# middle int is zero`
522	`# any int is over the max allowable year`
523	`# any int is over two digits but under the min allowable year`
524	`# 2 ints are over 31, the max allowable day`
525	`# 2 ints are zero`
526	`# all ints are over 12, the max allowable month`
527	`return if ints[1] > 31 or ints[1] <= 0`
528	`over_12 = 0`
529	`over_31 = 0`
530	`under_1 = 0`
531	`for int in ints`
532	`return if 99 < int < DATE_MIN_YEAR or int > DATE_MAX_YEAR`
533	`over_31 += 1 if int > 31`
534	`over_12 += 1 if int > 12`
535	`under_1 += 1 if int <= 0`
536	`return if over_31 >= 2 or over_12 == 3 or under_1 >= 2`
537
538	`# first look for a four digit year: yyyy + daymonth or daymonth + yyyy`
539	`possible_year_splits = [`
540	`[ints[2], ints[0..1]] # year last`
541	`[ints[0], ints[1..2]] # year first`
542	`]`
543	`for [y, rest] in possible_year_splits`
544	`if DATE_MIN_YEAR <= y <= DATE_MAX_YEAR`
545	`dm = @map_ints_to_dm rest`
546	`if dm?`
547	`return {`
548	`year: y`
549	`month: dm.month`
550	`day: dm.day`
551	`}`
552	`else`
553	`# for a candidate that includes a four-digit year,`
554	`# when the remaining ints don't match to a day and month,`
555	`# it is not a date.`
556	`return`
557
558	`# given no four-digit year, two digit years are the most flexible int to match, so`
559	`# try to parse a day-month out of ints[0..1] or ints[1..0]`
560	`for [y, rest] in possible_year_splits`
561	`dm = @map_ints_to_dm rest`
562	`if dm?`
563	`y = @two_to_four_digit_year y`
564	`return {`
565	`year: y`
566	`month: dm.month`
567	`day: dm.day`
568	`}`
569
570	`map_ints_to_dm: (ints) ->`
571	`for [d, m] in [ints, ints.slice().reverse()]`
572	`if 1 <= d <= 31 and 1 <= m <= 12`
573	`return {`
574	`day: d`
575	`month: m`
576	`}`
577
578	`two_to_four_digit_year: (year) ->`
579	`if year > 99`
580	`year`
581	`else if year > 50`
582	`# 87 -> 1987`
583	`year + scoring.REFERENCE_YEAR - 100`
584	`else`
585	`# 15 -> 2015`
586	`year + scoring.REFERENCE_YEAR`
587
588	`module.exports = matching`