From 066bd7903cba86dbcbb17e027eea1103631a98dd Mon Sep 17 00:00:00 2001 From: Ronan-H Date: Thu, 9 Mar 2023 15:31:44 +0000 Subject: [PATCH] Implement an "unmunger" to solve #45 --- pom.xml | 4 +- .../nbvcxz/matching/DictionaryMatcher.java | 80 +-------------- .../nbvcxz/resources/Configuration.java | 26 +++-- .../resources/ConfigurationBuilder.java | 98 ++++++++++++------- .../resources/SubstitutionComboGen.java | 70 +++++++++++++ .../gosimple/nbvcxz/resources/TrieNode.java | 76 ++++++++++++++ .../matching/DictionaryMatcherTest.java | 41 ++++++++ 7 files changed, 275 insertions(+), 120 deletions(-) create mode 100644 src/main/java/me/gosimple/nbvcxz/resources/SubstitutionComboGen.java create mode 100644 src/main/java/me/gosimple/nbvcxz/resources/TrieNode.java diff --git a/pom.xml b/pom.xml index d46c5d1..c9ae086 100644 --- a/pom.xml +++ b/pom.xml @@ -79,8 +79,8 @@ maven-compiler-plugin 3.10.1 - 1.7 - 1.7 + 8 + 8 diff --git a/src/main/java/me/gosimple/nbvcxz/matching/DictionaryMatcher.java b/src/main/java/me/gosimple/nbvcxz/matching/DictionaryMatcher.java index 3ef275e..f53700c 100644 --- a/src/main/java/me/gosimple/nbvcxz/matching/DictionaryMatcher.java +++ b/src/main/java/me/gosimple/nbvcxz/matching/DictionaryMatcher.java @@ -2,14 +2,10 @@ import me.gosimple.nbvcxz.matching.match.DictionaryMatch; import me.gosimple.nbvcxz.matching.match.Match; -import me.gosimple.nbvcxz.resources.Configuration; +import me.gosimple.nbvcxz.resources.*; import me.gosimple.nbvcxz.resources.Dictionary; -import me.gosimple.nbvcxz.resources.DictionaryBuilder; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.TreeMap; +import java.util.*; /** * Look for every part of the password that match an entry in our dictionaries @@ -18,74 +14,6 @@ */ public final class DictionaryMatcher implements PasswordMatcher { - /** - * Removes all leet substitutions from the password and returns a list of plain text versions. - * - * @param configuration the configuration file used to estimate entropy. - * @param password the password to translate from leet. - * @return a list of all combinations of possible leet translations for the password with all leet removed. - */ - private static List translateLeet(final Configuration configuration, final String password) - { - final List translations = new ArrayList(); - final TreeMap replacements = new TreeMap<>(); - - for (int i = 0; i < password.length(); i++) - { - final Character[] replacement = configuration.getLeetTable().get(password.charAt(i)); - if (replacement != null) - { - replacements.put(i, replacement); - } - } - - // Do not bother continuing if we're going to replace every single character - if(replacements.keySet().size() == password.length()) - return translations; - - if (replacements.size() > 0) - { - final char[] password_char = new char[password.length()]; - for (int i = 0; i < password.length(); i++) - { - password_char[i] = password.charAt(i); - } - replaceAtIndex(replacements, replacements.firstKey(), password_char, translations); - } - - return translations; - } - - /** - * Internal function to recursively build the list of un-leet possibilities. - * - * @param replacements TreeMap of replacement index, and the possible characters at that index to be replaced - * @param current_index internal use for the function - * @param password a Character array of the original password - * @param final_passwords List of the final passwords to be filled - */ - private static void replaceAtIndex(final TreeMap replacements, Integer current_index, final char[] password, final List final_passwords) - { - for (final char replacement : replacements.get(current_index)) - { - password[current_index] = replacement; - if (current_index.equals(replacements.lastKey())) - { - final_passwords.add(new String(password)); - } - else if (final_passwords.size() > 100) - { - // Give up if we've already made 100 replacements - return; - } - else - { - replaceAtIndex(replacements, replacements.higherKey(current_index), password, final_passwords); - } - } - } - - /** * Gets the substitutions for the password. * @@ -279,6 +207,8 @@ public List match(final Configuration configuration, final String passwor final List matches = new ArrayList<>(); + final SubstitutionComboGen substitutionComboGen = new SubstitutionComboGen(configuration.getTrieNodeRoot()); + // Create all possible sub-sequences of the password for (int start = 0; start < password.length(); start++) { @@ -314,7 +244,7 @@ public List match(final Configuration configuration, final String passwor // Only do unleet if it's different than the regular lower. if (dictionary.getMaxLength() > split_password.length()) { - final List unleet_list = translateLeet(configuration, lower_part); + final List unleet_list = substitutionComboGen.getAllSubCombos(lower_part, configuration.getSubstituteComboLimit()); for (final String unleet_part : unleet_list) { final Integer unleet_rank = dictionary.getDictonary().get(unleet_part); diff --git a/src/main/java/me/gosimple/nbvcxz/resources/Configuration.java b/src/main/java/me/gosimple/nbvcxz/resources/Configuration.java index 9d72043..7c23616 100644 --- a/src/main/java/me/gosimple/nbvcxz/resources/Configuration.java +++ b/src/main/java/me/gosimple/nbvcxz/resources/Configuration.java @@ -1,9 +1,6 @@ package me.gosimple.nbvcxz.resources; -import me.gosimple.nbvcxz.matching.DictionaryMatcher; -import me.gosimple.nbvcxz.matching.PasswordMatcher; -import me.gosimple.nbvcxz.matching.SpacialMatcher; -import me.gosimple.nbvcxz.matching.YearMatcher; +import me.gosimple.nbvcxz.matching.*; import java.util.List; import java.util.Locale; @@ -22,10 +19,11 @@ public class Configuration private final Map guessTypes; private final List dictionaries; private final List adjacencyGraphs; - private final Map leetTable; + private final TrieNode trieNodeRoot; private final Pattern yearPattern; private final Double minimumEntropy; private final Integer maxLength; + private final Integer substituteComboLimit; private final Locale locale; private final boolean distanceCalc; private final ResourceBundle mainResource; @@ -37,23 +35,24 @@ public class Configuration * @param guessTypes Map of types of guesses, and associated guesses/sec * @param dictionaries List of {@link Dictionary} to use for the {@link DictionaryMatcher} * @param adjacencyGraphs List of adjacency graphs to be used by the {@link SpacialMatcher} - * @param leetTable Leet table for use with {@link DictionaryMatcher} + * @param trieNodeRoot Root trie node to help find possible string substitutions, for use with {@link DictionaryMatcher} * @param yearPattern Regex {@link Pattern} for use with {@link YearMatcher} * @param minimumEntropy Minimum entropy value passwords should meet * @param locale Locale for localized text and feedback * @param distanceCalc Enable or disable levenshtein distance calculation for dictionary matches * @param combinationAlgorithmTimeout Timeout for the findBestMatches algorithm */ - public Configuration(List passwordMatchers, Map guessTypes, List dictionaries, List adjacencyGraphs, Map leetTable, Pattern yearPattern, Double minimumEntropy, Integer maxLength, Locale locale, boolean distanceCalc, long combinationAlgorithmTimeout) + public Configuration(List passwordMatchers, Map guessTypes, List dictionaries, List adjacencyGraphs, TrieNode trieNodeRoot, Pattern yearPattern, Double minimumEntropy, Integer maxLength, Integer substituteComboLimit, Locale locale, boolean distanceCalc, long combinationAlgorithmTimeout) { this.passwordMatchers = passwordMatchers; this.guessTypes = guessTypes; this.dictionaries = dictionaries; this.adjacencyGraphs = adjacencyGraphs; - this.leetTable = leetTable; + this.trieNodeRoot = trieNodeRoot; this.yearPattern = yearPattern; this.minimumEntropy = minimumEntropy; this.maxLength = maxLength; + this.substituteComboLimit = substituteComboLimit; this.locale = locale; this.distanceCalc = distanceCalc; this.mainResource = ResourceBundle.getBundle("main", locale); @@ -96,9 +95,9 @@ public List getAdjacencyGraphs() /** * @return Leet table for use with {@link DictionaryMatcher} */ - public Map getLeetTable() + public TrieNode getTrieNodeRoot() { - return leetTable; + return trieNodeRoot; } /** @@ -124,6 +123,13 @@ public Integer getMaxLength() { return maxLength; } + /** + * @return The default maximum number of string combos to generate, based on the possible string substitutions. + */ + public Integer getSubstituteComboLimit() { + return substituteComboLimit; + } + /** * @return Locale for localized text and feedback */ diff --git a/src/main/java/me/gosimple/nbvcxz/resources/ConfigurationBuilder.java b/src/main/java/me/gosimple/nbvcxz/resources/ConfigurationBuilder.java index 2181bf0..2f777a7 100644 --- a/src/main/java/me/gosimple/nbvcxz/resources/ConfigurationBuilder.java +++ b/src/main/java/me/gosimple/nbvcxz/resources/ConfigurationBuilder.java @@ -33,7 +33,7 @@ public class ConfigurationBuilder private static final List defaultDictionaries = new ArrayList<>(); private static final List defaultPasswordMatchers = new ArrayList<>(); private static final List defaultAdjacencyGraphs = new ArrayList<>(); - private static final Map defaultLeetTable = new HashMap<>(); + private static final TrieNode defaultTrieNodeRoot; static { @@ -56,38 +56,59 @@ public class ConfigurationBuilder defaultAdjacencyGraphs.add(new AdjacencyGraph("Standard Keypad", AdjacencyGraphUtil.standardKeypad)); defaultAdjacencyGraphs.add(new AdjacencyGraph("Mac Keypad", AdjacencyGraphUtil.macKeypad)); - defaultLeetTable.put('4', new Character[]{'a'}); - defaultLeetTable.put('@', new Character[]{'a'}); - defaultLeetTable.put('8', new Character[]{'b'}); - defaultLeetTable.put('(', new Character[]{'c'}); - defaultLeetTable.put('{', new Character[]{'c'}); - defaultLeetTable.put('[', new Character[]{'c'}); - defaultLeetTable.put('<', new Character[]{'c'}); - defaultLeetTable.put('3', new Character[]{'e'}); - defaultLeetTable.put('9', new Character[]{'g'}); - defaultLeetTable.put('6', new Character[]{'g'}); - defaultLeetTable.put('&', new Character[]{'g'}); - defaultLeetTable.put('#', new Character[]{'h'}); - defaultLeetTable.put('!', new Character[]{'i', 'l'}); - defaultLeetTable.put('1', new Character[]{'i', 'l'}); - defaultLeetTable.put('|', new Character[]{'i', 'l'}); - defaultLeetTable.put('0', new Character[]{'o'}); - defaultLeetTable.put('$', new Character[]{'s'}); - defaultLeetTable.put('5', new Character[]{'s'}); - defaultLeetTable.put('+', new Character[]{'t'}); - defaultLeetTable.put('7', new Character[]{'t', 'l'}); - defaultLeetTable.put('%', new Character[]{'x'}); - defaultLeetTable.put('2', new Character[]{'z'}); + defaultTrieNodeRoot = new TrieNode() + // simple single character substitutions (mostly leet speak) + .addSub("4", "a") + .addSub("@", "a") + .addSub("8", "b") + .addSub("(", "c") + .addSub("{", "c") + .addSub("[", "c") + .addSub("<", "c", "k", "v") + .addSub(">", "v") + .addSub("3", "e") + .addSub("9", "g", "q") + .addSub("6", "d", "g") + .addSub("&", "g") + .addSub("#", "f", "h") + .addSub("!", "i", "l") + .addSub("1", "i", "l") + .addSub("|", "i", "l") + .addSub("0", "o") + .addSub("$", "s") + .addSub("5", "s") + .addSub("+", "t") + .addSub("7", "t", "l") + .addSub("%", "x") + .addSub("2", "z") + // extra "munged" variations from here: https://en.wikipedia.org/wiki/Munged_password + .addSub("?", "y") // (y = why?) + .addSub("uu", "w") + .addSub("vv", "w") + .addSub("nn", "m") + .addSub("2u", "uu", "w") + .addSub("2v", "vv", "w") + .addSub("2n", "nn", "m") + .addSub("2b", "bb") + .addSub("2d", "dd") + .addSub("2g", "gg") + .addSub("2l", "ll") + .addSub("2p", "pp") + .addSub("2t", "tt") + .addSub("\\/\\/", "w") + .addSub("/\\/\\", "m") + .addSub("|)", "d"); } private List passwordMatchers; private Map guessTypes; private List dictionaries; private List adjacencyGraphs; - private Map leetTable; + private TrieNode trieNodeRoot; private Pattern yearPattern; private Double minimumEntropy; private Integer maxLength; + private Integer substituteComboLimit; private Locale locale; private Boolean distanceCalc; private Long combinationAlgorithmTimeout; @@ -180,11 +201,11 @@ public static List getDefaultAdjacencyGraphs() } /** - * @return The default table of common english leet substitutions + * @return The default trie node root to find string substitutions */ - public static Map getDefaultLeetTable() + public static TrieNode getTrieNodeRoot() { - return new HashMap<>(defaultLeetTable); + return defaultTrieNodeRoot; } /** @@ -212,6 +233,14 @@ public static int getDefaultMaxLength() return 256; } + /** + * @return The default maximum number of password substitutions combos to generate, for a given password + */ + public static int getSubstituteComboLimit() + { + return 250; + } + /** * @return the default is false */ @@ -289,12 +318,12 @@ public ConfigurationBuilder setAdjacencyGraphs(List adjacencyGra /** * The leet table is used to check within a password for common character substitutions (e.g. s to $). * - * @param leetTable Map for leetTable + * @param trieNodeRoot Map for leetTable * @return Builder */ - public ConfigurationBuilder setLeetTable(Map leetTable) + public ConfigurationBuilder setLeetTable(TrieNode trieNodeRoot) { - this.leetTable = leetTable; + this.trieNodeRoot = trieNodeRoot; return this; } @@ -445,9 +474,9 @@ public Configuration createConfiguration() { adjacencyGraphs = getDefaultAdjacencyGraphs(); } - if (leetTable == null) + if (trieNodeRoot == null) { - leetTable = getDefaultLeetTable(); + trieNodeRoot = getTrieNodeRoot(); } if (yearPattern == null) { @@ -461,6 +490,9 @@ public Configuration createConfiguration() { maxLength = getDefaultMaxLength(); } + if (substituteComboLimit == null) { + substituteComboLimit = getSubstituteComboLimit(); + } if (locale == null) { locale = Locale.getDefault(); @@ -473,7 +505,7 @@ public Configuration createConfiguration() { combinationAlgorithmTimeout = getDefaultCombinationAlgorithmTimeout(); } - return new Configuration(passwordMatchers, guessTypes, dictionaries, adjacencyGraphs, leetTable, yearPattern, minimumEntropy, maxLength, locale, distanceCalc, combinationAlgorithmTimeout); + return new Configuration(passwordMatchers, guessTypes, dictionaries, adjacencyGraphs, trieNodeRoot, yearPattern, minimumEntropy, maxLength, substituteComboLimit, locale, distanceCalc, combinationAlgorithmTimeout); } diff --git a/src/main/java/me/gosimple/nbvcxz/resources/SubstitutionComboGen.java b/src/main/java/me/gosimple/nbvcxz/resources/SubstitutionComboGen.java new file mode 100644 index 0000000..78c2994 --- /dev/null +++ b/src/main/java/me/gosimple/nbvcxz/resources/SubstitutionComboGen.java @@ -0,0 +1,70 @@ +package me.gosimple.nbvcxz.resources; + +import java.util.ArrayList; +import java.util.List; + +public class SubstitutionComboGen { + private final TrieNode trieRoot; + + public SubstitutionComboGen(TrieNode trieRoot) { + this.trieRoot = trieRoot; + } + + /** + * Generates all possible combinations of a string against the root {@link TrieNode}. + * @param str The string to generate combinations from + * @param limit Limit number of combinations to generate + * @return List of combinations + */ + public List getAllSubCombos(final String str, final int limit) { + final List combos = new ArrayList<>(); + getAllSubCombos(str, 0, new StringBuilder(), combos, limit); + return combos; + } + + private void getAllSubCombos(final String substr, int index, StringBuilder buffer, final List finalPasswords, final int limit) + { + if (finalPasswords.size() >= limit) return; + + if (index == substr.length()) + { + // reached the end; add the contents of the buffer to the list of combinations + finalPasswords.add(buffer.toString()); + return; + } + + final char firstChar = substr.charAt(index); + + // first, generate all combos without doing a substitution at this index + buffer.append(firstChar); + getAllSubCombos(substr, index + 1, buffer, finalPasswords, limit); + buffer.setLength(buffer.length() - 1); + + // next, exhaust all possible substitutions at this index + TrieNode cur = trieRoot; + for (int i = index; i < substr.length(); i++) + { + final char c = substr.charAt(i); + cur = cur.getChild(c); + if (cur == null) + { + return; + } + + if (cur.isTerminal()) + { + String[] subs = cur.getSubs(); + for (String sub : subs) + { + buffer.append(sub); + // recursively build the rest of the string + getAllSubCombos(substr, i + 1, buffer, finalPasswords, limit); + // backtrack by ignoring the added postfix + buffer.setLength(buffer.length() - sub.length()); + + if (finalPasswords.size() >= limit) return; + } + } + } + } +} diff --git a/src/main/java/me/gosimple/nbvcxz/resources/TrieNode.java b/src/main/java/me/gosimple/nbvcxz/resources/TrieNode.java new file mode 100644 index 0000000..c9104a0 --- /dev/null +++ b/src/main/java/me/gosimple/nbvcxz/resources/TrieNode.java @@ -0,0 +1,76 @@ +package me.gosimple.nbvcxz.resources; + +import java.util.HashMap; +import java.util.Map; + +/** + * Represents a node in a trie of possible string substitutions. + */ +public class TrieNode +{ + private final Map children; + private String[] subs; + + public TrieNode() + { + children = new HashMap<>(); + } + + /** + * Adds a list of possible substitutions for a given string to the node. + * @param key The string that can be substituted + * @param subs An array of possible substitutions + * @return The same node, for method chaining + */ + public TrieNode addSub(String key, String...subs) + { + final char firstChar = key.charAt(0); + if (!children.containsKey(firstChar)) + { + children.put(firstChar, new TrieNode()); + } + TrieNode cur = children.get(firstChar); + for (int i = 1; i < key.length(); i++) + { + final char c = key.charAt(i); + if (!cur.hasChild(c)) + { + cur.addChild(c); + } + cur = cur.getChild(c); + } + cur.setSubs(subs); + + return this; + } + + public TrieNode getChild(Character child) { + return children.get(child); + } + + public boolean isTerminal() + { + return subs != null; + } + + public String[] getSubs() { + return subs; + } + + private void setSubs(String[] sub) + { + this.subs = sub; + } + + private void addChild(Character child) + { + if (!hasChild(child)) { + children.put(child, new TrieNode()); + } + } + + private boolean hasChild(Character child) + { + return children.containsKey(child); + } +} diff --git a/src/test/java/me/gosimple/nbvcxz/matching/DictionaryMatcherTest.java b/src/test/java/me/gosimple/nbvcxz/matching/DictionaryMatcherTest.java index 3127994..c66674b 100644 --- a/src/test/java/me/gosimple/nbvcxz/matching/DictionaryMatcherTest.java +++ b/src/test/java/me/gosimple/nbvcxz/matching/DictionaryMatcherTest.java @@ -9,7 +9,10 @@ import org.junit.Test; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; /** * @author Adam Brusselback @@ -190,6 +193,44 @@ public void testDictionaryMatchLD() } + /** + * Test of passwords that contain substituted substrings of arbitrary lengths + * (E.g. 'w' could be written as 'uu') + */ + @Test + public void testArbitraryLengthSubstitutions() + { + System.out.println("Test of arbitrary length substitutions, of class DictionaryMatcher"); + + PasswordMatcher matcher = new DictionaryMatcher(); + + // create a table of expected dictionary matches + Map mappings = new HashMap<>(); + mappings.put("P@55uu0rd", "password"); // uu = w (from issue #45) + mappings.put("/\\/\\3GA", "mega"); // /\/\ = m + mappings.put("|)R!2b|3", "dribble"); // |) = D, 2b = bb + mappings.put("/\\/\\02!2la", "mozilla"); // /\/\02!2l4 (2l = l) + mappings.put("B02t13", "bottle"); // 2t = tt + mappings.put("nn!|)|)l3", "middle"); // nn = m + mappings.put("so2n3", "some"); // 2n could mean nn or m, make sure 'm' is used + mappings.put("pe2n", "penn"); // same as above, but expecting 2n = nn + + for (String substitutedPass : mappings.keySet()) + { + // make a list of all the dictionary matches that were made, + // using streams to convert a list of matches to a list of each match's dictionary value + List dictValues = matcher.match(configuration, substitutedPass) + .stream() + .map(DictionaryMatch.class::cast) + .map(DictionaryMatch::getDictionaryValue) + .collect(Collectors.toList()); + + // make sure the converted version is in the list of dictionary value matches somewhere + String converted = mappings.get(substitutedPass); + String message = String.format("'%s' did not get matched to '%s'", substitutedPass, converted); + Assert.assertTrue(message, dictValues.contains(converted)); + } + } /** * Test of match method, of class DictionaryMatcher, using LD.