From 44e77548ff090f8ceedc7ec375821a5dcc83a043 Mon Sep 17 00:00:00 2001
From: md678685
Date: Wed, 11 Apr 2018 22:01:28 +0100
Subject: [PATCH] Add basic nonsense generator

Building sentences from tokens in WordUtil needs work, as does generating
sentence lists and saving new entries to the corpus which shouldn't happen on
the message handler thread.
---
 src/main/java/org/moss/discord/Constants.java |   1 +
 src/main/java/org/moss/discord/Main.java      |   2 +
 .../java/org/moss/discord/fun/Nonsense.java   | 170 ++++++++++++++++++
 .../discord/listeners/NonsenseListener.java   | 117 ++++++++++++
 .../java/org/moss/discord/util/WordUtil.java  |  54 ++++++
 5 files changed, 344 insertions(+)
 create mode 100644 src/main/java/org/moss/discord/fun/Nonsense.java
 create mode 100644 src/main/java/org/moss/discord/listeners/NonsenseListener.java
 create mode 100644 src/main/java/org/moss/discord/util/WordUtil.java

diff --git a/src/main/java/org/moss/discord/Constants.java b/src/main/java/org/moss/discord/Constants.java
index f25435c..edbdd79 100644
--- a/src/main/java/org/moss/discord/Constants.java
+++ b/src/main/java/org/moss/discord/Constants.java
@@ -12,6 +12,7 @@ public class Constants {
 
     public static final String CHANNEL_STARBOARD = ""; // TODO: fill these in once created
     public static final String CHANNEL_MODLOG = "430895774075846656";
+    public static final String CHANNEL_NONSENSE = "";
 
     // Roles
 
diff --git a/src/main/java/org/moss/discord/Main.java b/src/main/java/org/moss/discord/Main.java
index 03d3647..24d3503 100644
--- a/src/main/java/org/moss/discord/Main.java
+++ b/src/main/java/org/moss/discord/Main.java
@@ -6,6 +6,7 @@ import org.javacord.api.DiscordApiBuilder;
 
 import org.moss.discord.commands.BStatsCommand;
 import org.moss.discord.listeners.ModLogListeners;
+import org.moss.discord.listeners.NonsenseListener;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -36,6 +37,7 @@ public static void main(String[] args) {
         commandHandler.registerCommand(new BStatsCommand());
 
         api.addListener(new ModLogListeners(api));
+        api.addMessageCreateListener(new NonsenseListener(api));
     }
 
 }
diff --git a/src/main/java/org/moss/discord/fun/Nonsense.java b/src/main/java/org/moss/discord/fun/Nonsense.java
new file mode 100644
index 0000000..c78cd32
--- /dev/null
+++ b/src/main/java/org/moss/discord/fun/Nonsense.java
@@ -0,0 +1,170 @@
+package org.moss.discord.fun;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.moss.discord.util.WordUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.moss.discord.util.WordUtil.*;
+
+/**
+ * Markov-chain sentence generator. Every run of {@code ORDER} consecutive tokens is mapped to the
+ * tokens seen after it, together with how often each one followed.
+ *
+ * <p>Lines are parsed on a shared executor, so the chain can grow while it is being read. A corpus
+ * token equal to the start or end marker cannot be told apart from the marker itself.
+ */
+public class Nonsense {
+    private static final int ORDER = 3;
+    private static final String START = "B";
+    private static final String END = "Z";
+
+    private static final ExecutorService executor = Executors.newFixedThreadPool(16);
+    private static final Logger logger = LoggerFactory.getLogger(Nonsense.class);
+    private static final Random random = new Random();
+
+    // Created once: addToChain() can run again while earlier parse tasks are still counting.
+    private final AtomicInteger linesProcessed = new AtomicInteger(0);
+    private final AtomicInteger nodesProcessed = new AtomicInteger(0);
+
+    private final ConcurrentMap<String, ConcurrentMap<String, AtomicInteger>> nodes =
+        new ConcurrentHashMap<>();
+    private final CompletableFuture<ConcurrentMap<String, ConcurrentMap<String, AtomicInteger>>>
+        ready = new CompletableFuture<>();
+
+    /**
+     * Starts parsing {@code corpus} in the background; {@link #getReady()} completes when done.
+     *
+     * @param corpus newline-separated training text, may be empty
+     */
+    public Nonsense(String corpus) {
+        CompletableFuture<Void>[] tasks = addToChain(corpus);
+
+        // Report failures as well, otherwise one failed task leaves "ready" pending forever.
+        CompletableFuture.allOf(tasks).whenCompleteAsync((ignored, error) -> {
+            if (error != null) {
+                ready.completeExceptionally(error);
+            } else {
+                ready.complete(nodes);
+            }
+        }, executor);
+    }
+
+    /**
+     * Queues every non-blank line of {@code corpus} for parsing on the shared executor.
+     *
+     * @param corpus newline-separated text to learn from
+     * @return one future per queued line, completed once that line has been added to the chain
+     */
+    @SuppressWarnings("unchecked") // toArray can only create a raw CompletableFuture[]
+    public CompletableFuture<Void>[] addToChain(String corpus) {
+        return (CompletableFuture<Void>[]) splitLines(corpus).stream()
+            // A blank line would record "start -> end" and make empty replies likely.
+            .filter(line -> !line.trim().isEmpty())
+            .map(this::addControlChars)
+            .map(WordUtil::splitWords)
+            .map(WordUtil::removeEmpty)
+            .map(words -> CompletableFuture.runAsync(() -> parseLine(words), executor))
+            .toArray(CompletableFuture[]::new);
+    }
+
+    /**
+     * Records every (prefix, next token) pair of one tokenized line in the chain.
+     *
+     * @param words tokens of a line, including the start and end markers
+     */
+    public void parseLine(List<String> words) {
+        for (int i = ORDER; i < words.size(); i++) {
+            String prefix = joinWords(words.subList(i - ORDER, i));
+            String suffix = words.get(i);
+
+            // computeIfAbsent is atomic and never allocates a map or counter just to discard it.
+            nodes.computeIfAbsent(prefix, key -> new ConcurrentHashMap<>())
+                .computeIfAbsent(suffix, key -> new AtomicInteger(0))
+                .incrementAndGet();
+            nodesProcessed.incrementAndGet();
+        }
+
+        int lines = linesProcessed.incrementAndGet();
+        if (lines % 1000 == 0) {
+            logger.info("Parsed {} lines and {} nodes", lines, nodesProcessed.get());
+        }
+    }
+
+    /** Wraps a raw line in the start prefix and the end marker. */
+    private String addControlChars(String original) {
+        // The separating space matters: without it the prefix fuses with the first word
+        // ("B B Bhello") and the node generateNonsense() starts from is never recorded.
+        return getInitialPrefixNode() + " " + original + " " + END;
+    }
+
+    /**
+     * Walks the chain from the start prefix until the end marker or a dead end is reached.
+     *
+     * @return the generated sentence without control tokens; empty if nothing was learned yet
+     */
+    public String generateNonsense() {
+        List<String> words = new ArrayList<>(removeEmpty(splitWords(getInitialPrefixNode())));
+        int prefixLength = words.size();
+
+        // Iterative on purpose: recursing once per generated word can overflow the stack.
+        for (String next = predictNext(words); next != null; next = predictNext(words)) {
+            words.add(next);
+        }
+
+        return joinWords(words.subList(prefixLength, words.size()));
+    }
+
+    /**
+     * Picks the token that follows the last {@code ORDER} tokens of {@code words}, weighted by count.
+     *
+     * @return the chosen token, or {@code null} when the sentence ends here
+     */
+    private String predictNext(List<String> words) {
+        String prefix = joinWords(words.subList(words.size() - ORDER, words.size()));
+        Map<String, AtomicInteger> suffixMap = nodes.get(prefix);
+        if (suffixMap == null) {
+            return null;
+        }
+
+        List<String> candidates = new ArrayList<>();
+        suffixMap.forEach((suffix, count) -> {
+            for (int i = 0; i < count.get(); i++) {
+                candidates.add(suffix);
+            }
+        });
+        // A suffix added by a concurrent parse may still have a zero count.
+        if (candidates.isEmpty()) {
+            return null;
+        }
+
+        String chosen = candidates.get(random.nextInt(candidates.size()));
+        return chosen.equals(END) ? null : chosen;
+    }
+
+    /** Returns a future completed with the chain once the constructor's corpus has been parsed. */
+    public CompletableFuture<ConcurrentMap<String, ConcurrentMap<String, AtomicInteger>>> getReady() {
+        return ready;
+    }
+
+    /** Builds the start prefix: {@code ORDER} start markers separated by single spaces. */
+    private String getInitialPrefixNode() {
+        StringBuilder node = new StringBuilder(START);
+        for (int i = 1; i < ORDER; i++) {
+            node.append(' ').append(START);
+        }
+        return node.toString();
+    }
+
+}
\ No newline at end of file
diff --git a/src/main/java/org/moss/discord/listeners/NonsenseListener.java b/src/main/java/org/moss/discord/listeners/NonsenseListener.java
new file mode 100644
index 0000000..b57a5d9
--- /dev/null
+++ b/src/main/java/org/moss/discord/listeners/NonsenseListener.java
@@ -0,0 +1,117 @@
+package org.moss.discord.listeners;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.Scanner;
+
+import org.javacord.api.DiscordApi;
+import org.javacord.api.entity.channel.TextChannel;
+import org.javacord.api.entity.message.Message;
+import org.javacord.api.event.message.MessageCreateEvent;
+import org.javacord.api.listener.message.MessageCreateListener;
+import org.javacord.api.util.DiscordRegexPattern;
+import org.moss.discord.Constants;
+import org.moss.discord.fun.Nonsense;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Learns from messages posted in the nonsense channel and answers with generated text when the bot
+ * is mentioned there.
+ */
+public class NonsenseListener implements MessageCreateListener {
+    private static final Logger logger = LoggerFactory.getLogger(NonsenseListener.class);
+    private static final File corpusFile = new File("./corpus.txt");
+
+    private final DiscordApi api;
+    private final Nonsense nonsense;
+
+    // Written by the chain-ready callback and read by message handlers, hence volatile.
+    private volatile boolean ready = false;
+
+    /**
+     * Loads the corpus file and starts building the chain; messages are ignored until it is built.
+     *
+     * @param discordApi API instance used to fetch messages and to identify the bot user
+     */
+    public NonsenseListener(DiscordApi discordApi) {
+        api = discordApi;
+        nonsense = new Nonsense(readCorpus());
+
+        nonsense.getReady().whenCompleteAsync((chain, throwable) -> {
+            if (throwable == null) {
+                ready = true;
+            } else {
+                logger.error("Could not build the nonsense chain", throwable);
+            }
+        });
+    }
+
+    /** Reads the corpus file with curly quotes normalized; empty if the file is missing or empty. */
+    private static String readCorpus() {
+        try (Scanner scanner = new Scanner(corpusFile)) {
+            scanner.useDelimiter("\\Z");
+            // next() throws NoSuchElementException when the file has no content.
+            if (!scanner.hasNext()) {
+                return "";
+            }
+            return scanner.next()
+                .replaceAll("[“”]", "\"")
+                .replaceAll("[‘’]", "\'");
+        } catch (FileNotFoundException e) {
+            logger.info("No corpus at {}, starting with an empty chain", corpusFile);
+            return "";
+        }
+    }
+
+    @Override
+    public void onMessageCreate(MessageCreateEvent event) {
+        api.getMessageById(event.getMessageId(), event.getChannel()).thenAccept(this::handleMessage);
+    }
+
+    /** Answers mentions of the bot with nonsense; any other message becomes training data. */
+    private void handleMessage(Message message) {
+        TextChannel channel = message.getChannel();
+        if (!ready
+                || !channel.getIdAsString().equals(Constants.CHANNEL_NONSENSE)
+                || message.getAuthor().isYourself()) {
+            return;
+        }
+
+        if (message.getMentionedUsers().contains(api.getYourself())) {
+            // asUser() is an Optional: leave the mention out instead of calling get() unchecked.
+            String mention = message.getAuthor().asUser()
+                .map(user -> user.getMentionTag() + " ")
+                .orElse("");
+            String response = mention + nonsense.generateNonsense();
+            if (!response.isEmpty()) {
+                channel.sendMessage(response);
+            }
+        } else {
+            storeMessage(message.getContent());
+        }
+    }
+
+    /** Strips mentions and custom emoji, then appends the text to the corpus file and the chain. */
+    private void storeMessage(String message) {
+        String filtered = message.replaceAll(DiscordRegexPattern.CHANNEL_MENTION.pattern(), "")
+            .replaceAll(DiscordRegexPattern.CUSTOM_EMOJI.pattern(), "")
+            .replaceAll(DiscordRegexPattern.ROLE_MENTION.pattern(), "")
+            .replaceAll(DiscordRegexPattern.USER_MENTION.pattern(), "");
+        // Nothing left to learn from, e.g. the message consisted only of mentions.
+        if (filtered.trim().isEmpty()) {
+            return;
+        }
+
+        // TODO: do the file write off the message handler thread.
+        try (FileWriter writer = new FileWriter(corpusFile, true)) {
+            writer.append("\n" + filtered);
+        } catch (IOException e) {
+            logger.warn("Could not append to {}", corpusFile, e);
+        }
+
+        nonsense.addToChain(filtered);
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/org/moss/discord/util/WordUtil.java b/src/main/java/org/moss/discord/util/WordUtil.java
new file mode 100644
index 0000000..c8677b6
--- /dev/null
+++ b/src/main/java/org/moss/discord/util/WordUtil.java
@@ -0,0 +1,54 @@
+package org.moss.discord.util;
+
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+/** Tokenizing and joining helpers for the nonsense generator. */
+public final class WordUtil {
+
+    // Compiled once; joinWords() tests every token against it.
+    private static final Pattern SINGLE_PUNCTUATION = Pattern.compile("\\p{Punct}");
+
+    private WordUtil() {
+        // Static helpers only.
+    }
+
+    /** Splits {@code input} into lines on {@code \n} or {@code \r\n}. */
+    public static List<String> splitLines(String input) {
+        return Arrays.asList(input.split("\\r?\\n"));
+    }
+
+    /** Splits {@code input} at every regex word boundary; whitespace stays attached to tokens. */
+    public static List<String> splitWords(String input) {
+        return Arrays.asList(input.split("\\b"));
+    }
+
+    /** Returns the trimmed elements of {@code strings}, leaving out those that are empty after trimming. */
+    public static List<String> removeEmpty(List<String> strings) {
+        return strings.stream()
+            .map(String::trim)
+            .filter(s -> !s.isEmpty())
+            .collect(Collectors.toList());
+    }
+
+    /**
+     * Joins tokens with single spaces, except that a token consisting of exactly one punctuation
+     * character is attached directly to the text before it.
+     *
+     * @param words tokens to join; blank tokens are skipped
+     * @return the joined sentence, or an empty string if no tokens remain
+     */
+    public static String joinWords(List<String> words) {
+        StringBuilder result = new StringBuilder();
+        for (String word : removeEmpty(words)) {
+            if (result.length() > 0 && !SINGLE_PUNCTUATION.matcher(word).matches()) {
+                result.append(' ');
+            }
+            result.append(word);
+        }
+        return result.toString();
+    }
+}