nittoco · nittoco · Dec 15, 2024 · TORUS0818 · Dec 16, 2024 · TORUS0818
diff --git a/139. Word Break.md b/139. Word Break.md
@@ -0,0 +1,294 @@
+## Step1
+
+- まずパッと書いたのは以下のような感じ
+- やや見にくいと思う(ちょっとネストが深いので関数化したい)
+
+```python
+
+class Solution:
+    def wordBreak(self, s: str, wordDict: List[str]) -> bool:
+        if len(s) == 0:
+            raise ValueError("String must be at least 1 length")
+        #can_segmented_so_far[i] := sのindex範囲が[0, i)の時に、分けることができるか
+        can_segmented_so_far = [False] * (len(s) + 1)
+        can_segmented_so_far[0] = True
+        for current_index in range(1, len(s) + 1):
+            for previous_index in reversed(range(current_index)):
+                if s[previous_index: current_index] in wordDict and can_segmented_so_far[previous_index]:
+                    can_segmented_so_far[current_index] = True
+                    break
+        return can_segmented_so_far[-1]
+```
+
+- どうせDPにしても命名長くなるし、関数cacheにしちゃってもいいかあ
+    - ネストも浅くなるし
+
+```python
+
+class Solution:
+    def wordBreak(self, s: str, wordDict: List[str]) -> bool:
+        if len(s) == 0:
+            raise ValueError("String must be at least 1 length")
+
+        @cache
+        def _can_be_segmented_finishing_this_length(length):
+            if length == 0:
+                return True
+            for last_segmented in reversed(range(length)):
+                if not _can_be_segmented_finishing_this_length(last_segmented):
+                    continue
+                if s[last_segmented: length] in wordDict:
+                    return True
+            return False
+
+        return _can_be_segmented_finishing_this_length(len(s))
+```
+
+## Step2
+
+https://github.com/hayashi-ay/leetcode/pull/61 
+
+- 各indexで分割できるか調べる時、wordDictでループを回した方がわかりやすかったかも
+- startswithというメソッドがある
+    - https://docs.python.org/3/library/stdtypes.html#str.startswith
+    - タプルで複数候補を入れられたり、startとendを指定できる
+    - wordDictでループを回すなら確かに使いたくなるかも
+    - breakable[i + len(word)] がすでにTrueでも何回もTrueを代入してしまう欠点はあり
+
+```python
+
+class Solution:
+    def wordBreak(self, s: str, wordDict: List[str]) -> bool:
+        # breakable[i] := [0, i)の範囲だったときに分割できるか
+        breakable = [False] * (len(s) + 1)
+        breakable[0] = True
+        for i in range(len(s)):
+            if not breakable[i]:
+                continue
+            for word in wordDict:
+                if not s.startswith(word, i):
+                    continue
+                breakable[i + len(word)] = True
+        return breakable[-1]
+```
+
+- 一応実装を見てみる
+- Trie木で実装することもできる
+    - return Falseを上に持ってきたくてwhileループで書いたが、普通にforループの方がわかりやすかったのか微妙
+
+```python
+
+class TrieTreeNode:
+    def __init__(self, character=None):
+        self.char_to_child_node = {}
+        self.is_end_of_word = False
+
+class TrieTree:
+    def __init__(self, words):
+        self.root = TrieTreeNode()
+        for word in words:
+            self.add_word(word)
+
+    def add_word(self, word):
+        node = self.root
+        for c in word:
+            if c not in node.char_to_child_node:
+                node.char_to_child_node[c] = TrieTreeNode()
+            node = node.char_to_child_node[c] 
+        node.is_end_of_word = True
+
+    def is_word_exist(self, word):
+        node = self.root
+        for c in word:
+            if c not in node.char_to_child_node:
+                return False
+            node = node.char_to_child_node[c] 
+        return node.is_end_of_word
+
+    def move_to_child_node(self, node, next_char):
+        if next_char not in node.char_to_child_node:
+            return None
+        return node.char_to_child_node[next_char]
+
+
+# 各sのindexで、そこからstartして、wordの終わりまで辿れるかを調べる
+class Solution:
+    def wordBreak(self, s: str, wordDict: List[str]) -> bool:
+        trie = TrieTree(wordDict)
+
+        @cache
+        def can_split_removing_first_this_length(removed_length):
+            if removed_length == len(s):
+                return True
+            node = trie.root
+            index = removed_length
+            while True:
+                if index >= len(s):
+                    return False
+                node = trie.move_to_child_node(node, s[index])
+                if not node:
+                    return False
+                if not node.is_end_of_word:
+                    index += 1
+                    continue
+                if not can_split_removing_first_this_length(index + 1):
+                    index += 1
+                    continue
+                return True
+
+        return can_split_removing_first_this_length(0)
+```
+
+https://github.com/fhiyo/leetcode/pull/40 
+
+- Trie木のなかに、sのprefixでwordDictにあるものを全て取ってくる機能をつけてるが、いるのかな
+    - 今がwordDictのなかにある単語の終わりかどうかはすぐわかるので、それで十分なような？
+- treeをwordDictから具体的にbuildするのは、クラスの初期化コードに含まない方がいいのだろうか
+
+https://github.com/goto-untrapped/Arai60/pull/20 
+
+- これは、startでsplittableなところから始めて、trie木でwordが見つかったらそのendを入れてる。自分の上のtrieのコードはsplittableかどうかを後ろからcacheしてるが、比較的自分に近い
+- ループの中でいちいちTrie木作る必要はあるのかな
+
+https://github.com/SuperHotDogCat/coding-interview/pull/23 
+
+- 関数を作ってさらにcacheの配列を作るよりは、@cacheするか配列のみで実装した方がわかりやすく感じた
+
+https://github.com/philip82148/leetcode-arai60/pull/8/files
+
+- Aho-Corasickでの実装(間違い)
+    - Solutionクラスで、can_split[i - node.depth + 1]のcheckだけにすると、前にたどれるだけたどる以外も最適の場合があるのでだめ
+    - 結局どの長さのwordのendかを、全部持たないといけない？
+        - じゃあ結局線形ではできない？
+    - アホコラ自体の実装は合ってるはず？
+- TrieTreeクラスもAhoCoraクラスもコンストラクタで色々な操作をやっているが、あまり良くないのだろうか??(よくわからない)
+    - chatGPTにこの悩みを相談したら、[@classmethod](https://docs.python.org/3/library/functions.html#classmethod)とかもいいですよと言ってきた。これどうなんだろう
+
+```python
+
+class TrieTreeNode:
+    def __init__(self, character: Optional[str] = None, depth: int = 0) -> None:
+        self.character = character
+        self.char_to_child_node = {}
+        self.failure_link = None
+        self.is_end_of_word = False
+        self.depth = depth
+
+class TrieTree:
+    def __init__(self, words: str) -> None:
+        self.root = TrieTreeNode()
+        for word in words:
+            self.add_word(word)
+
+    def add_word(self, word: str) -> None:
+        node = self.root
+        for c in word:
+            if c not in node.char_to_child_node:
+                node.char_to_child_node[c] = TrieTreeNode(
+                    character=c, depth=node.depth + 1
+                )
+            node = node.char_to_child_node[c]
+        node.is_end_of_word = True
+
+    def is_word_exist(self, word: str) -> bool:
+        node = self.root
+        for c in word:
+            if c not in node.char_to_child_node:
+                return False
+            node = node.char_to_child_node[c]
+        return node.is_end_of_word
+
+class AhoCorasick:
+    def __init__(self, words: List[str]) -> None:
+        self.trie = TrieTree(words)
+        self.add_failure_link()
+
+    def search_failure_node(
+        self, node: TrieTreeNode, parent_node: TrieTreeNode
+    ) -> None:
+        parent_of_failure = parent_node.failure_link
+        while True:
+            if not parent_of_failure:
+                return self.trie.root
+            if node.character not in parent_of_failure.char_to_child_node:
+                parent_of_failure = parent_of_failure.failure_link
+                continue
+            return parent_of_failure.char_to_child_node[node.character]
+
+    def add_failure_link(self) -> None:
+        current_level_nodes = [self.trie.root]
+        while current_level_nodes:
+            next_level_nodes = []
+            for node in current_level_nodes:
+                for child_node in node.char_to_child_node.values():
+                    next_level_nodes.append(child_node)
+                    child_node.failure_link = self.search_failure_node(child_node, node)
+                    if not child_node.failure_link.is_end_of_word:
+                        continue
+                    child_node.is_end_of_word = True
+            current_level_nodes = next_level_nodes
+
+    def move_to_next_node(self, node: TrieTreeNode, next_char: str) -> TrieTreeNode:
+        while next_char not in node.char_to_child_node:
+            node = node.failure_link
+            if not node:
+                return self.trie.root
+        return node.char_to_child_node[next_char]
+
+class Solution:
+    def wordBreak(self, s: str, wordDict: List[str]) -> bool:
+        can_split = [False] * (len(s) + 1)
+        can_split[0] = True
+        trie_with_failure = AhoCorasick(wordDict)
+        node = trie_with_failure.trie.root
+        for i, c in enumerate(s):
+            node = trie_with_failure.move_to_next_node(node, c)
+            if not node.is_end_of_word:
+                continue
+            if not can_split[i - node.depth + 1]:
+                continue
+            can_split[i + 1] = True
+        return can_split[-1]
+```
+
+- [アホコラの実装](https://github.com/fhiyo/leetcode/commit/ed3ace0dd68fc3c3b4a7b1a9a426cdc357ba9ba2)をfhiyoさんがしてくれてた。ありがたい。
+    - 行き先がない場合、Noneではなくrootで管理している
+        - これに伴い、未知のcharacterが出た場合新しくknown_charsに入れて、全部行き先管理するのは、ちょっとややこしい？
+    - 自分の実装、ノードはdataclassでも良かったかも？
+- 正規表現での実装(バックトラックが起きたため、s = `"aaaaaaaaa"` で `"a"`, `"aa"`, `"aaa"`, `"aaaa"` などの場合Time Limit Exceeded)
+    - https://github.com/shining-ai/leetcode/pull/39 を見る感じ、awkやre2だとOKなのかも(re2はLeetCodeでimportできず)
+    - プロダクションコードでは、ユーザーの入力を正規表現でcheckはやらない
+
+```python
+
+import re
+
+class Solution:
+    def wordBreak(self, s: str, wordDict: List[str]) -> bool:
+        escaped_words = [re.escape(word) for word in wordDict]
+        pattern = '^(' + '|'.join(escaped_words) + ')+$'
+        result = re.match(pattern, s)
+        if not result:
+            return False
+        return True
+```
+
+## Step3
+
+```python
+import re
+
+class Solution:
+    def wordBreak(self, s: str, wordDict: List[str]) -> bool:
+        # [0, i)までの場合、分割できるか
+        can_split = [False] * (len(s) + 1)
+        can_split[0] = True
+        for i in range(len(s)):
+            if not can_split[i]:
+                continue
+            for word in wordDict:
+                if not s.startswith(word, i):
+                    continue
+                can_split[i + len(word)] = True
+        return can_split[-1]
+```