diff --git a/utils.py b/utils.py
index 2648c500..e0827a91 100644
--- a/utils.py
+++ b/utils.py
@@ -6,6 +6,7 @@
 from tqdm import tqdm
 import time
 from datetime import timedelta
+import jieba
 
 
 MAX_VOCAB_SIZE = 10000  # 词表长度限制
@@ -30,7 +31,7 @@ def build_vocab(file_path, tokenizer, max_size, min_freq):
 
 def build_dataset(config, ues_word):
     if ues_word:
-        tokenizer = lambda x: x.split(' ')  # 以空格隔开，word-level
+        tokenizer = lambda x: list(jieba.cut(x))  # word-level: segment text with jieba
     else:
         tokenizer = lambda x: [y for y in x]  # char-level
     if os.path.exists(config.vocab_path):
diff --git a/utils_fasttext.py b/utils_fasttext.py
index fe349252..4eea99da 100644
--- a/utils_fasttext.py
+++ b/utils_fasttext.py
@@ -6,6 +6,7 @@
 from tqdm import tqdm
 import time
 from datetime import timedelta
+import jieba
 
 
 MAX_VOCAB_SIZE = 10000
@@ -30,7 +31,7 @@ def build_vocab(file_path, tokenizer, max_size, min_freq):
 
 def build_dataset(config, ues_word):
     if ues_word:
-        tokenizer = lambda x: x.split(' ')  # 以空格隔开，word-level
+        tokenizer = lambda x: list(jieba.cut(x))  # word-level: segment text with jieba
     else:
         tokenizer = lambda x: [y for y in x]  # char-level
     if os.path.exists(config.vocab_path):