diff --git a/bfs.py b/bfs.py
new file mode 100644
index 0000000..0fda290
--- /dev/null
+++ b/bfs.py
@@ -0,0 +1,19 @@
+from collections import deque
+
+def bfs(tree, root):
+    """Breadth-first traversal of a tree stored as an adjacency dict.
+
+    Returns the list of nodes in visit order.
+    """
+    visited = []
+    queue = deque([root])  # deque: O(1) popleft; list.pop(0) is O(n)
+    while queue:
+        node = queue.popleft()
+        visited.append(node)
+        for child in tree.get(node, []):  # leaf nodes have no dict entry
+            queue.append(child)
+    print("Visited : ", visited)
+    return visited
+
+tree = {1: [2, 3, 4], 2: [5, 6], 3: [7, 8], 4: [9]}
+output = bfs(tree, 1)
diff --git a/binary_search.py b/binary_search.py
new file mode 100644
index 0000000..b34f6bc
--- /dev/null
+++ b/binary_search.py
@@ -0,0 +1,28 @@
+def binary_search(arr, low, high, x):
+    """Recursively search sorted arr[low:high+1] for x.
+
+    Returns the index of x in arr, or -1 if x is not present.
+    """
+    if high >= low:
+        mid = (high + low) // 2
+        if arr[mid] == x:
+            return mid
+        # x is smaller than the midpoint: search the left half.
+        elif arr[mid] > x:
+            return binary_search(arr, low, mid - 1, x)
+        # Otherwise x can only be in the right half.
+        else:
+            return binary_search(arr, mid + 1, high, x)
+    # Empty range: element is not present.
+    return -1
+
+# Test array
+arr = [2, 3, 4, 10, 40]
+x = 10
+
+# Function call
+result = binary_search(arr, 0, len(arr) - 1, x)
+
+if result != -1:
+    print("Element is present at index", str(result))
+else:
+    print("Element is not present in array")
diff --git a/cleaning_data_for_ml.py b/cleaning_data_for_ml.py
new file mode 100644
index 0000000..2f6994b
--- /dev/null
+++ b/cleaning_data_for_ml.py
@@ -0,0 +1,26 @@
+"""Clean text for ML: tokenize, lowercase, strip punctuation, drop stop words."""
+import string
+
+from nltk.corpus import stopwords
+from nltk.tokenize import word_tokenize
+
+# Load the raw document; the context manager closes the file even on error.
+filename = 'metamorphosis_clean.txt'
+with open(filename, 'rt', encoding='utf-8') as file:
+    text = file.read()
+
+# Split into word tokens and normalise case.
+tokens = word_tokenize(text)
+tokens = [w.lower() for w in tokens]
+
+# Remove punctuation from each word in one C-level pass.
+table = str.maketrans('', '', string.punctuation)
+stripped = [w.translate(table) for w in tokens]
+
+# Keep only purely alphabetic tokens.
+words = [word for word in stripped if word.isalpha()]
+
+# Filter out English stop words.
+stop_words = set(stopwords.words('english'))
+words = [w for w in words if w not in stop_words]
+print(words[:100])