diff --git a/bfs.py b/bfs.py
new file mode 100644
index 0000000..0fda290
--- /dev/null
+++ b/bfs.py
@@ -0,0 +1,19 @@
+from collections import deque
+
+def bfs(tree, root):
+    """Breadth-first traversal of a tree stored as an adjacency dict.
+
+    Returns the list of nodes in visit order.
+    """
+    visited = []
+    queue = deque([root])  # deque: O(1) popleft; list.pop(0) is O(n)
+    while queue:
+        node = queue.popleft()
+        visited.append(node)
+        for child in tree.get(node, []):  # leaf nodes have no dict entry
+            queue.append(child)
+    print("Visited : ", visited)
+    return visited
+
+tree = {1: [2, 3, 4], 2: [5, 6], 3: [7, 8], 4: [9]}
+output = bfs(tree, 1)
diff --git a/binary_search.py b/binary_search.py
new file mode 100644
index 0000000..b34f6bc
--- /dev/null
+++ b/binary_search.py
@@ -0,0 +1,28 @@
+def binary_search(arr, low, high, x):
+    """Recursively search sorted arr[low:high+1] for x.
+
+    Returns the index of x in arr, or -1 if x is not present.
+    """
+    if high >= low:
+        mid = (high + low) // 2
+        if arr[mid] == x:
+            return mid
+        # x is smaller than the midpoint: search the left half.
+        elif arr[mid] > x:
+            return binary_search(arr, low, mid - 1, x)
+        # Otherwise x can only be in the right half.
+        else:
+            return binary_search(arr, mid + 1, high, x)
+    # Empty range: element is not present.
+    return -1
+
+# Test array
+arr = [2, 3, 4, 10, 40]
+x = 10
+
+# Function call
+result = binary_search(arr, 0, len(arr) - 1, x)
+
+if result != -1:
+    print("Element is present at index", str(result))
+else:
+    print("Element is not present in array")
diff --git a/cleaning_data_for_ml.py b/cleaning_data_for_ml.py
new file mode 100644
index 0000000..2f6994b
--- /dev/null
+++ b/cleaning_data_for_ml.py
@@ -0,0 +1,26 @@
+"""Clean text for ML: tokenize, lowercase, strip punctuation, drop stop words."""
+import string
+
+from nltk.corpus import stopwords
+from nltk.tokenize import word_tokenize
+
+# Load the raw document; the context manager closes the file even on error.
+filename = 'metamorphosis_clean.txt'
+with open(filename, 'rt', encoding='utf-8') as file:
+    text = file.read()
+
+# Split into word tokens and normalise case.
+tokens = word_tokenize(text)
+tokens = [w.lower() for w in tokens]
+
+# Remove punctuation from each word in one C-level pass.
+table = str.maketrans('', '', string.punctuation)
+stripped = [w.translate(table) for w in tokens]
+
+# Keep only purely alphabetic tokens.
+words = [word for word in stripped if word.isalpha()]
+
+# Filter out English stop words.
+stop_words = set(stopwords.words('english'))
+words = [w for w in words if w not in stop_words]
+print(words[:100])