diff --git a/count.py b/count.py index 054146b..1912724 100755 --- a/count.py +++ b/count.py @@ -3,10 +3,10 @@ import read import collections - hn_stories=read.load_data() - headline_str="" - for i in hn_stories["headline"]: - headline_str=headline_str+str(i) - headline_str= str.lower(headline_str) + hn_stories=read.load_data() # data is read from inbuilt dataset + headline_str="" # empty string is set + for i in hn_stories["headline"]: + headline_str=headline_str+str(i) #string is appended + headline_str= str.lower(headline_str) # change to lowercase data=headline_str.split(" ") - print(collections.Counter(data).most_common(100)) \ No newline at end of file + print(collections.Counter(data).most_common(100)) # print the result diff --git a/domains.py b/domains.py index a58a996..1f406cf 100755 --- a/domains.py +++ b/domains.py @@ -3,7 +3,7 @@ hn_stories=read.load_data() domains=hn_stories["url"] -#print(collections.Counter(domains).most_common(100)) + domains2=domains.tolist() nosubdomains=[] @@ -16,6 +16,6 @@ else: nosubdomains.append(i) #print(collections.Counter(nosubdomains).most_common(100)) -#for name, row in domains.items(): +for name, row in domains.items(): print("{0}: {1}".format(name, row))