commit-live-students · vivekshingate · Nov 10, 2018 · Nov 11, 2018 · Nov 13, 2018 · Nov 14, 2018
diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc
diff --git a/q01_cond_prob/__pycache__/__init__.cpython-36.pyc b/q01_cond_prob/__pycache__/__init__.cpython-36.pyc
diff --git a/q01_cond_prob/__pycache__/build.cpython-36.pyc b/q01_cond_prob/__pycache__/build.cpython-36.pyc
diff --git a/q01_cond_prob/build.py b/q01_cond_prob/build.py
@@ -1,3 +1,4 @@
+# %load q01_cond_prob/build.py
 # So that float division is by default in python 2.7
 from __future__ import division
 
@@ -8,5 +9,19 @@
 
 # Enter Code Here
 
+def cond_prob(df):
+    """Return the probability that three houses picked at random are all in OldTown.
+
+    The houses are drawn without replacement, so each pick removes one
+    OldTown house and one house overall from the pool.
+
+    Args:
+        df: DataFrame with a 'Neighborhood' column.
+
+    Returns:
+        The probability as a float; 0.0 when fewer than three OldTown
+        houses are available.
+    """
+    picks = 3
+    neighborhoods = df['Neighborhood']
+
+    # count() ignores missing values, so rows without a neighborhood are
+    # left out of the pool.
+    total_count = neighborhoods.count()
+    oldtown_count = (neighborhoods == 'OldTown').sum()
+
+    # Three OldTown houses cannot be drawn from fewer than three. Returning
+    # early also guarantees total_count >= 3, so no denominator below is zero.
+    if oldtown_count < picks:
+        return 0.0
+
+    probability = 1.0
+    for already_drawn in range(picks):
+        probability *= (oldtown_count - already_drawn) / (total_count - already_drawn)
+
+    return probability
+
+# cond_prob requires the DataFrame argument; calling it bare raises TypeError.
+# NOTE(review): assumes `df` is loaded at module level earlier in this file
+# (outside the visible hunk) -- confirm.
+cond_prob(df)
 
 
diff --git a/q01_cond_prob/tests/__pycache__/__init__.cpython-36.pyc b/q01_cond_prob/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q01_cond_prob/tests/__pycache__/test_q01_cond_prob.cpython-36.pyc b/q01_cond_prob/tests/__pycache__/test_q01_cond_prob.cpython-36.pyc
diff --git a/q02_confidence_interval/__pycache__/__init__.cpython-36.pyc b/q02_confidence_interval/__pycache__/__init__.cpython-36.pyc
diff --git a/q02_confidence_interval/__pycache__/build.cpython-36.pyc b/q02_confidence_interval/__pycache__/build.cpython-36.pyc
diff --git a/q02_confidence_interval/build.py b/q02_confidence_interval/build.py
@@ -1,13 +1,39 @@
+# %load q02_confidence_interval/build.py
 # Default imports
 import math
 import scipy.stats as stats
 import pandas as pd
 import numpy as np
 df = pd.read_csv('data/house_pricing.csv')
-sample = df['GrLivArea']
+sample = df['GrLivArea']              # GrLivArea column; passed to confidence_interval() at the bottom of this file
+
 
 
 # Write your solution here :
+def confidence_interval(sample):
+    """Compute a two-sided 90% z-based confidence interval for the mean of `sample`.
+
+    Every intermediate quantity is printed before the limits are returned.
+
+    Args:
+        sample: object exposing ``shape``, ``mean()`` and ``std()``
+            (a pandas Series in this file).
+
+    Returns:
+        Tuple ``(lower_limit, upper_limit)``.
+    """
+    size = sample.shape[0]
+    mean = sample.mean()
+    # The 0.95 quantile leaves 5% in each tail, i.e. a two-sided 90% interval.
+    z_critical = stats.norm.ppf(q=0.95)
+    spread = sample.std()
+
+    standard_error = spread / math.sqrt(size)
+    margin = z_critical * standard_error
+    lower, upper = mean - margin, mean + margin
+
+    report = (
+        ('N               : ', size),
+        ('x_bar           : ', mean),
+        ('Z               : ', z_critical),
+        ('S               : ', spread),
+        ('Margin of error : ', margin),
+        ('Lower Limit     : ', lower),
+        ('Upper Limit     : ', upper),
+    )
+    for label, value in report:
+        print(label, value)
+
+    return lower, upper
+
+# Module-level call: computes (and prints) the interval for `sample`
+# whenever this file is executed or imported.
+confidence_interval(sample)
+
 
 
 
diff --git a/q02_confidence_interval/tests/__pycache__/__init__.cpython-36.pyc b/q02_confidence_interval/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q02_confidence_interval/tests/__pycache__/test_q02_confidence_interval.cpython-36.pyc b/q02_confidence_interval/tests/__pycache__/test_q02_confidence_interval.cpython-36.pyc
diff --git a/q03_t_test/__pycache__/__init__.cpython-36.pyc b/q03_t_test/__pycache__/__init__.cpython-36.pyc
diff --git a/q03_t_test/__pycache__/build.cpython-36.pyc b/q03_t_test/__pycache__/build.cpython-36.pyc
diff --git a/q03_t_test/build.py b/q03_t_test/build.py
@@ -1,9 +1,18 @@
+# %load q03_t_test/build.py
 # Default imports
 import scipy.stats as stats
 import pandas as pd
+import math
+from statsmodels.stats.weightstats import ztest
 
 df = pd.read_csv('data/house_pricing.csv')
 
+def t_statistic(df):
+    """One-sample t-test of OldTown living area against the overall mean.
+
+    The 'GrLivArea' values of rows whose 'Neighborhood' is 'OldTown' are
+    tested against the mean 'GrLivArea' of the whole frame. The test
+    statistic is printed.
+
+    Args:
+        df: DataFrame with 'Neighborhood' and 'GrLivArea' columns.
+
+    Returns:
+        Tuple ``(p_value, is_significant)`` where ``is_significant`` is
+        ``p_value < 0.1``.
+    """
+    living_area = df['GrLivArea']
+    oldtown_area = living_area[df['Neighborhood'] == 'OldTown']
+
+    t_stat, p_value = stats.ttest_1samp(oldtown_area, living_area.mean())
+    print(t_stat)
+
+    # 90% confidence level, i.e. a significance threshold of 0.1.
+    is_significant = p_value < 0.1
+    return p_value, is_significant
+
+# Module-level call: runs the test (and prints the statistic) whenever
+# this file is executed or imported.
+t_statistic(df)
 
-# Enter Code Here
 
diff --git a/q03_t_test/tests/__pycache__/__init__.cpython-36.pyc b/q03_t_test/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q03_t_test/tests/__pycache__/test_q03_t_test.cpython-36.pyc b/q03_t_test/tests/__pycache__/test_q03_t_test.cpython-36.pyc
diff --git a/q04_chi2_test/__pycache__/__init__.cpython-36.pyc b/q04_chi2_test/__pycache__/__init__.cpython-36.pyc
diff --git a/q04_chi2_test/__pycache__/build.cpython-36.pyc b/q04_chi2_test/__pycache__/build.cpython-36.pyc
diff --git a/q04_chi2_test/build.py b/q04_chi2_test/build.py
@@ -1,3 +1,5 @@
+# TODO: verify this solution -- the author suspects the result is wrong.
+# %load q04_chi2_test/build.py
 # Default imports
 import scipy.stats as stats
 import pandas as pd
@@ -6,5 +8,30 @@
 
 
 # Enter Code Here
+def chi_square(df):
+    """Chi-square test of independence between LandSlope and binned SalePrice.
+
+    'SalePrice' is cut into three equal-frequency bins, cross-tabulated
+    against 'LandSlope', and tested at the 5% significance level.
+
+    Args:
+        df: DataFrame with 'LandSlope' and 'SalePrice' columns.
+
+    Returns:
+        Tuple ``(p_value, reject_null)``. ``reject_null`` is True when the
+        test statistic exceeds the 95% critical value, i.e. when the null
+        hypothesis of independence is rejected.
+    """
+    x = df['LandSlope']
+    # qcut assigns labels to bins from the lowest values upward, so the
+    # labels must be listed in ascending order.
+    y = pd.qcut(df['SalePrice'], 3, labels=['low', 'medium', 'High'])
+
+    freqtab = pd.crosstab(x, y)
+    chi2, pval, dof, _ = stats.chi2_contingency(freqtab)
+
+    # The critical value must use the table's own degrees of freedom,
+    # (rows - 1) * (cols - 1), as reported by chi2_contingency. A fixed
+    # value would let this flag disagree with the returned p-value.
+    crit = stats.chi2.ppf(q=0.95, df=dof)
+
+    return pval, (chi2 > crit)
+
+
+
+
+# Module-level call: runs the chi-square test on `df` whenever this file
+# is executed or imported.
+chi_square(df)
+
 
 
diff --git a/q04_chi2_test/tests/__pycache__/__init__.cpython-36.pyc b/q04_chi2_test/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q04_chi2_test/tests/__pycache__/test_q04_chi2_test.cpython-36.pyc b/q04_chi2_test/tests/__pycache__/test_q04_chi2_test.cpython-36.pyc