def cond_prob(df, neighborhood='OldTown', picks=3):
    """Probability that `picks` houses drawn without replacement all lie in
    `neighborhood`.

    The result is the product, for i = 0 .. picks - 1, of
    (matching - i) / (total - i), where `total` is the number of non-null
    entries in df['Neighborhood'] and `matching` is how many of them equal
    `neighborhood`.

    Parameters
    ----------
    df : object indexable by 'Neighborhood' (the code compares that column
        with `==` and calls `.sum()` / `.count()` on it, as a pandas
        DataFrame allows).
    neighborhood : value the column is compared against.
    picks : number of successive draws (non-negative integer).

    Returns
    -------
    float
        0.0 when fewer than `picks` matching rows exist.

    Raises
    ------
    ValueError
        If `picks` is negative.
    """
    if picks < 0:
        raise ValueError("picks must be non-negative")

    column = df['Neighborhood']
    total = int(column.count())
    matching = int((column == neighborhood).sum())

    # Not enough matching houses to draw `picks` of them. Returning early
    # also keeps every denominator below strictly positive, because
    # matching <= total.
    if matching < picks:
        return 0.0

    probability = 1.0
    for drawn in range(picks):
        # float() keeps this a true division even where
        # `from __future__ import division` is not in effect.
        probability *= float(matching - drawn) / (total - drawn)
    return probability


if __name__ == "__main__":
    # NOTE(review): assumes a module-level `df` is loaded above this block
    # (not visible in the hunk) -- confirm before running as a script.
    cond_prob(df)
def confidence_interval(sample, confidence=0.90):
    """Two-sided normal-approximation confidence interval for the mean.

    The interval is x_bar +/- Z * S / sqrt(N), where N is `sample.shape[0]`,
    x_bar is `sample.mean()`, S is `sample.std()` and Z is the standard
    normal quantile at 1 - (1 - confidence) / 2. With the default
    confidence of 0.90 that quantile is 0.95.

    Each intermediate value is printed to stdout before returning.

    Parameters
    ----------
    sample : object exposing `.shape`, `.mean()` and `.std()`
        (presumably a pandas Series -- the degrees of freedom used by
        `.std()` are whatever that object's default is).
    confidence : float strictly between 0 and 1.

    Returns
    -------
    tuple
        (lower_limit, upper_limit)

    Raises
    ------
    ValueError
        If `confidence` is not strictly between 0 and 1.
    """
    if not 0.0 < confidence < 1.0:
        raise ValueError("confidence must be strictly between 0 and 1")

    n_obs = sample.shape[0]                      # length of sample
    x_bar = sample.mean()                        # sample mean
    # Split the excluded probability evenly between the two tails.
    z_critical = stats.norm.ppf(q=1 - (1 - confidence) / 2)
    std_dev = sample.std()                       # sample standard deviation
    margin_of_error = z_critical * (std_dev / math.sqrt(n_obs))

    lower_limit = x_bar - margin_of_error
    upper_limit = x_bar + margin_of_error

    print('N : ', n_obs)
    print('x_bar : ', x_bar)
    print('Z : ', z_critical)
    print('S : ', std_dev)
    print('Margin of error : ', margin_of_error)
    print('Lower Limit : ', lower_limit)
    print('Upper Limit : ', upper_limit)

    return lower_limit, upper_limit


if __name__ == "__main__":
    # Uses the module-level `sample` (df['GrLivArea']) defined above.
    confidence_interval(sample)
def t_statistic(df, neighborhood='OldTown', column='GrLivArea', alpha=0.1):
    """One-sample t-test of one neighbourhood's values against the overall mean.

    Selects `column` for the rows where df['Neighborhood'] equals
    `neighborhood` and tests it with `scipy.stats.ttest_1samp` against the
    mean of `column` over all rows. The t statistic is printed to stdout.

    Parameters
    ----------
    df : pandas DataFrame with a 'Neighborhood' column and `column`.
    neighborhood : value selecting the rows that form the sample.
    column : name of the numeric column being tested.
    alpha : significance threshold the p-value is compared with.

    Returns
    -------
    tuple
        (p_value, p_value < alpha)
    """
    group = df.loc[df['Neighborhood'] == neighborhood, column]
    overall_mean = df[column].mean()

    t_stat, p_value = stats.ttest_1samp(group, overall_mean)
    print(t_stat)

    return p_value, p_value < alpha


if __name__ == "__main__":
    # Uses the module-level `df` loaded from data/house_pricing.csv above.
    t_statistic(df)
diff --git a/q03_t_test/tests/__pycache__/__init__.cpython-36.pyc b/q03_t_test/tests/__pycache__/__init__.cpython-36.pyc index c489290..0ec7db8 100644 Binary files a/q03_t_test/tests/__pycache__/__init__.cpython-36.pyc and b/q03_t_test/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_t_test/tests/__pycache__/test_q03_t_test.cpython-36.pyc b/q03_t_test/tests/__pycache__/test_q03_t_test.cpython-36.pyc index ffd3551..7c8b590 100644 Binary files a/q03_t_test/tests/__pycache__/test_q03_t_test.cpython-36.pyc and b/q03_t_test/tests/__pycache__/test_q03_t_test.cpython-36.pyc differ diff --git a/q04_chi2_test/__pycache__/__init__.cpython-36.pyc b/q04_chi2_test/__pycache__/__init__.cpython-36.pyc index 07afcf0..8ff21ec 100644 Binary files a/q04_chi2_test/__pycache__/__init__.cpython-36.pyc and b/q04_chi2_test/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_chi2_test/__pycache__/build.cpython-36.pyc b/q04_chi2_test/__pycache__/build.cpython-36.pyc index 699bd6a..748490c 100644 Binary files a/q04_chi2_test/__pycache__/build.cpython-36.pyc and b/q04_chi2_test/__pycache__/build.cpython-36.pyc differ diff --git a/q04_chi2_test/build.py b/q04_chi2_test/build.py index 4f20455..60a2a80 100644 --- a/q04_chi2_test/build.py +++ b/q04_chi2_test/build.py @@ -1,3 +1,5 @@ +# Need to check this. This is wrong I guess. 
def chi_square(df, alpha=0.05):
    """Chi-square test of independence between LandSlope and SalePrice terciles.

    df['SalePrice'] is split into three equal-frequency bins with
    `pd.qcut`, cross-tabulated against df['LandSlope'], and the table is
    passed to `scipy.stats.chi2_contingency`.

    Parameters
    ----------
    df : pandas DataFrame with 'LandSlope' and 'SalePrice' columns.
    alpha : significance level; the critical value is the chi-square
        quantile at 1 - alpha.

    Returns
    -------
    tuple
        (p_value, statistic > critical_value). The second element is True
        when the statistic exceeds the critical value, i.e. when the
        hypothesis of independence is rejected at level `alpha`.
    """
    slope = df['LandSlope']
    # qcut assigns labels in ascending bin order, so the cheapest tercile
    # comes first.
    price_band = pd.qcut(df['SalePrice'], 3, labels=['low', 'medium', 'High'])

    freq_table = pd.crosstab(slope, price_band)
    chi2, pval, dof, _expected = stats.chi2_contingency(freq_table)

    # The critical value must use the table's own degrees of freedom,
    # (rows - 1) * (cols - 1), as reported by chi2_contingency -- not a
    # fixed "categories - 1".
    critical = stats.chi2.ppf(q=1 - alpha, df=dof)

    return pval, (chi2 > critical)


if __name__ == "__main__":
    # NOTE(review): assumes a module-level `df` is loaded above this block
    # (not visible in the hunk) -- confirm.
    chi_square(df)