diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc index 2ba0c81..5adc54d 100644 Binary files a/__pycache__/__init__.cpython-36.pyc and b/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_cond_prob/__pycache__/__init__.cpython-36.pyc b/q01_cond_prob/__pycache__/__init__.cpython-36.pyc index a5c1ab2..81514d5 100644 Binary files a/q01_cond_prob/__pycache__/__init__.cpython-36.pyc and b/q01_cond_prob/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_cond_prob/__pycache__/build.cpython-36.pyc b/q01_cond_prob/__pycache__/build.cpython-36.pyc index 4654504..cf8f09a 100644 Binary files a/q01_cond_prob/__pycache__/build.cpython-36.pyc and b/q01_cond_prob/__pycache__/build.cpython-36.pyc differ diff --git a/q01_cond_prob/build.py b/q01_cond_prob/build.py index 46a16ee..fdd9199 100644 --- a/q01_cond_prob/build.py +++ b/q01_cond_prob/build.py @@ -1,12 +1,21 @@ +# %load q01_cond_prob/build.py # So that float division is by default in python 2.7 from __future__ import division import pandas as pd +import numpy as np df = pd.read_csv('data/house_pricing.csv') - +data = pd.DataFrame(df) # Enter Code Here +def cond_prob(df): + result = data.shape[0] + ho = data[data['Neighborhood'] == 'OldTown'].shape[0] + cp = float((ho/result)*((ho - 1)/(result - 1))*((ho - 2)/(result - 2))) + return cp +cond_prob(df) + diff --git a/q01_cond_prob/tests/__pycache__/__init__.cpython-36.pyc b/q01_cond_prob/tests/__pycache__/__init__.cpython-36.pyc index 9e8f52b..03d26e3 100644 Binary files a/q01_cond_prob/tests/__pycache__/__init__.cpython-36.pyc and b/q01_cond_prob/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_cond_prob/tests/__pycache__/test_q01_cond_prob.cpython-36.pyc b/q01_cond_prob/tests/__pycache__/test_q01_cond_prob.cpython-36.pyc index e8852e9..4106757 100644 Binary files a/q01_cond_prob/tests/__pycache__/test_q01_cond_prob.cpython-36.pyc and b/q01_cond_prob/tests/__pycache__/test_q01_cond_prob.cpython-36.pyc differ diff --git a/q02_confidence_interval/__pycache__/__init__.cpython-36.pyc b/q02_confidence_interval/__pycache__/__init__.cpython-36.pyc index 741ad2d..e0ce7bc 100644 Binary files a/q02_confidence_interval/__pycache__/__init__.cpython-36.pyc and b/q02_confidence_interval/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_confidence_interval/__pycache__/build.cpython-36.pyc b/q02_confidence_interval/__pycache__/build.cpython-36.pyc index b478df2..1e949ee 100644 Binary files a/q02_confidence_interval/__pycache__/build.cpython-36.pyc and b/q02_confidence_interval/__pycache__/build.cpython-36.pyc differ diff --git a/q02_confidence_interval/build.py b/q02_confidence_interval/build.py index 023b81e..1a7549f 100644 --- a/q02_confidence_interval/build.py +++ b/q02_confidence_interval/build.py @@ -1,3 +1,4 @@ +# %load q02_confidence_interval/build.py # Default imports import math import scipy.stats as stats @@ -7,7 +8,24 @@ sample = df['GrLivArea'] -# Write your solution here : +def confidence_interval(n): + n = sample.size + sample_mean = sample.mean() + + z_critical = stats.norm.ppf(q = 0.95) + + print('z-critical value:') + print(z_critical) + + Std_Error = (sample.std())/math.sqrt(n) + + est = (z_critical) * (Std_Error) + + conf_interval = (sample_mean - est, + sample_mean + est) + return conf_interval +confidence_interval(sample) +sample.size diff --git a/q02_confidence_interval/tests/__pycache__/__init__.cpython-36.pyc b/q02_confidence_interval/tests/__pycache__/__init__.cpython-36.pyc index 2eb0cc4..cb2847f 100644 Binary files a/q02_confidence_interval/tests/__pycache__/__init__.cpython-36.pyc and b/q02_confidence_interval/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_confidence_interval/tests/__pycache__/test_q02_confidence_interval.cpython-36.pyc b/q02_confidence_interval/tests/__pycache__/test_q02_confidence_interval.cpython-36.pyc index c3788ca..e43509a 100644 Binary files a/q02_confidence_interval/tests/__pycache__/test_q02_confidence_interval.cpython-36.pyc and b/q02_confidence_interval/tests/__pycache__/test_q02_confidence_interval.cpython-36.pyc differ diff --git a/q03_t_test/__pycache__/__init__.cpython-36.pyc b/q03_t_test/__pycache__/__init__.cpython-36.pyc index cac7d29..4f9aea1 100644 Binary files a/q03_t_test/__pycache__/__init__.cpython-36.pyc and b/q03_t_test/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_t_test/__pycache__/build.cpython-36.pyc b/q03_t_test/__pycache__/build.cpython-36.pyc index d55dfcf..722bb08 100644 Binary files a/q03_t_test/__pycache__/build.cpython-36.pyc and b/q03_t_test/__pycache__/build.cpython-36.pyc differ diff --git a/q03_t_test/build.py b/q03_t_test/build.py index f966b62..ebe16ff 100644 --- a/q03_t_test/build.py +++ b/q03_t_test/build.py @@ -1,9 +1,21 @@ +# %load q03_t_test/build.py # Default imports import scipy.stats as stats -import pandas as pd +import pandas as pd,numpy as np df = pd.read_csv('data/house_pricing.csv') +df = pd.DataFrame(df) - +from statsmodels.stats.weightstats import ztest # Enter Code Here +def t_statistic(df): + statistic, p_value = stats.ttest_1samp(a= df[df['Neighborhood'] == 'OldTown']['GrLivArea'],popmean= df['GrLivArea'].mean()) + pval=np.array(p_value) + test_result=np.all(pval<0.01) + return p_value,test_result + + +t_statistic(df) + + diff --git a/q03_t_test/tests/__pycache__/__init__.cpython-36.pyc b/q03_t_test/tests/__pycache__/__init__.cpython-36.pyc index c489290..fd72d3c 100644 Binary files a/q03_t_test/tests/__pycache__/__init__.cpython-36.pyc and b/q03_t_test/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_t_test/tests/__pycache__/test_q03_t_test.cpython-36.pyc b/q03_t_test/tests/__pycache__/test_q03_t_test.cpython-36.pyc index ffd3551..88b635d 100644 Binary files a/q03_t_test/tests/__pycache__/test_q03_t_test.cpython-36.pyc and b/q03_t_test/tests/__pycache__/test_q03_t_test.cpython-36.pyc differ diff --git a/q04_chi2_test/__pycache__/__init__.cpython-36.pyc b/q04_chi2_test/__pycache__/__init__.cpython-36.pyc index 07afcf0..ff373af 100644 Binary files a/q04_chi2_test/__pycache__/__init__.cpython-36.pyc and b/q04_chi2_test/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_chi2_test/__pycache__/build.cpython-36.pyc b/q04_chi2_test/__pycache__/build.cpython-36.pyc index 699bd6a..956a7f0 100644 Binary files a/q04_chi2_test/__pycache__/build.cpython-36.pyc and b/q04_chi2_test/__pycache__/build.cpython-36.pyc differ diff --git a/q04_chi2_test/build.py b/q04_chi2_test/build.py index 4f20455..c74bc82 100644 --- a/q04_chi2_test/build.py +++ b/q04_chi2_test/build.py @@ -1,10 +1,26 @@ +# %load q04_chi2_test/build.py # Default imports import scipy.stats as stats import pandas as pd - +import numpy as np df = pd.read_csv('data/house_pricing.csv') -# Enter Code Here + + # Enter Code Here # Enter Code Here +def chi_square(df): + price = pd.qcut(df['SalePrice'], 3, labels = ['High', 'Medium', 'Low']) + freqtab = pd.crosstab(df.LandSlope,price) + chi2,pval,dof,expected = stats.chi2_contingency(freqtab) + + if pval < 0.005: + chi2 = np.True_ + else: + chi2 = np.False_ + return pval, chi2 + + +chi_square(df) + diff --git a/q04_chi2_test/tests/__pycache__/__init__.cpython-36.pyc b/q04_chi2_test/tests/__pycache__/__init__.cpython-36.pyc index 45a1b92..f76bbee 100644 Binary files a/q04_chi2_test/tests/__pycache__/__init__.cpython-36.pyc and b/q04_chi2_test/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_chi2_test/tests/__pycache__/test_q04_chi2_test.cpython-36.pyc b/q04_chi2_test/tests/__pycache__/test_q04_chi2_test.cpython-36.pyc index b2a8c04..8c23bda 100644 Binary files a/q04_chi2_test/tests/__pycache__/test_q04_chi2_test.cpython-36.pyc and b/q04_chi2_test/tests/__pycache__/test_q04_chi2_test.cpython-36.pyc differ