# q01_cond_prob/build.py
def cond_prob(df, neighborhood='OldTown', n_draws=3):
    """Return the chance that ``n_draws`` random houses are all in ``neighborhood``.

    Houses are drawn without replacement, so the result is the product
    ``k/n * (k-1)/(n-1) * ...`` where ``n`` is the number of rows in ``df``
    and ``k`` is the number of rows whose ``'Neighborhood'`` column equals
    ``neighborhood``.

    Args:
        df: DataFrame with a ``'Neighborhood'`` column, one row per house.
        neighborhood: Value of ``'Neighborhood'`` to match.
        n_draws: Number of houses drawn without replacement.

    Returns:
        The probability as a float. ``0.0`` when fewer than ``n_draws``
        matching houses exist (which also covers frames with fewer than
        ``n_draws`` rows); ``1.0`` when ``n_draws`` is 0.

    Raises:
        ValueError: If ``n_draws`` is negative.
    """
    if n_draws < 0:
        raise ValueError('n_draws must be non-negative')

    total = df.shape[0]
    matching = df[df['Neighborhood'] == neighborhood].shape[0]

    # Not enough matching houses to fill every draw.  Returning early also
    # keeps the denominators below strictly positive, because
    # matching <= total.
    if matching < n_draws:
        return 0.0

    probability = 1.0
    for drawn in range(n_draws):
        # Explicit float keeps true division on Python 2 even when
        # ``from __future__ import division`` is not in effect.
        probability *= (matching - drawn) / float(total - drawn)
    return probability
# q02_confidence_interval/build.py
# Default imports
import math

import numpy as np
import pandas as pd
import scipy.stats as stats


def confidence_interval(df, confidence=0.90, column='GrLivArea'):
    """Return a two-sided normal-approximation confidence interval for the mean.

    The interval is ``mean +/- z * sem`` where ``sem`` is the standard error
    reported by ``scipy.stats.sem`` and ``z`` is the standard-normal quantile
    at ``(1 + confidence) / 2`` (0.95 for the default 90% interval).

    Args:
        df: Either a DataFrame containing ``column`` or a one-dimensional
            sample (Series, array, list) of the values themselves.
        confidence: Two-sided confidence level, strictly between 0 and 1.
        column: Column to analyse when ``df`` is a DataFrame.

    Returns:
        A ``(lower, upper)`` tuple.

    Raises:
        ValueError: If ``confidence`` is outside (0, 1) or the sample has
            fewer than two observations (the standard error is undefined).
    """
    if not 0 < confidence < 1:
        raise ValueError('confidence must be strictly between 0 and 1')

    # Work from the argument rather than a module-level global so the result
    # always reflects the data the caller passed in.
    sample = df[column] if isinstance(df, pd.DataFrame) else df
    if len(sample) < 2:
        raise ValueError('need at least two observations')

    mean = np.mean(sample)
    margin = stats.norm.ppf((1 + confidence) / 2) * stats.sem(sample)
    return mean - margin, mean + margin


if __name__ == '__main__':
    # Script usage only: importing this module neither reads the CSV nor
    # prints anything.
    print(confidence_interval(pd.read_csv('data/house_pricing.csv')))
# q03_t_test/build.py
# Default imports
import numpy as np
import pandas as pd
import scipy.stats as stats


def t_statistic(df, alpha=0.05):
    """Test whether OldTown living area differs from the overall mean.

    Runs a one-sample, two-sided t-test of the ``'GrLivArea'`` values of
    rows whose ``'Neighborhood'`` is ``'OldTown'`` against the mean
    ``'GrLivArea'`` of the whole frame.

    Args:
        df: DataFrame with ``'Neighborhood'`` and ``'GrLivArea'`` columns.
        alpha: Significance level the p-value is compared against.

    Returns:
        A ``(p_value, significant)`` tuple where ``significant`` is a
        ``numpy.bool_`` that is true when ``p_value < alpha``.
    """
    population_mean = df['GrLivArea'].mean()
    old_town_area = df.loc[df['Neighborhood'] == 'OldTown', 'GrLivArea']

    p_value = stats.ttest_1samp(a=old_town_area, popmean=population_mean).pvalue

    # Compare against an explicit significance level.  Deriving the threshold
    # from a t-test of the population against its own mean yields a statistic
    # of ~0, whose CDF is ~0.5 for any degrees of freedom, so it would flag
    # every p-value below one half as significant.
    return p_value, np.bool_(p_value < alpha)
a/q03_t_test/tests/__pycache__/test_q03_t_test.cpython-36.pyc and b/q03_t_test/tests/__pycache__/test_q03_t_test.cpython-36.pyc differ