diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc index cd8686b..2dbfe37 100644 Binary files a/__pycache__/__init__.cpython-36.pyc and b/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_calculate_statistics/__pycache__/__init__.cpython-36.pyc b/q01_calculate_statistics/__pycache__/__init__.cpython-36.pyc index 7f99883..6298c3c 100644 Binary files a/q01_calculate_statistics/__pycache__/__init__.cpython-36.pyc and b/q01_calculate_statistics/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_calculate_statistics/__pycache__/build.cpython-36.pyc b/q01_calculate_statistics/__pycache__/build.cpython-36.pyc index 58a2a31..4325587 100644 Binary files a/q01_calculate_statistics/__pycache__/build.cpython-36.pyc and b/q01_calculate_statistics/__pycache__/build.cpython-36.pyc differ diff --git a/q01_calculate_statistics/build.py b/q01_calculate_statistics/build.py index a556241..de94451 100644 --- a/q01_calculate_statistics/build.py +++ b/q01_calculate_statistics/build.py @@ -1,11 +1,22 @@ +# %load q01_calculate_statistics/build.py # Default Imports import numpy as np import pandas as pd +import statistics data = pd.read_csv('data/house_prices_multivariate.csv') -sale_price = data.loc[:, "SalePrice"] - +sale_price = data.loc[:, 'SalePrice'] +sale_price +def calculate_statistics(): + mean1=np.mean(sale_price) + median1=np.median(sale_price) + series1=np.array(sale_price) + mode1=statistics.mode(series1) + return(mean1,median1,np.int64(mode1)) +calculate_statistics() # Return mean,median & mode for the SalePrice Column # Write your code here + + diff --git a/q01_calculate_statistics/tests/__pycache__/__init__.cpython-36.pyc b/q01_calculate_statistics/tests/__pycache__/__init__.cpython-36.pyc index b1b01d5..3c0dcd9 100644 Binary files a/q01_calculate_statistics/tests/__pycache__/__init__.cpython-36.pyc and b/q01_calculate_statistics/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_calculate_statistics/tests/__pycache__/test_q01_plot.cpython-36.pyc b/q01_calculate_statistics/tests/__pycache__/test_q01_plot.cpython-36.pyc index b15e8f5..2d029bd 100644 Binary files a/q01_calculate_statistics/tests/__pycache__/test_q01_plot.cpython-36.pyc and b/q01_calculate_statistics/tests/__pycache__/test_q01_plot.cpython-36.pyc differ diff --git a/q02_plot/__pycache__/__init__.cpython-36.pyc b/q02_plot/__pycache__/__init__.cpython-36.pyc index 215eac0..bd2e570 100644 Binary files a/q02_plot/__pycache__/__init__.cpython-36.pyc and b/q02_plot/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_plot/__pycache__/build.cpython-36.pyc b/q02_plot/__pycache__/build.cpython-36.pyc index bed076d..bfd6851 100644 Binary files a/q02_plot/__pycache__/build.cpython-36.pyc and b/q02_plot/__pycache__/build.cpython-36.pyc differ diff --git a/q02_plot/build.py b/q02_plot/build.py index 70276d6..a02ed4a 100644 --- a/q02_plot/build.py +++ b/q02_plot/build.py @@ -1,12 +1,27 @@ +# %load q02_plot/build.py # Default Imports import pandas as pd import matplotlib.pyplot as plt from greyatomlib.descriptive_stats.q01_calculate_statistics.build import calculate_statistics +import statistics +import numpy as np plt.switch_backend('agg') dataframe = pd.read_csv('data/house_prices_multivariate.csv') sale_price = dataframe.loc[:, 'SalePrice'] +def plot(): + #plt.hist(sale_price) + plt.hist(sale_price) + plt.axvline(sale_price.mean(),color='c') + plt.axvline(sale_price.median(),color='k') + series1=np.array(sale_price) + mode1=statistics.mode(series1) + plt.axvline(mode1,color='k') + plt.show() +plot() # Draw the plot for the mean, median and mode for the dataset + + diff --git a/q02_plot/tests/__pycache__/__init__.cpython-36.pyc b/q02_plot/tests/__pycache__/__init__.cpython-36.pyc index 488a890..b05645d 100644 Binary files a/q02_plot/tests/__pycache__/__init__.cpython-36.pyc and b/q02_plot/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_plot/tests/__pycache__/test_q02_plot.cpython-36.pyc b/q02_plot/tests/__pycache__/test_q02_plot.cpython-36.pyc index 56f4330..5e7c270 100644 Binary files a/q02_plot/tests/__pycache__/test_q02_plot.cpython-36.pyc and b/q02_plot/tests/__pycache__/test_q02_plot.cpython-36.pyc differ diff --git a/q03_pearson_correlation/__pycache__/__init__.cpython-36.pyc b/q03_pearson_correlation/__pycache__/__init__.cpython-36.pyc index 543c178..2b8ddfe 100644 Binary files a/q03_pearson_correlation/__pycache__/__init__.cpython-36.pyc and b/q03_pearson_correlation/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_pearson_correlation/__pycache__/build.cpython-36.pyc b/q03_pearson_correlation/__pycache__/build.cpython-36.pyc index ba8cf11..2407e94 100644 Binary files a/q03_pearson_correlation/__pycache__/build.cpython-36.pyc and b/q03_pearson_correlation/__pycache__/build.cpython-36.pyc differ diff --git a/q03_pearson_correlation/build.py b/q03_pearson_correlation/build.py index 33a762b..81b9aa7 100644 --- a/q03_pearson_correlation/build.py +++ b/q03_pearson_correlation/build.py @@ -1,9 +1,14 @@ +# %load q03_pearson_correlation/build.py # Default Imports import pandas as pd +from scipy.stats.stats import pearsonr dataframe_1 = pd.read_csv('data/house_prices_multivariate.csv') dataframe_2 = pd.read_csv('data/house_prices_copy.csv') -# Return the correlation value between the SalePrice column for the two loaded datasets -# Your code here +def correlation(): + result=pearsonr(dataframe_1['SalePrice'],dataframe_2['SalePrice']) + return(result[0]) +correlation() + diff --git a/q03_pearson_correlation/tests/__pycache__/__init__.cpython-36.pyc b/q03_pearson_correlation/tests/__pycache__/__init__.cpython-36.pyc index d7eca99..61a914a 100644 Binary files a/q03_pearson_correlation/tests/__pycache__/__init__.cpython-36.pyc and b/q03_pearson_correlation/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_pearson_correlation/tests/__pycache__/test_q03_correlation.cpython-36.pyc b/q03_pearson_correlation/tests/__pycache__/test_q03_correlation.cpython-36.pyc index ed900c4..b99a476 100644 Binary files a/q03_pearson_correlation/tests/__pycache__/test_q03_correlation.cpython-36.pyc and b/q03_pearson_correlation/tests/__pycache__/test_q03_correlation.cpython-36.pyc differ diff --git a/q04_spearman_correlation/__pycache__/__init__.cpython-36.pyc b/q04_spearman_correlation/__pycache__/__init__.cpython-36.pyc index 7868267..d3b9741 100644 Binary files a/q04_spearman_correlation/__pycache__/__init__.cpython-36.pyc and b/q04_spearman_correlation/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_spearman_correlation/__pycache__/build.cpython-36.pyc b/q04_spearman_correlation/__pycache__/build.cpython-36.pyc index 94f735a..83a76a5 100644 Binary files a/q04_spearman_correlation/__pycache__/build.cpython-36.pyc and b/q04_spearman_correlation/__pycache__/build.cpython-36.pyc differ diff --git a/q04_spearman_correlation/build.py b/q04_spearman_correlation/build.py index 557be32..7b98bfb 100644 --- a/q04_spearman_correlation/build.py +++ b/q04_spearman_correlation/build.py @@ -1,8 +1,15 @@ +# %load q04_spearman_correlation/build.py # Default Import import pandas as pd dataframe_1 = pd.read_csv('data/house_prices_multivariate.csv') dataframe_2 = pd.read_csv('data/house_prices_copy.csv') +from scipy.stats import spearmanr # Your code here +def spearman_correlation(): + result=spearmanr(dataframe_1['SalePrice'],dataframe_2['SalePrice']) + return(result[0]) +spearman_correlation() + diff --git a/q04_spearman_correlation/tests/__pycache__/__init__.cpython-36.pyc b/q04_spearman_correlation/tests/__pycache__/__init__.cpython-36.pyc index 495646a..145e4de 100644 Binary files a/q04_spearman_correlation/tests/__pycache__/__init__.cpython-36.pyc and b/q04_spearman_correlation/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_spearman_correlation/tests/__pycache__/test_q04_spearman_correlation.cpython-36.pyc b/q04_spearman_correlation/tests/__pycache__/test_q04_spearman_correlation.cpython-36.pyc index d082652..ddc8c53 100644 Binary files a/q04_spearman_correlation/tests/__pycache__/test_q04_spearman_correlation.cpython-36.pyc and b/q04_spearman_correlation/tests/__pycache__/test_q04_spearman_correlation.cpython-36.pyc differ