diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc index cd8686b..c31fe07 100644 Binary files a/__pycache__/__init__.cpython-36.pyc and b/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_calculate_statistics/__pycache__/__init__.cpython-36.pyc b/q01_calculate_statistics/__pycache__/__init__.cpython-36.pyc index 7f99883..db8af0f 100644 Binary files a/q01_calculate_statistics/__pycache__/__init__.cpython-36.pyc and b/q01_calculate_statistics/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_calculate_statistics/__pycache__/build.cpython-36.pyc b/q01_calculate_statistics/__pycache__/build.cpython-36.pyc index 58a2a31..9456515 100644 Binary files a/q01_calculate_statistics/__pycache__/build.cpython-36.pyc and b/q01_calculate_statistics/__pycache__/build.cpython-36.pyc differ diff --git a/q01_calculate_statistics/build.py b/q01_calculate_statistics/build.py index a556241..bdd6763 100644 --- a/q01_calculate_statistics/build.py +++ b/q01_calculate_statistics/build.py @@ -1,11 +1,24 @@ +# %load q01_calculate_statistics/build.py # Default Imports import numpy as np import pandas as pd data = pd.read_csv('data/house_prices_multivariate.csv') -sale_price = data.loc[:, "SalePrice"] +sale_price = data.loc[:, 'SalePrice'] + +def calculate_statistics() : + mean=np.mean(sale_price) + median=np.median(sale_price) + mode=sale_price.mode() + + return(float(mean),float(median),mode[0]) + +calculate_statistics() # Return mean,median & mode for the SalePrice Column # Write your code here + + + diff --git a/q01_calculate_statistics/tests/__pycache__/__init__.cpython-36.pyc b/q01_calculate_statistics/tests/__pycache__/__init__.cpython-36.pyc index b1b01d5..c516063 100644 Binary files a/q01_calculate_statistics/tests/__pycache__/__init__.cpython-36.pyc and b/q01_calculate_statistics/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_calculate_statistics/tests/__pycache__/test_q01_plot.cpython-36.pyc b/q01_calculate_statistics/tests/__pycache__/test_q01_plot.cpython-36.pyc index b15e8f5..88c352e 100644 Binary files a/q01_calculate_statistics/tests/__pycache__/test_q01_plot.cpython-36.pyc and b/q01_calculate_statistics/tests/__pycache__/test_q01_plot.cpython-36.pyc differ diff --git a/q02_plot/__pycache__/__init__.cpython-36.pyc b/q02_plot/__pycache__/__init__.cpython-36.pyc index 215eac0..9cb26a1 100644 Binary files a/q02_plot/__pycache__/__init__.cpython-36.pyc and b/q02_plot/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_plot/__pycache__/build.cpython-36.pyc b/q02_plot/__pycache__/build.cpython-36.pyc index bed076d..01e13ee 100644 Binary files a/q02_plot/__pycache__/build.cpython-36.pyc and b/q02_plot/__pycache__/build.cpython-36.pyc differ diff --git a/q02_plot/build.py b/q02_plot/build.py index 70276d6..d24b855 100644 --- a/q02_plot/build.py +++ b/q02_plot/build.py @@ -1,12 +1,30 @@ -# Default Imports +import numpy as np import pandas as pd import matplotlib.pyplot as plt from greyatomlib.descriptive_stats.q01_calculate_statistics.build import calculate_statistics +#To be uncommented while test case check plt.switch_backend('agg') -dataframe = pd.read_csv('data/house_prices_multivariate.csv') -sale_price = dataframe.loc[:, 'SalePrice'] + # Draw the plot for the mean, median and mode for the dataset +def plot(): + + dataframe = pd.read_csv('data/house_prices_multivariate.csv') + sale_price = dataframe.loc[:, 'SalePrice'] + + plt.hist(sale_price, bins=60) + + plt.axvline(x=sale_price.mean(),color='red',linestyle='--',label='Mean') + plt.axvline(x=sale_price.median(),color='black',linestyle='--',label='Median') + plt.axvline(x=sale_price.mode()[0],color='orange',linestyle='--',label='Mode') + + plt.legend(loc=0) + + plt.show() + +plot() + + diff --git a/q02_plot/tests/__pycache__/__init__.cpython-36.pyc b/q02_plot/tests/__pycache__/__init__.cpython-36.pyc index 488a890..fddab4a 100644 Binary files a/q02_plot/tests/__pycache__/__init__.cpython-36.pyc and b/q02_plot/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_plot/tests/__pycache__/test_q02_plot.cpython-36.pyc b/q02_plot/tests/__pycache__/test_q02_plot.cpython-36.pyc index 56f4330..e5efe7b 100644 Binary files a/q02_plot/tests/__pycache__/test_q02_plot.cpython-36.pyc and b/q02_plot/tests/__pycache__/test_q02_plot.cpython-36.pyc differ diff --git a/q03_pearson_correlation/__pycache__/__init__.cpython-36.pyc b/q03_pearson_correlation/__pycache__/__init__.cpython-36.pyc index 543c178..360ae2d 100644 Binary files a/q03_pearson_correlation/__pycache__/__init__.cpython-36.pyc and b/q03_pearson_correlation/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_pearson_correlation/__pycache__/build.cpython-36.pyc b/q03_pearson_correlation/__pycache__/build.cpython-36.pyc index ba8cf11..c36fd8e 100644 Binary files a/q03_pearson_correlation/__pycache__/build.cpython-36.pyc and b/q03_pearson_correlation/__pycache__/build.cpython-36.pyc differ diff --git a/q03_pearson_correlation/build.py b/q03_pearson_correlation/build.py index 33a762b..590a975 100644 --- a/q03_pearson_correlation/build.py +++ b/q03_pearson_correlation/build.py @@ -1,9 +1,24 @@ +# %load q03_pearson_correlation/build.py # Default Imports import pandas as pd -dataframe_1 = pd.read_csv('data/house_prices_multivariate.csv') -dataframe_2 = pd.read_csv('data/house_prices_copy.csv') + +def correlation(): + dataframe_1 = pd.read_csv('data/house_prices_multivariate.csv') + dataframe_2 = pd.read_csv('data/house_prices_copy.csv') + + correlation=dataframe_1.loc[:, 'SalePrice'].corr(dataframe_2.loc[:, 'SalePrice']) + + +#(correlation) + return correlation + + +correlation() # Return the correlation value between the SalePrice column for the two loaded datasets # Your code here + + + diff --git a/q03_pearson_correlation/tests/__pycache__/__init__.cpython-36.pyc b/q03_pearson_correlation/tests/__pycache__/__init__.cpython-36.pyc index d7eca99..bef2ebf 100644 Binary files a/q03_pearson_correlation/tests/__pycache__/__init__.cpython-36.pyc and b/q03_pearson_correlation/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_pearson_correlation/tests/__pycache__/test_q03_correlation.cpython-36.pyc b/q03_pearson_correlation/tests/__pycache__/test_q03_correlation.cpython-36.pyc index ed900c4..ebc5000 100644 Binary files a/q03_pearson_correlation/tests/__pycache__/test_q03_correlation.cpython-36.pyc and b/q03_pearson_correlation/tests/__pycache__/test_q03_correlation.cpython-36.pyc differ diff --git a/q04_spearman_correlation/__pycache__/__init__.cpython-36.pyc b/q04_spearman_correlation/__pycache__/__init__.cpython-36.pyc index 7868267..923218a 100644 Binary files a/q04_spearman_correlation/__pycache__/__init__.cpython-36.pyc and b/q04_spearman_correlation/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_spearman_correlation/__pycache__/build.cpython-36.pyc b/q04_spearman_correlation/__pycache__/build.cpython-36.pyc index 94f735a..c85e9ea 100644 Binary files a/q04_spearman_correlation/__pycache__/build.cpython-36.pyc and b/q04_spearman_correlation/__pycache__/build.cpython-36.pyc differ diff --git a/q04_spearman_correlation/build.py b/q04_spearman_correlation/build.py index 557be32..40f5c8b 100644 --- a/q04_spearman_correlation/build.py +++ b/q04_spearman_correlation/build.py @@ -1,8 +1,22 @@ +# %load q04_spearman_correlation/build.py # Default Import import pandas as pd +from scipy.stats import spearmanr -dataframe_1 = pd.read_csv('data/house_prices_multivariate.csv') -dataframe_2 = pd.read_csv('data/house_prices_copy.csv') + +def spearman_correlation(): + + dataframe_1 = pd.read_csv('data/house_prices_multivariate.csv') + dataframe_2 = pd.read_csv('data/house_prices_copy.csv') # Your code here + spearmancor=spearmanr(dataframe_1.SalePrice,dataframe_2.SalePrice)[0] + + return spearmancor + +spearman_correlation() + + + + diff --git a/q04_spearman_correlation/tests/__pycache__/__init__.cpython-36.pyc b/q04_spearman_correlation/tests/__pycache__/__init__.cpython-36.pyc index 495646a..a2147a3 100644 Binary files a/q04_spearman_correlation/tests/__pycache__/__init__.cpython-36.pyc and b/q04_spearman_correlation/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_spearman_correlation/tests/__pycache__/test_q04_spearman_correlation.cpython-36.pyc b/q04_spearman_correlation/tests/__pycache__/test_q04_spearman_correlation.cpython-36.pyc index d082652..600184f 100644 Binary files a/q04_spearman_correlation/tests/__pycache__/test_q04_spearman_correlation.cpython-36.pyc and b/q04_spearman_correlation/tests/__pycache__/test_q04_spearman_correlation.cpython-36.pyc differ