diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc index cd8686b..a88baa4 100644 Binary files a/__pycache__/__init__.cpython-36.pyc and b/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_calculate_statistics/__pycache__/__init__.cpython-36.pyc b/q01_calculate_statistics/__pycache__/__init__.cpython-36.pyc index 7f99883..f5f3006 100644 Binary files a/q01_calculate_statistics/__pycache__/__init__.cpython-36.pyc and b/q01_calculate_statistics/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_calculate_statistics/__pycache__/build.cpython-36.pyc b/q01_calculate_statistics/__pycache__/build.cpython-36.pyc index 58a2a31..bd99871 100644 Binary files a/q01_calculate_statistics/__pycache__/build.cpython-36.pyc and b/q01_calculate_statistics/__pycache__/build.cpython-36.pyc differ diff --git a/q01_calculate_statistics/build.py b/q01_calculate_statistics/build.py index a556241..0a8adb2 100644 --- a/q01_calculate_statistics/build.py +++ b/q01_calculate_statistics/build.py @@ -1,11 +1,18 @@ +# %load q01_calculate_statistics/build.py # Default Imports import numpy as np import pandas as pd data = pd.read_csv('data/house_prices_multivariate.csv') -sale_price = data.loc[:, "SalePrice"] +sale_price = data.loc[:, 'SalePrice'] # Return mean,median & mode for the SalePrice Column # Write your code here +def calculate_statistics(): + Sales_price = pd.Series(data['SalePrice']) + return np.mean(sale_price),np.median(sale_price),np.int64(Sales_price.mode()[0]) + +calculate_statistics() + diff --git a/q01_calculate_statistics/tests/__pycache__/__init__.cpython-36.pyc b/q01_calculate_statistics/tests/__pycache__/__init__.cpython-36.pyc index b1b01d5..3509a06 100644 Binary files a/q01_calculate_statistics/tests/__pycache__/__init__.cpython-36.pyc and b/q01_calculate_statistics/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_calculate_statistics/tests/__pycache__/test_q01_plot.cpython-36.pyc b/q01_calculate_statistics/tests/__pycache__/test_q01_plot.cpython-36.pyc index b15e8f5..b0f4d68 100644 Binary files a/q01_calculate_statistics/tests/__pycache__/test_q01_plot.cpython-36.pyc and b/q01_calculate_statistics/tests/__pycache__/test_q01_plot.cpython-36.pyc differ diff --git a/q02_plot/__pycache__/__init__.cpython-36.pyc b/q02_plot/__pycache__/__init__.cpython-36.pyc index 215eac0..edf1af6 100644 Binary files a/q02_plot/__pycache__/__init__.cpython-36.pyc and b/q02_plot/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_plot/__pycache__/build.cpython-36.pyc b/q02_plot/__pycache__/build.cpython-36.pyc index bed076d..20a1b80 100644 Binary files a/q02_plot/__pycache__/build.cpython-36.pyc and b/q02_plot/__pycache__/build.cpython-36.pyc differ diff --git a/q02_plot/build.py b/q02_plot/build.py index 70276d6..27405d4 100644 --- a/q02_plot/build.py +++ b/q02_plot/build.py @@ -1,5 +1,7 @@ +# %load q02_plot/build.py # Default Imports import pandas as pd +import numpy as np import matplotlib.pyplot as plt from greyatomlib.descriptive_stats.q01_calculate_statistics.build import calculate_statistics @@ -7,6 +9,16 @@ dataframe = pd.read_csv('data/house_prices_multivariate.csv') sale_price = dataframe.loc[:, 'SalePrice'] +def plot(): + mean=np.mean(sale_price) + median=np.median(sale_price) + mode=sale_price.mode()[0] + plt.axvline(x=mean) + plt.axvline(x=median) + plt.axvline(x=mode) + plt.show() + + +plot() -# Draw the plot for the mean, median and mode for the dataset diff --git a/q02_plot/tests/__pycache__/__init__.cpython-36.pyc b/q02_plot/tests/__pycache__/__init__.cpython-36.pyc index 488a890..f5516ca 100644 Binary files a/q02_plot/tests/__pycache__/__init__.cpython-36.pyc and b/q02_plot/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_plot/tests/__pycache__/test_q02_plot.cpython-36.pyc b/q02_plot/tests/__pycache__/test_q02_plot.cpython-36.pyc index 56f4330..150d628 100644 Binary files a/q02_plot/tests/__pycache__/test_q02_plot.cpython-36.pyc and b/q02_plot/tests/__pycache__/test_q02_plot.cpython-36.pyc differ diff --git a/q03_pearson_correlation/__pycache__/__init__.cpython-36.pyc b/q03_pearson_correlation/__pycache__/__init__.cpython-36.pyc index 543c178..c7300f4 100644 Binary files a/q03_pearson_correlation/__pycache__/__init__.cpython-36.pyc and b/q03_pearson_correlation/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_pearson_correlation/__pycache__/build.cpython-36.pyc b/q03_pearson_correlation/__pycache__/build.cpython-36.pyc index ba8cf11..a513bd9 100644 Binary files a/q03_pearson_correlation/__pycache__/build.cpython-36.pyc and b/q03_pearson_correlation/__pycache__/build.cpython-36.pyc differ diff --git a/q03_pearson_correlation/build.py b/q03_pearson_correlation/build.py index 33a762b..f9a3ee1 100644 --- a/q03_pearson_correlation/build.py +++ b/q03_pearson_correlation/build.py @@ -1,9 +1,23 @@ +# %load q03_pearson_correlation/build.py # Default Imports import pandas as pd +import numpy as np dataframe_1 = pd.read_csv('data/house_prices_multivariate.csv') dataframe_2 = pd.read_csv('data/house_prices_copy.csv') +Sales_Price_1 = dataframe_1.SalePrice +Sales_Price_2 = dataframe_2.SalePrice + + # Return the correlation value between the SalePrice column for the two loaded datasets # Your code here +def correlation(): + return np.corrcoef(Sales_Price_1, Sales_Price_2)[0,1] + + + +correlation() + + diff --git a/q03_pearson_correlation/tests/__pycache__/__init__.cpython-36.pyc b/q03_pearson_correlation/tests/__pycache__/__init__.cpython-36.pyc index d7eca99..a3ee68f 100644 Binary files a/q03_pearson_correlation/tests/__pycache__/__init__.cpython-36.pyc and b/q03_pearson_correlation/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_pearson_correlation/tests/__pycache__/test_q03_correlation.cpython-36.pyc b/q03_pearson_correlation/tests/__pycache__/test_q03_correlation.cpython-36.pyc index ed900c4..65028e1 100644 Binary files a/q03_pearson_correlation/tests/__pycache__/test_q03_correlation.cpython-36.pyc and b/q03_pearson_correlation/tests/__pycache__/test_q03_correlation.cpython-36.pyc differ diff --git a/q04_spearman_correlation/__pycache__/__init__.cpython-36.pyc b/q04_spearman_correlation/__pycache__/__init__.cpython-36.pyc index 7868267..dc8d6e6 100644 Binary files a/q04_spearman_correlation/__pycache__/__init__.cpython-36.pyc and b/q04_spearman_correlation/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_spearman_correlation/__pycache__/build.cpython-36.pyc b/q04_spearman_correlation/__pycache__/build.cpython-36.pyc index 94f735a..557959f 100644 Binary files a/q04_spearman_correlation/__pycache__/build.cpython-36.pyc and b/q04_spearman_correlation/__pycache__/build.cpython-36.pyc differ diff --git a/q04_spearman_correlation/build.py b/q04_spearman_correlation/build.py index 557be32..964c56c 100644 --- a/q04_spearman_correlation/build.py +++ b/q04_spearman_correlation/build.py @@ -1,8 +1,21 @@ +# %load q04_spearman_correlation/build.py # Default Import import pandas as pd +from scipy.stats import spearmanr dataframe_1 = pd.read_csv('data/house_prices_multivariate.csv') dataframe_2 = pd.read_csv('data/house_prices_copy.csv') -# Your code here +SalesPrice_1=dataframe_1.SalePrice +SalesPrice_2=dataframe_2.SalePrice + + +def spearman_correlation(): + coef, p = spearmanr(SalesPrice_1, SalesPrice_2) + return coef + + + +spearman_correlation() + diff --git a/q04_spearman_correlation/tests/__pycache__/__init__.cpython-36.pyc b/q04_spearman_correlation/tests/__pycache__/__init__.cpython-36.pyc index 495646a..4653139 100644 Binary files a/q04_spearman_correlation/tests/__pycache__/__init__.cpython-36.pyc and b/q04_spearman_correlation/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_spearman_correlation/tests/__pycache__/test_q04_spearman_correlation.cpython-36.pyc b/q04_spearman_correlation/tests/__pycache__/test_q04_spearman_correlation.cpython-36.pyc index d082652..9dd9c44 100644 Binary files a/q04_spearman_correlation/tests/__pycache__/test_q04_spearman_correlation.cpython-36.pyc and b/q04_spearman_correlation/tests/__pycache__/test_q04_spearman_correlation.cpython-36.pyc differ