def calculate_statistics():
    """Compute summary statistics for the ``SalePrice`` column.

    The column is read from the module-level ``data`` frame.

    Returns:
        A ``(mean, median, mode)`` tuple, where ``mode`` is the entry
        labelled ``0`` in the result of ``Series.mode()``.
    """
    prices = data['SalePrice']
    # mode() yields a Series of the most frequent value(s); keep the one
    # labelled 0.
    most_common = prices.mode()[0]
    return np.mean(prices), prices.median(), most_common
def plot():
    """Draw a histogram of ``sale_price`` with mean, median and mode markers.

    NOTE(review): ``sale_price`` is not defined in the visible part of this
    change; it is presumably a module-level Series - confirm.

    Returns:
        None. The figure is created through ``matplotlib.pyplot`` and shown
        with ``plt.show()``.
    """
    # Listed in drawing order, which also fixes the legend order.
    markers = (
        ('mode', sale_price.mode()[0]),
        ('median', sale_price.median()),
        ('mean', np.mean(sale_price)),
    )

    plt.figure(figsize=(10, 6))
    plt.hist(sale_price, bins=40)

    # Each statistic is drawn as a vertical line: a constant x over y = 0..299.
    heights = range(300)
    for label, value in markers:
        plt.plot([value] * len(heights), heights, label=label)

    plt.ylim(0, 250)
    plt.legend()
    plt.show()
def correlation():
    """Return the correlation coefficient between the two ``SalePrice`` columns.

    The columns are read from the module-level ``dataframe_1`` and
    ``dataframe_2``.

    Returns:
        The off-diagonal entry of ``np.corrcoef`` for the two columns, as a
        Python ``float``.
    """
    prices_1 = dataframe_1['SalePrice']
    prices_2 = dataframe_2['SalePrice']
    # corrcoef returns a 2x2 matrix; [0, 1] is the cross-correlation term.
    matrix = np.corrcoef(prices_1, prices_2)
    return float(matrix[0, 1])
def spearman_correlation():
    """Return Spearman's rank correlation between the two ``SalePrice`` columns.

    The columns are read from the module-level ``dataframe_1`` and
    ``dataframe_2``. The value is computed with the rank-difference formula
    ``1 - 6 * sum(d_i ** 2) / (n * (n ** 2 - 1))``, where ``d_i`` is the
    difference between the two ranks of row ``i`` and ``n`` is the number of
    rows.

    NOTE(review): this shortcut formula is normally stated for data without
    tied ranks - confirm that is acceptable for this dataset.

    Returns:
        The coefficient as a Python ``float``; ``nan`` when there are fewer
        than two rows, because the denominator is then zero.
    """
    # Ranks are kept in local Series so the shared module-level frames are
    # not modified by calling this function.
    rank_1 = dataframe_1['SalePrice'].rank(ascending=True)
    rank_2 = dataframe_2['SalePrice'].rank(ascending=True)

    # Squaring makes the sign of the difference irrelevant.
    squared_diff = (rank_1 - rank_2) ** 2
    n = len(squared_diff)
    if n < 2:
        return float('nan')

    # Reduce to a scalar before converting, instead of calling float() on a
    # one-element Series.
    total = squared_diff.sum()
    return float(1 - 6 * total / (n * (n ** 2 - 1)))
b/q04_spearman_correlation/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_spearman_correlation/tests/__pycache__/test_q04_spearman_correlation.cpython-36.pyc b/q04_spearman_correlation/tests/__pycache__/test_q04_spearman_correlation.cpython-36.pyc index d082652..1488887 100644 Binary files a/q04_spearman_correlation/tests/__pycache__/test_q04_spearman_correlation.cpython-36.pyc and b/q04_spearman_correlation/tests/__pycache__/test_q04_spearman_correlation.cpython-36.pyc differ