diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc index cd8686b..cb08767 100644 Binary files a/__pycache__/__init__.cpython-36.pyc and b/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_calculate_statistics/__pycache__/__init__.cpython-36.pyc b/q01_calculate_statistics/__pycache__/__init__.cpython-36.pyc index 7f99883..139eaf6 100644 Binary files a/q01_calculate_statistics/__pycache__/__init__.cpython-36.pyc and b/q01_calculate_statistics/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_calculate_statistics/__pycache__/build.cpython-36.pyc b/q01_calculate_statistics/__pycache__/build.cpython-36.pyc index 58a2a31..0645d69 100644 Binary files a/q01_calculate_statistics/__pycache__/build.cpython-36.pyc and b/q01_calculate_statistics/__pycache__/build.cpython-36.pyc differ diff --git a/q01_calculate_statistics/build.py b/q01_calculate_statistics/build.py index a556241..2454e67 100644 --- a/q01_calculate_statistics/build.py +++ b/q01_calculate_statistics/build.py @@ -1,11 +1,17 @@ +# %load q01_calculate_statistics/build.py # Default Imports import numpy as np import pandas as pd data = pd.read_csv('data/house_prices_multivariate.csv') -sale_price = data.loc[:, "SalePrice"] +sale_price = data.loc[:, 'SalePrice'] # Return mean,median & mode for the SalePrice Column # Write your code here +def calculate_statistics(): + #print(np.argmax(np.bincount(sale_price))) + return np.mean(sale_price),np.median(sale_price),np.argmax(np.bincount(sale_price)) +calculate_statistics() + diff --git a/q01_calculate_statistics/tests/__pycache__/__init__.cpython-36.pyc b/q01_calculate_statistics/tests/__pycache__/__init__.cpython-36.pyc index b1b01d5..d29ca72 100644 Binary files a/q01_calculate_statistics/tests/__pycache__/__init__.cpython-36.pyc and b/q01_calculate_statistics/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_calculate_statistics/tests/__pycache__/test_q01_plot.cpython-36.pyc b/q01_calculate_statistics/tests/__pycache__/test_q01_plot.cpython-36.pyc index b15e8f5..90bc6a5 100644 Binary files a/q01_calculate_statistics/tests/__pycache__/test_q01_plot.cpython-36.pyc and b/q01_calculate_statistics/tests/__pycache__/test_q01_plot.cpython-36.pyc differ diff --git a/q02_plot/__pycache__/__init__.cpython-36.pyc b/q02_plot/__pycache__/__init__.cpython-36.pyc index 215eac0..28e99a1 100644 Binary files a/q02_plot/__pycache__/__init__.cpython-36.pyc and b/q02_plot/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_plot/__pycache__/build.cpython-36.pyc b/q02_plot/__pycache__/build.cpython-36.pyc index bed076d..f9efe15 100644 Binary files a/q02_plot/__pycache__/build.cpython-36.pyc and b/q02_plot/__pycache__/build.cpython-36.pyc differ diff --git a/q02_plot/build.py b/q02_plot/build.py index 70276d6..fd5fe85 100644 --- a/q02_plot/build.py +++ b/q02_plot/build.py @@ -1,3 +1,4 @@ +# %load q02_plot/build.py # Default Imports import pandas as pd import matplotlib.pyplot as plt @@ -9,4 +10,16 @@ # Draw the plot for the mean, median and mode for the dataset +def plot(): + plt.figure(figsize=(10, 6)) + plt.hist(sale_price, bins=40) + plt.plot([mode]*300, range(300), label='mode') + plt.plot([median]*300, range(300), label='median') + plt.plot([mean]*300, range(300), label='mean') + plt.ylim(0, 250) + plt.legend() + plt.show() +mean, median, mode = calculate_statistics() +plot() + diff --git a/q02_plot/tests/__pycache__/__init__.cpython-36.pyc b/q02_plot/tests/__pycache__/__init__.cpython-36.pyc index 488a890..6f1b7f6 100644 Binary files a/q02_plot/tests/__pycache__/__init__.cpython-36.pyc and b/q02_plot/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_plot/tests/__pycache__/test_q02_plot.cpython-36.pyc b/q02_plot/tests/__pycache__/test_q02_plot.cpython-36.pyc index 56f4330..9bb1d79 100644 Binary files a/q02_plot/tests/__pycache__/test_q02_plot.cpython-36.pyc and b/q02_plot/tests/__pycache__/test_q02_plot.cpython-36.pyc differ diff --git a/q03_pearson_correlation/__pycache__/__init__.cpython-36.pyc b/q03_pearson_correlation/__pycache__/__init__.cpython-36.pyc index 543c178..652ed7f 100644 Binary files a/q03_pearson_correlation/__pycache__/__init__.cpython-36.pyc and b/q03_pearson_correlation/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_pearson_correlation/__pycache__/build.cpython-36.pyc b/q03_pearson_correlation/__pycache__/build.cpython-36.pyc index ba8cf11..2891916 100644 Binary files a/q03_pearson_correlation/__pycache__/build.cpython-36.pyc and b/q03_pearson_correlation/__pycache__/build.cpython-36.pyc differ diff --git a/q03_pearson_correlation/build.py b/q03_pearson_correlation/build.py index 33a762b..7f6ec37 100644 --- a/q03_pearson_correlation/build.py +++ b/q03_pearson_correlation/build.py @@ -1,3 +1,4 @@ +# %load q03_pearson_correlation/build.py # Default Imports import pandas as pd @@ -7,3 +8,9 @@ # Return the correlation value between the SalePrice column for the two loaded datasets # Your code here +def correlation(): + return dataframe_1['SalePrice'].corr(dataframe_2['SalePrice']) + +correlation() + + diff --git a/q03_pearson_correlation/tests/__pycache__/__init__.cpython-36.pyc b/q03_pearson_correlation/tests/__pycache__/__init__.cpython-36.pyc index d7eca99..859b6b9 100644 Binary files a/q03_pearson_correlation/tests/__pycache__/__init__.cpython-36.pyc and b/q03_pearson_correlation/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_pearson_correlation/tests/__pycache__/test_q03_correlation.cpython-36.pyc b/q03_pearson_correlation/tests/__pycache__/test_q03_correlation.cpython-36.pyc index ed900c4..54f9f72 100644 Binary files a/q03_pearson_correlation/tests/__pycache__/test_q03_correlation.cpython-36.pyc and b/q03_pearson_correlation/tests/__pycache__/test_q03_correlation.cpython-36.pyc differ diff --git a/q04_spearman_correlation/__pycache__/__init__.cpython-36.pyc b/q04_spearman_correlation/__pycache__/__init__.cpython-36.pyc index 7868267..10d203d 100644 Binary files a/q04_spearman_correlation/__pycache__/__init__.cpython-36.pyc and b/q04_spearman_correlation/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_spearman_correlation/__pycache__/build.cpython-36.pyc b/q04_spearman_correlation/__pycache__/build.cpython-36.pyc index 94f735a..8d82eaa 100644 Binary files a/q04_spearman_correlation/__pycache__/build.cpython-36.pyc and b/q04_spearman_correlation/__pycache__/build.cpython-36.pyc differ diff --git a/q04_spearman_correlation/build.py b/q04_spearman_correlation/build.py index 557be32..e1aaa4c 100644 --- a/q04_spearman_correlation/build.py +++ b/q04_spearman_correlation/build.py @@ -1,3 +1,4 @@ +# %load q04_spearman_correlation/build.py # Default Import import pandas as pd @@ -5,4 +6,13 @@ dataframe_2 = pd.read_csv('data/house_prices_copy.csv') # Your code here +def spearman_correlation(): + df1 = pd.DataFrame(dataframe_1['SalePrice']) + df2 = pd.DataFrame(dataframe_2['SalePrice']) + df3 = pd.concat([df1,df2],axis=1) + #print(df3) + return df3.corr(method='spearman',min_periods=1).iloc[0,1] + +spearman_correlation() + diff --git a/q04_spearman_correlation/tests/__pycache__/__init__.cpython-36.pyc b/q04_spearman_correlation/tests/__pycache__/__init__.cpython-36.pyc index 495646a..5c6911e 100644 Binary files a/q04_spearman_correlation/tests/__pycache__/__init__.cpython-36.pyc and b/q04_spearman_correlation/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_spearman_correlation/tests/__pycache__/test_q04_spearman_correlation.cpython-36.pyc b/q04_spearman_correlation/tests/__pycache__/test_q04_spearman_correlation.cpython-36.pyc index d082652..61e2193 100644 Binary files a/q04_spearman_correlation/tests/__pycache__/test_q04_spearman_correlation.cpython-36.pyc and b/q04_spearman_correlation/tests/__pycache__/test_q04_spearman_correlation.cpython-36.pyc differ