import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from scipy.stats import kurtosis,skew
from scipy import stats


def resumetable(df):
    """Build a one-row-per-column overview of a DataFrame.

    Prints the shape of ``df`` and returns a summary frame with these columns:

    * ``Name``         - column label
    * ``dtypes``       - column dtype
    * ``Missing``      - number of null entries
    * ``Uniques``      - number of distinct non-null values
    * ``First Value``, ``Second Value``, ``Third Value`` - the values found in
      the first three rows (by position); NaN when ``df`` has fewer rows
    * ``Entropy``      - Shannon entropy (base 2, rounded to 2 decimals) of the
      column's normalized value counts

    Parameters
    ----------
    df : pandas.DataFrame
        The frame to describe.

    Returns
    -------
    pandas.DataFrame
        The summary table described above.
    """
    print(f"Dataset Shape: {df.shape}")

    summary = df.dtypes.rename_axis('Name').reset_index(name='dtypes')
    summary['Missing'] = df.isnull().sum().values
    summary['Uniques'] = df.nunique().values

    # Sample rows are taken by position so the function also works when the
    # index is not the default 0..n-1 range, and degrades to NaN instead of
    # raising when the frame has fewer than three rows.
    n_rows = len(df)
    sample_labels = ('First Value', 'Second Value', 'Third Value')
    for position, label in enumerate(sample_labels):
        if position < n_rows:
            summary[label] = df.iloc[position].values
        else:
            summary[label] = np.nan

    # Entropy is computed per column position so duplicated column labels
    # do not select several columns at once.
    summary['Entropy'] = [
        round(
            stats.entropy(
                df.iloc[:, position].value_counts(normalize=True), base=2
            ),
            2,
        )
        for position in range(df.shape[1])
    ]

    return summary


def CalcOutliers(df_num):
    """Print outlier statistics for a numerical feature (3-sigma rule).

    Original author: Leonardo Ferreira, 20/10/2018.

    A value is treated as an outlier when it lies more than three standard
    deviations away from the mean. Values exactly on a cut line count as
    non-outliers. The function prints the number of lower outliers, upper
    outliers, total outliers, non-outlier observations, and the number of
    outliers expressed as a percentage of the non-outlier observations
    (0.0 when there are no non-outlier observations).

    Parameters
    ----------
    df_num : array-like of numbers
        The numerical values to analyse (e.g. a DataFrame column).

    Returns
    -------
    None
        Results are only printed.
    """
    # Mean and standard deviation of the input.
    data_mean, data_std = np.mean(df_num), np.std(df_num)

    # Width of the band around the mean; change the factor to tighten or
    # loosen the outlier definition.
    cut = data_std * 3
    lower, upper = data_mean - cut, data_mean + cut

    values = np.asarray(df_num)
    n_lower = int(np.count_nonzero(values < lower))
    n_upper = int(np.count_nonzero(values > upper))
    n_outliers = n_lower + n_upper
    # Inclusive bounds: with zero spread (lower == upper == mean) every value
    # is a non-outlier, which avoids dividing by zero below.
    n_regular = int(np.count_nonzero((values >= lower) & (values <= upper)))

    print('Identified lowest outliers: %d' % n_lower)
    print('Identified upper outliers: %d' % n_upper)
    print('Identified outliers: %d' % n_outliers)
    print('Non-outlier observations: %d' % n_regular)

    # Guard against empty / all-NaN input, where nothing is classified.
    if n_regular:
        percentual = round((n_outliers / n_regular) * 100, 4)
    else:
        percentual = 0.0
    print("Total percentual of Outliers: ", percentual)

    return


