import pandas as pd
from pandas import DataFrame


# Load the character statistics table; the path is relative to the current
# working directory.
smash_df = pd.read_csv("smash_3.csv")
# Preview the first rows to sanity-check the load.
smash_df.head()


smash_df.shape

(72, 13)


len(smash_df["game"])

72


# Store the "game" column as a pandas categorical instead of plain objects.
smash_df["game"] = smash_df["game"].astype("category")
# NOTE(review): `.info` is referenced without parentheses, so this cell shows
# the bound method's repr (see the recorded output below) rather than the
# summary; `smash_df["game"].info()` is probably what was intended.
smash_df["game"].info

<bound method Series.info of 0         Super Mario
1         Donkey Kong
2               Zelda
3             Metroid
4             Metroid
           ...       
67    Animal Crossing
68            Pokemon
69        Super Mario
70       Dragon Quest
71      Banjo Kazooie
Name: game, Length: 72, dtype: category
Categories (30, object): ['Animal Crossing', 'Banjo Kazooie', 'Bayonetta', 'Castlevania', ..., 'Super Mario', 'Wii', 'Xenoblade', 'Zelda']>


smash_df["game"].value_counts()

Super Mario        10
Pokemon             7
Zelda               6
Fire Emblem         6
Wii                 4
Kirby               4
Metroid             4
Donkey Kong         3
Animal Crossing     2
Icarus              2
Castlevania         2
Street Fighter      2
Star Fox            2
F-Zero              2
Punch Out           1
Xenoblade           1
Bayonetta           1
Stack Up            1
Splatoon            1
Sonic               1
Earthbound          1
Nintendo            1
Mother              1
Banjo Kazooie       1
Metal Gear          1
Mega Man            1
Dragon Quest        1
Duck Hunt           1
Final Fantasy       1
Pikmin              1
Name: game, dtype: int64


smash_df["game"].value_counts(sort = True)

Super Mario        10
Pokemon             7
Zelda               6
Fire Emblem         6
Wii                 4
Kirby               4
Metroid             4
Donkey Kong         3
Animal Crossing     2
Icarus              2
Castlevania         2
Street Fighter      2
Star Fox            2
F-Zero              2
Punch Out           1
Xenoblade           1
Bayonetta           1
Stack Up            1
Splatoon            1
Sonic               1
Earthbound          1
Nintendo            1
Mother              1
Banjo Kazooie       1
Metal Gear          1
Mega Man            1
Dragon Quest        1
Duck Hunt           1
Final Fantasy       1
Pikmin              1
Name: game, dtype: int64


smash_df["game"].value_counts(sort = True, normalize = True)

Super Mario        0.138889
Pokemon            0.097222
Zelda              0.083333
Fire Emblem        0.083333
Wii                0.055556
Kirby              0.055556
Metroid            0.055556
Donkey Kong        0.041667
Animal Crossing    0.027778
Icarus             0.027778
Castlevania        0.027778
Street Fighter     0.027778
Star Fox           0.027778
F-Zero             0.027778
Punch Out          0.013889
Xenoblade          0.013889
Bayonetta          0.013889
Stack Up           0.013889
Splatoon           0.013889
Sonic              0.013889
Earthbound         0.013889
Nintendo           0.013889
Mother             0.013889
Banjo Kazooie      0.013889
Metal Gear         0.013889
Mega Man           0.013889
Dragon Quest       0.013889
Duck Hunt          0.013889
Final Fantasy      0.013889
Pikmin             0.013889
Name: game, dtype: float64


smash_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72 entries, 0 to 71
Data columns (total 13 columns):
 #   Column             Non-Null Count  Dtype   
---  ------             --------------  -----   
 0   character          72 non-null     object  
 1   game               72 non-null     category
 2   regular_fall       72 non-null     float64 
 3   fast_fall          72 non-null     float64 
 4   weight             72 non-null     int64   
 5   walk_speed         72 non-null     float64 
 6   run_speed          72 non-null     float64 
 7   full_hop_height    70 non-null     float64 
 8   short_hop_height   70 non-null     float64 
 9   double_hop_height  70 non-null     float64 
 10  before             72 non-null     int64   
 11  after              72 non-null     int64   
 12  expertise          72 non-null     object  
dtypes: category(1), float64(7), int64(3), object(2)
memory usage: 8.2+ KB


smash_df.describe()


smash_df["expertise"] = smash_df["expertise"].astype("category")


smash_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72 entries, 0 to 71
Data columns (total 13 columns):
 #   Column             Non-Null Count  Dtype   
---  ------             --------------  -----   
 0   character          72 non-null     object  
 1   game               72 non-null     category
 2   regular_fall       72 non-null     float64 
 3   fast_fall          72 non-null     float64 
 4   weight             72 non-null     int64   
 5   walk_speed         72 non-null     float64 
 6   run_speed          72 non-null     float64 
 7   full_hop_height    70 non-null     float64 
 8   short_hop_height   70 non-null     float64 
 9   double_hop_height  70 non-null     float64 
 10  before             72 non-null     int64   
 11  after              72 non-null     int64   
 12  expertise          72 non-null     category
dtypes: category(2), float64(7), int64(3), object(1)
memory usage: 7.9+ KB


smash_df.describe(include = "category")


smash_df = smash_df.rename(columns = {"full_hop_height": "FULL_HOP_HEIGHT_RENAME"})
smash_df.head()


zelda_df = smash_df[smash_df["game"] == "Zelda"]
zelda_df


zelda_df.shape

(6, 13)


zelda_df.describe()


zelda_df_2 = smash_df.query("game == 'Zelda'")
zelda_df_2


zelda_df_2.shape

(6, 13)


zelda_df_2.describe()


round(zelda_df["run_speed"].mean(),5)

1.72983


values = ["Donkey Kong", "Animal Crossing"]
dk_ac_df = smash_df[smash_df["game"].isin(values)]
dk_ac_df


dk_ac_df_2 = smash_df.query("game == 'Donkey Kong' | game == 'Animal Crossing'")
dk_ac_df_2


expert_metroid_df = smash_df[(smash_df["game"] == "Metroid") & (smash_df["expertise"] == "Expert")]
expert_metroid_df


expert_metroid_df_2 = smash_df.query("game == 'Metroid' & expertise == 'Expert'")
expert_metroid_df_2


fast_supermario_df = smash_df[(smash_df["game"] == "Super Mario") & (smash_df["run_speed"] > 2)]
fast_supermario_df


fast_supermario_df_2 = smash_df.query("game == 'Super Mario' & run_speed > 2")
fast_supermario_df_2


fast_df = smash_df[(smash_df["fast_fall"] > 3) & (smash_df["run_speed"] > 2)]
fast_df


fast_df_2 = smash_df.query("fast_fall > 3 & run_speed > 2")
fast_df_2


smash_df.columns.values.tolist()

['character',
 'game',
 'regular_fall',
 'fast_fall',
 'weight',
 'walk_speed',
 'run_speed',
 'FULL_HOP_HEIGHT_RENAME',
 'short_hop_height',
 'double_hop_height',
 'before',
 'after',
 'expertise']


# Map every column label to its integer position in smash_df.
index_dictionary = {
    label: smash_df.columns.get_loc(label) for label in smash_df.columns
}
print(index_dictionary)

{'character': 0, 'game': 1, 'regular_fall': 2, 'fast_fall': 3, 'weight': 4, 'walk_speed': 5, 'run_speed': 6, 'FULL_HOP_HEIGHT_RENAME': 7, 'short_hop_height': 8, 'double_hop_height': 9, 'before': 10, 'after': 11, 'expertise': 12}


index_dictionary_df = pd.DataFrame(index_dictionary, index=[0])
index_dictionary_df


index_dictionary_df_transposed = index_dictionary_df.transpose()
index_dictionary_df_transposed


print(index_dictionary_df_transposed.to_string())

                         0
character                0
game                     1
regular_fall             2
fast_fall                3
weight                   4
walk_speed               5
run_speed                6
FULL_HOP_HEIGHT_RENAME   7
short_hop_height         8
double_hop_height        9
before                  10
after                   11
expertise               12


smash_df.iloc[:,3:8]


smash_df.iloc[:,[0, 3, 5, 7, 9]]


smash_df_1 = smash_df.iloc[:, [0, 5, 6, 12]]
smash_df_1


smash_df_2 = smash_df.loc[:, "character":"weight"]
smash_df_2


smash_df_3 = smash_df.loc[:, ["character", "game", "regular_fall", "fast_fall", "weight", "expertise", 
                              "run_speed", "FULL_HOP_HEIGHT_RENAME"]]
smash_df_3


import numpy as np


smash_df_4 = smash_df.iloc[:, np.r_[0, 5, 6:13]]
smash_df_4.head()


smash_df_5 = smash_df.iloc[:, np.r_[0:5, 12, 6:8]]
smash_df_5.head()


smash_df.loc[0:4]


smash_df.iloc[0:5]


smash_df.iloc[0:4]


smash_df.iloc[0:4, 5:8]


smash_df.iloc[0:4]["double_hop_height"]

0    36.33
1    35.50
2    29.00
3    37.00
Name: double_hop_height, dtype: float64


smash_df.iloc[3,0:4]

character         samus
game            Metroid
regular_fall       1.33
fast_fall         2.168
Name: 3, dtype: object


pd.concat((smash_df.iloc[:, 0:2], smash_df.iloc[:, 7:9]), axis = 1)


smash_left = pd.concat((smash_df.iloc[0:2], smash_df.iloc[7:9]), axis = 0)
smash_left


smash_right = pd.concat((smash_df.iloc[7:10], smash_df.iloc[50:52]), axis = 0)
smash_right


smash_merge_1 = pd.merge(smash_left.iloc[:,0:3], smash_right.iloc[:, 0:3], how = "left", on = "game")
smash_merge_1


smash_merge_2 = pd.merge(smash_left.iloc[:,0:3], smash_right.iloc[:, 0:3], how = "right", on = "game")
smash_merge_2


smash_merge_3 = pd.merge(smash_left.iloc[:,0:3], smash_right.iloc[:, 0:3], how = "inner", on = "game")
smash_merge_3


smash_merge_4 = pd.merge(smash_left.iloc[:,0:3], smash_right.iloc[:, 0:3], how = "outer", on = "game")
smash_merge_4


import numpy as np


smash_df.sample(5)


smash_df.sample(frac = 0.1)


# Rows whose walk_speed is at least 1 get sampling weight 10; every other row
# gets weight 1.
sample_weights = smash_df["walk_speed"].ge(1).map({True: 10, False: 1}).tolist()
smash_df.sample(10, weights = sample_weights)


smash_df["fast_fall"].isnull().values.any()

False


smash_df["fast_fall"].isnull()

0     False
1     False
2     False
3     False
4     False
      ...  
67    False
68    False
69    False
70    False
71    False
Name: fast_fall, Length: 72, dtype: bool


smash_df["short_hop_height"].isnull().values.any()

True


smash_df["short_hop_height"].isnull()

0     False
1     False
2     False
3     False
4     False
      ...  
67    False
68    False
69    False
70     True
71     True
Name: short_hop_height, Length: 72, dtype: bool


smash_df.isnull().values.any()

True


smash_df.isnull()


smash_df.loc[smash_df.isnull().any(axis = 1)]


smash_df_complete = smash_df.dropna()


smash_df_complete.shape

(70, 13)


smash_recoding_df = smash_df.copy()
old_values = ["Amateur", "Expert"]
new_values = ["Blah", "Yay"]
smash_recoding_df["expertise"] = smash_recoding_df["expertise"].replace(old_values, new_values)
smash_recoding_df.head()


def weight_groups_fn(weight, light_max=88, heavy_min=104.5):
    """Bucket a numeric weight into a light / medium / heavy label.

    Parameters
    ----------
    weight : number
        The value to classify.
    light_max : number, optional
        Weights strictly below this are "light weight". The default of 88
        matches the 25% quartile of ``weight`` in the describe() table.
    heavy_min : number, optional
        Weights at or above this are "heavy weight". The default of 104.5
        matches the 75% quartile of ``weight`` in the describe() table.

    Returns
    -------
    str or None
        "light weight", "medium weight" or "heavy weight". ``None`` is
        returned when ``weight`` cannot be ordered against the cut-offs,
        which is the case for NaN because every comparison with it is False.
    """
    if weight < light_max:
        return "light weight"
    if light_max <= weight < heavy_min:
        return "medium weight"
    if heavy_min <= weight:
        return "heavy weight"
    # Only reachable for NaN-like input; keep the missing marker explicit
    # instead of relying on an implicit fall-through.
    return None

smash_recoding_df["weight_group"] = smash_recoding_df["weight"].apply(weight_groups_fn)
smash_recoding_df["weight_group"].value_counts(sort = True)

medium weight    37
heavy weight     18
light weight     17
Name: weight_group, dtype: int64


smash_recoding_df.head()


smash_w_dummies_df = pd.get_dummies(smash_recoding_df, prefix_sep = '_dummy_', drop_first = True)
smash_w_dummies_df.head(5)


import numpy as np
# Work on a copy so the artificial NaNs never reach smash_recoding_df.
smash_w_missing_df = smash_recoding_df.copy()
# NOTE: sample() is called without random_state, so a different set of rows is
# chosen on every run.
missing_rows = smash_w_missing_df.sample(5).index # index labels of 5 randomly chosen rows
smash_w_missing_df.loc[missing_rows, "regular_fall"] = np.nan # blank out "regular_fall" on those 5 sampled rows
print("Number of rows with valid regular_fall values after setting to NAN: ",
      smash_w_missing_df["regular_fall"].count()) # count() tallies only the non-missing entries

Number of rows with valid regular_fall values after setting to NAN:  67


# Impute the blanked-out "regular_fall" entries with the column median.
median_regular_fall = smash_w_missing_df["regular_fall"].median() # median of the "regular_fall" column
smash_w_missing_df["regular_fall"] = smash_w_missing_df["regular_fall"].fillna(value = median_regular_fall) # replace each NaN with that median
print('Number of rows with valid regular_fall values after filling NA values: ',
      smash_w_missing_df["regular_fall"].count()) # non-missing count after the fill

Number of rows with valid regular_fall values after filling NA values:  72


from sklearn import preprocessing


smash_df.shape

(72, 13)


smash_norm_df = smash_df.select_dtypes(include = np.number)
smash_norm_df


from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
smash_norm_df = pd.DataFrame(scaler.fit_transform(smash_norm_df),
                             index = smash_norm_df.index,
                             columns = smash_norm_df.columns)
smash_norm_df


smash_cat_df = smash_df.select_dtypes(exclude = np.number)
smash_cat_df


smash_norm_combined_df = pd.concat([smash_norm_df, smash_df.select_dtypes(exclude = np.number)], axis = 1)
smash_norm_combined_df


smash_df_numeric = smash_df.iloc[:, 2:11]
smash_df_numeric.head()


import numpy as np
smash_df_numeric_log = np.log(smash_df_numeric)
smash_df_numeric_log.head()


smash_df_numeric_exp = np.exp(smash_df_numeric_log)
smash_df_numeric_exp.head()


smash_df_numeric_log10 = np.log10(smash_df_numeric)
smash_df_numeric_log10.head()


def exp10(x):
    """Return ten raised to the power ``x`` (the inverse of a base-10 log)."""
    return pow(10, x)
 
# execute the function
smash_df_numeric_log10_exp10 = smash_df_numeric_log10.apply(exp10)
smash_df_numeric_log10_exp10.head()


smash_df_numeric_2 = smash_df.iloc[:, 2:11]
smash_df_numeric_2.head()


def multiply_by_5(x):
    """Return ``x`` scaled up by a factor of five."""
    factor = 5
    return factor * x
 
# execute the function
smash_df_numeric_2_x5 = smash_df_numeric_2.apply(multiply_by_5)
 
# displaying the DataFrame

smash_df_numeric_2_x5.head()


def divide_by_5(x):
    """Return ``x`` divided by five using true division."""
    divisor = 5
    return x / divisor
 
# execute the function
smash_df_numeric_2_divide_5 = round(smash_df_numeric_2_x5.apply(divide_by_5),2)
 
# displaying the DataFrame

smash_df_numeric_2_divide_5.head()


pivot_1 = smash_df.value_counts("expertise")
pivot_1

expertise
Expert     45
Amateur    27
dtype: int64


pivot_2 = pd.pivot_table(smash_df, values = "regular_fall", index = "game", aggfunc = "mean")
pivot_2


pivot_3 = pd.pivot_table(smash_df, values = "regular_fall", columns = "expertise", index = "game", aggfunc = "mean")
pivot_3


pivot_4 = pd.pivot_table(smash_df, values = "regular_fall", index = "expertise", aggfunc = "median")
pivot_4


pivot_5 = pd.pivot_table(smash_df, values = ["regular_fall", "fast_fall"], index = "expertise", aggfunc = "median")
pivot_5


pivot_6 = pd.pivot_table(smash_df, values = "regular_fall", index = ["expertise", "game"], aggfunc = "median")
pivot_6


pivot_7 = pd.pivot_table(smash_df, values = "regular_fall", columns = ["expertise", "game"], aggfunc = "median")
pivot_7


pivot_8 = pd.pivot_table(smash_df, values = ["regular_fall", "fast_fall"], index = ["expertise", "game"], aggfunc = "median")
pivot_8

# to export
# pivot_8.to_csv("pivot_8.csv")


pivot_9 = pd.pivot_table(smash_df, values = ["regular_fall", "fast_fall"], 
                         index = ["expertise", "game"], aggfunc = ["median", "mean", "max"])
pivot_9


pivot_4.plot.bar(figsize = (10,5), title = "Median Regular Fall by Expertise")

<AxesSubplot:title={'center':'Median Regular Fall by Expertise'}, xlabel='expertise'>


# Horizontal bar chart of the pivot_5 medians, grouped by expertise.
# NOTE(review): pivot_5 aggregates both "regular_fall" and "fast_fall", yet the
# title is identical to the pivot_4 chart's and names only one of the plotted
# measures; consider a title that covers both.
pivot_5.plot.barh(figsize = (10,5), title = "Median Regular Fall by Expertise")

<AxesSubplot:title={'center':'Median Regular Fall by Expertise'}, ylabel='expertise'>


from sklearn.model_selection import train_test_split


smash_split_df = smash_df.copy()


X = smash_split_df.drop(columns = ["expertise"])
y = smash_split_df["expertise"].astype("category")


train_X, valid_X, train_y, valid_y = train_test_split(X, y, test_size = 0.3, random_state = 666)


train_X.head()


valid_X.head()


train_y.head()

64    Amateur
22     Expert
38     Expert
25    Amateur
71     Expert
Name: expertise, dtype: category
Categories (2, object): ['Amateur', 'Expert']


valid_y.head()

37     Expert
68     Expert
1      Expert
52     Expert
17    Amateur
Name: expertise, dtype: category
Categories (2, object): ['Amateur', 'Expert']


len(train_X)

50


len(train_y)

50


len(valid_X)

22


len(valid_y)

22


smash_df.head()


pd.DataFrame(smash_df.columns.values, columns = ["Variables"])


smash_df.dtypes

character                   object
game                      category
regular_fall               float64
fast_fall                  float64
weight                       int64
walk_speed                 float64
run_speed                  float64
FULL_HOP_HEIGHT_RENAME     float64
short_hop_height           float64
double_hop_height          float64
before                       int64
after                        int64
expertise                 category
dtype: object


smash_df_7 = smash_df.iloc[:,[1, 2, 3, 4, 12]]
smash_df_7.head()


smash_df_7_predictors = pd.get_dummies(smash_df_7.drop(columns = ["expertise"]), drop_first = False)
smash_df_7_predictors.head()


smash_df_7_full = pd.concat([smash_df_7_predictors, smash_df_7["expertise"]], axis = 1)
smash_df_7_full.head()


smash_df_7_full.columns.values

array(['regular_fall', 'fast_fall', 'weight', 'game_Animal Crossing',
       'game_Banjo Kazooie', 'game_Bayonetta', 'game_Castlevania',
       'game_Donkey Kong', 'game_Dragon Quest', 'game_Duck Hunt',
       'game_Earthbound', 'game_F-Zero', 'game_Final Fantasy',
       'game_Fire Emblem', 'game_Icarus', 'game_Kirby', 'game_Mega Man',
       'game_Metal Gear', 'game_Metroid', 'game_Mother', 'game_Nintendo',
       'game_Pikmin', 'game_Pokemon', 'game_Punch Out', 'game_Sonic',
       'game_Splatoon', 'game_Stack Up', 'game_Star Fox',
       'game_Street Fighter', 'game_Super Mario', 'game_Wii',
       'game_Xenoblade', 'game_Zelda', 'expertise'], dtype=object)


# Derive the predictor names from the dummy-encoded frame instead of pasting
# the printed column listing back in by hand, so the lists cannot drift from
# the data. smash_df_7_predictors holds every column of smash_df_7_full except
# the outcome, in the same order.
predictors = list(smash_df_7_predictors.columns)
# Continuous predictors that are standardised further down.
predictors_numerical = ['regular_fall', 'fast_fall', 'weight']
# Everything else is a game_* dummy column produced by pd.get_dummies.
predictors_categorical = [name for name in predictors
                          if name not in predictors_numerical]
outcome = "expertise"


train_data, valid_data = train_test_split(smash_df_7_full, test_size = 0.3, random_state = 666)


train_data.head()


valid_data.head()


from sklearn import preprocessing

scaler = preprocessing.StandardScaler()
scaler.fit(train_data[predictors_numerical])

StandardScaler()


train_df_norm = pd.DataFrame(scaler.transform(train_data[predictors_numerical]), columns = predictors_numerical)
train_df_norm.head()


# train_df_norm was created without an index argument, so it carries a default
# 0..n-1 index. Give it train_data's row labels so the axis=1 concat that
# follows lines up each scaled row with its own dummy columns.

train_df_norm.index = train_data.index


train_X = pd.concat([train_df_norm, train_data[predictors_categorical]], axis = 1)
train_X.head()


valid_df_norm = pd.DataFrame(scaler.transform(valid_data[predictors_numerical]), columns = predictors_numerical)
valid_df_norm.head()


# valid_df_norm was created without an index argument, so it carries a default
# 0..n-1 index. Give it valid_data's row labels so the axis=1 concat that
# follows lines up each scaled row with its own dummy columns.

valid_df_norm.index = valid_data.index


valid_X = pd.concat([valid_df_norm, valid_data[predictors_categorical]], axis = 1)
valid_X.head()


train_y = train_data[outcome].astype("category")
train_y.head()

64    Amateur
22     Expert
38     Expert
25    Amateur
71     Expert
Name: expertise, dtype: category
Categories (2, object): ['Amateur', 'Expert']


valid_y = valid_data[outcome].astype("category")
valid_y.head()

37     Expert
68     Expert
1      Expert
52     Expert
17    Amateur
Name: expertise, dtype: category
Categories (2, object): ['Amateur', 'Expert']


# Two new observations, given column by column.
data = dict(
    regular_fall=[1.8, 2.3],
    fast_fall=[1.7, 2.0],
    weight=[88, 75],
    game=["Super Mario", "Zelda"],
)

# One row per observation; column order follows the mapping above.
new_df = pd.DataFrame(data=data)
new_df


new_df_dummies = pd.get_dummies(new_df)
new_df_dummies


new_df_dummies_full = new_df_dummies.reindex(columns = train_X.columns, fill_value = 0)
new_df_dummies_full


pd.DataFrame(new_df_dummies_full.columns.values, columns = ["Variables"])


new_df_dummies_full_norm = pd.DataFrame(scaler.transform(new_df_dummies_full[predictors_numerical]), 
                                        columns = predictors_numerical)
new_df_dummies_full_norm.head()


new_df_dummies_full_norm_2 = pd.concat([new_df_dummies_full_norm, new_df_dummies_full[predictors_categorical]], 
                                       axis = 1)
new_df_dummies_full_norm_2.head()


pd.DataFrame(new_df_dummies_full_norm_2.columns.values, columns = ["Variables"])

	regular_fall	fast_fall	weight	walk_speed	run_speed	full_hop_height	short_hop_height	double_hop_height	before	after
count	72.000000	72.000000	72.00000	72.000000	72.000000	70.000000	70.000000	70.000000	72.000000	72.000000
mean	1.600069	2.546306	96.25000	1.104171	1.806428	33.211864	16.027429	34.440000	6.319444	7.722222
std	0.222869	0.353823	13.93218	0.213718	0.391566	5.112580	2.302452	6.379117	2.582860	1.576430
min	0.980000	1.568000	62.00000	0.620000	1.180000	19.790000	11.260000	19.790000	2.000000	5.000000
25%	1.472500	2.240000	88.00000	0.924000	1.591250	30.885375	14.500000	30.782500	4.000000	7.000000
50%	1.620000	2.592000	95.50000	1.133000	1.722500	33.000000	16.225000	33.500000	6.000000	8.000000
75%	1.770000	2.820000	104.50000	1.259000	1.979750	35.575000	17.482500	36.832500	8.250000	9.000000
max	2.100000	3.360000	135.00000	1.575000	3.850000	50.510000	22.110000	57.350000	10.000000	10.000000

	character	game	regular_fall	fast_fall	weight	walk_speed	run_speed	FULL_HOP_HEIGHT_RENAME	short_hop_height	double_hop_height	before	after	expertise
2	link	Zelda	1.60	3.040	104	1.247	1.534	27.80	13.38	29.00	10	7	Expert
16	sheik	Zelda	1.75	2.800	78	1.470	2.420	39.00	18.75	40.00	6	5	Expert
17	zelda	Zelda	1.35	2.160	85	0.914	1.430	31.55	15.24	31.55	4	7	Amateur
22	young link	Zelda	1.80	2.880	88	1.260	1.749	33.66	16.26	33.66	7	10	Expert
23	ganondorf	Zelda	1.65	2.640	118	0.767	1.340	25.49	12.24	26.00	10	8	Expert
42	toon link	Zelda	1.38	2.208	91	1.288	1.906	33.80	16.32	33.80	3	7	Amateur

	regular_fall	fast_fall	weight	walk_speed	run_speed	FULL_HOP_HEIGHT_RENAME	short_hop_height	double_hop_height	before	after
count	6.000000	6.000000	6.000000	6.000000	6.000000	6.000000	6.00000	6.000000	6.000000	6.000000
mean	1.588333	2.621333	94.000000	1.157667	1.729833	31.883333	15.36500	32.335000	6.666667	7.333333
std	0.187127	0.362804	14.546477	0.262673	0.396977	4.798678	2.31917	4.785912	2.943920	1.632993
min	1.350000	2.160000	78.000000	0.767000	1.340000	25.490000	12.24000	26.000000	3.000000	5.000000
25%	1.435000	2.316000	85.750000	0.997250	1.456000	28.737500	13.84500	29.637500	4.500000	7.000000
50%	1.625000	2.720000	89.500000	1.253500	1.641500	32.605000	15.75000	32.605000	6.500000	7.000000
75%	1.725000	2.860000	100.750000	1.281000	1.866750	33.765000	16.30500	33.765000	9.250000	7.750000
max	1.800000	3.040000	118.000000	1.470000	2.420000	39.000000	18.75000	40.000000	10.000000	10.000000

	character	game	regular_fall	fast_fall	weight	walk_speed	run_speed	FULL_HOP_HEIGHT_RENAME	short_hop_height	double_hop_height	before	after	expertise
2	link	Zelda	1.60	3.040	104	1.247	1.534	27.80	13.38	29.00	10	7	Expert
16	sheik	Zelda	1.75	2.800	78	1.470	2.420	39.00	18.75	40.00	6	5	Expert
17	zelda	Zelda	1.35	2.160	85	0.914	1.430	31.55	15.24	31.55	4	7	Amateur
22	young link	Zelda	1.80	2.880	88	1.260	1.749	33.66	16.26	33.66	7	10	Expert
23	ganondorf	Zelda	1.65	2.640	118	0.767	1.340	25.49	12.24	26.00	10	8	Expert
42	toon link	Zelda	1.38	2.208	91	1.288	1.906	33.80	16.32	33.80	3	7	Amateur

	regular_fall	fast_fall	weight	walk_speed	run_speed	FULL_HOP_HEIGHT_RENAME	short_hop_height	double_hop_height	before	after
count	6.000000	6.000000	6.000000	6.000000	6.000000	6.000000	6.00000	6.000000	6.000000	6.000000
mean	1.588333	2.621333	94.000000	1.157667	1.729833	31.883333	15.36500	32.335000	6.666667	7.333333
std	0.187127	0.362804	14.546477	0.262673	0.396977	4.798678	2.31917	4.785912	2.943920	1.632993
min	1.350000	2.160000	78.000000	0.767000	1.340000	25.490000	12.24000	26.000000	3.000000	5.000000
25%	1.435000	2.316000	85.750000	0.997250	1.456000	28.737500	13.84500	29.637500	4.500000	7.000000
50%	1.625000	2.720000	89.500000	1.253500	1.641500	32.605000	15.75000	32.605000	6.500000	7.000000
75%	1.725000	2.860000	100.750000	1.281000	1.866750	33.765000	16.30500	33.765000	9.250000	7.750000
max	1.800000	3.040000	118.000000	1.470000	2.420000	39.000000	18.75000	40.000000	10.000000	10.000000

Super Smash Parseltongue

1. Import

2. Description

3. Filtering

3.1 Single Criterion

3.2 Multiple Criteria

3.3 Columns

3.4 Rows

3.5 Concatenate

3.6 Sampling

3.7 Missing values

4. Recoding

4.1 Use a list for categorical variables

4.2 Use a function for numerical variables

4.3 Binary variables

4.4 Missing values

4.5 Normalisation

4.6 Transformation

5. Pivot Table

6. Training Validation Split

7. Normalisation Based on Training Set

	character	game	regular_fall	fast_fall	weight	walk_speed	run_speed	full_hop_height	short_hop_height	double_hop_height	before	after	expertise
0	mario	Super Mario	1.50	2.400	98	1.155	1.760	36.33	17.54	36.33	3	8	Amateur
1	donkey kong	Donkey Kong	1.63	2.608	127	1.365	1.873	34.00	17.30	35.50	8	10	Expert
2	link	Zelda	1.60	3.040	104	1.247	1.534	27.80	13.38	29.00	10	7	Expert
3	samus	Metroid	1.33	2.168	108	1.115	1.654	37.00	18.00	37.00	6	9	Expert
4	dark samus	Metroid	1.33	2.168	108	1.115	1.654	37.00	18.00	37.00	2	5	Amateur

	character	game	regular_fall	fast_fall	weight	walk_speed	run_speed	FULL_HOP_HEIGHT_RENAME	short_hop_height	double_hop_height	before	after	expertise
7	fox	Star Fox	2.10	3.36	77	1.523	2.402	35.0	16.40	37.0	6	8	Expert
47	little mac	Punch Out	1.95	3.12	87	1.386	2.464	26.0	12.53	26.0	6	8	Expert

	character	game	regular_fall	fast_fall	weight	walk_speed	run_speed	FULL_HOP_HEIGHT_RENAME	short_hop_height	double_hop_height	before	after	expertise
7	fox	Star Fox	2.10	3.360	77	1.523	2.402	35.0	16.40	37.00	6	8	Expert
8	pikachu	Pokemon	1.55	2.480	79	1.302	2.039	35.5	17.12	35.50	8	9	Expert
9	luigi	Super Mario	1.32	2.112	97	1.134	1.650	44.0	19.98	41.31	7	10	Expert
50	mii swordfighter	Wii	1.55	2.480	100	1.070	1.580	28.3	12.30	28.30	8	7	Expert
51	mii gunner	Wii	1.45	2.320	104	1.000	1.370	30.9	14.60	32.30	7	8	Expert

	character	game	regular_fall	fast_fall	weight	walk_speed	run_speed	FULL_HOP_HEIGHT_RENAME	short_hop_height	double_hop_height	before	after	expertise
55	bowser jr	Super Mario	1.65	2.640	108	0.924	1.566	34.40	16.59	34.40	7	9	Expert
22	young link	Zelda	1.80	2.880	88	1.260	1.749	33.66	16.26	33.66	7	10	Expert
27	mr game and watch	Nintendo	1.24	1.984	75	1.180	1.679	27.51	13.26	27.51	3	5	Amateur
50	mii swordfighter	Wii	1.55	2.480	100	1.070	1.580	28.30	12.30	28.30	8	7	Expert
35	diddy kong	Donkey Kong	1.75	2.800	90	1.313	2.006	41.21	19.90	41.21	10	9	Expert
12	jigglypuff	Pokemon	0.98	1.568	68	0.735	1.271	19.79	11.26	19.79	3	7	Amateur
9	luigi	Super Mario	1.32	2.112	97	1.134	1.650	44.00	19.98	41.31	7	10	Expert

	character	game	regular_fall	fast_fall	weight	walk_speed	run_speed	FULL_HOP_HEIGHT_RENAME	short_hop_height	double_hop_height	before	after	expertise
37	sonic	Sonic	1.65	2.640	86	1.444	3.850	35.00	16.89	35.00	10	5	Expert
63	ridley	Metroid	1.78	2.848	107	1.100	2.200	34.00	14.20	32.00	2	6	Amateur
38	king dedede	Kirby	1.95	3.120	127	1.029	1.496	32.85	16.02	32.85	6	5	Expert
40	lucario	Pokemon	1.68	2.688	92	1.103	1.705	37.62	18.19	37.62	5	8	Amateur
29	pit	Icarus	1.48	2.368	96	1.259	1.828	31.00	14.96	31.00	8	9	Expert
19	pichu	Pokemon	1.90	2.500	62	1.302	1.892	36.75	17.43	36.02	6	9	Expert
1	donkey kong	Donkey Kong	1.63	2.608	127	1.365	1.873	34.00	17.30	35.50	8	10	Expert
21	marth	Fire Emblem	1.58	2.528	90	1.575	1.964	33.66	16.26	33.66	3	7	Amateur
67	isabelle	Animal Crossing	1.30	2.080	88	1.140	1.480	32.50	15.69	32.50	9	7	Expert
43	wolf	Star Fox	1.80	2.880	92	1.208	1.540	32.02	15.38	30.71	10	10	Expert

	character	game	regular_fall	fast_fall	weight	walk_speed	run_speed	FULL_HOP_HEIGHT_RENAME	short_hop_height	double_hop_height	before	after	expertise
0	False	False	False	False	False	False	False	False	False	False	False	False	False
1	False	False	False	False	False	False	False	False	False	False	False	False	False
2	False	False	False	False	False	False	False	False	False	False	False	False	False
3	False	False	False	False	False	False	False	False	False	False	False	False	False
4	False	False	False	False	False	False	False	False	False	False	False	False	False
...	...	...	...	...	...	...	...	...	...	...	...	...	...
67	False	False	False	False	False	False	False	False	False	False	False	False	False
68	False	False	False	False	False	False	False	False	False	False	False	False	False
69	False	False	False	False	False	False	False	False	False	False	False	False	False
70	False	False	False	False	False	False	False	True	True	True	False	False	False
71	False	False	False	False	False	False	False	True	True	True	False	False	False

	character	game	regular_fall	fast_fall	weight	walk_speed	run_speed	FULL_HOP_HEIGHT_RENAME	short_hop_height	double_hop_height	before	after	expertise
70	hero	Dragon Quest	1.57	2.512	101	0.98	1.84	NaN	NaN	NaN	6	7	Expert
71	banjo and kazooie	Banjo Kazooie	1.76	2.816	106	1.06	2.18	NaN	NaN	NaN	7	8	Expert

	regular_fall	fast_fall	weight	walk_speed	run_speed	FULL_HOP_HEIGHT_RENAME	short_hop_height	double_hop_height	before	after
0	-0.452156	-0.416400	0.126490	0.239502	-0.119401	0.614298	0.661683	0.298418	-1.294201	0.177443
1	0.135239	0.175589	2.222609	1.228999	0.171209	0.155269	0.556693	0.167367	0.655223	1.455036
2	-0.000314	1.405105	0.560170	0.672996	-0.700620	-1.066182	-1.158132	-0.858940	1.434992	-0.461353
3	-1.220287	-1.076696	0.849290	0.051026	-0.392008	0.746294	0.862912	0.404207	-0.124547	0.816239
4	-1.220287	-1.076696	0.849290	0.051026	-0.392008	0.746294	0.862912	0.404207	-1.684085	-1.738945
...	...	...	...	...	...	...	...	...	...	...
67	-1.355840	-1.327153	-0.596310	0.168823	-0.839496	-0.140243	-0.147610	-0.306313	1.045108	-0.461353
68	0.722633	0.767578	1.427530	-2.281362	-1.611026	-0.317551	-0.711928	-0.258945	0.265338	-1.100149
69	1.581133	0.522813	1.138410	-1.621696	-0.222272	0.825097	0.644185	0.644205	1.045108	-0.461353
70	-0.135866	-0.097637	0.343330	-0.585080	0.086340	NaN	NaN	NaN	-0.124547	-0.461353
71	0.722633	0.767578	0.704730	-0.208128	0.960741	NaN	NaN	NaN	0.265338	0.177443

	regular_fall	fast_fall	weight	walk_speed	run_speed	FULL_HOP_HEIGHT_RENAME	short_hop_height	double_hop_height	before
0	0.405465	0.875469	4.584967	0.144100	0.565314	3.592644	2.864484	3.592644	1.098612
1	0.488580	0.958584	4.844187	0.311154	0.627541	3.526361	2.850707	3.569533	2.079442
2	0.470004	1.111858	4.644391	0.220741	0.427879	3.325036	2.593761	3.367296	2.302585
3	0.285179	0.773805	4.682131	0.108854	0.503197	3.610918	2.890372	3.610918	1.791759
4	0.285179	0.773805	4.682131	0.108854	0.503197	3.610918	2.890372	3.610918	0.693147

	regular_fall	fast_fall	weight	walk_speed	run_speed	FULL_HOP_HEIGHT_RENAME	short_hop_height	double_hop_height	before
0	1.50	2.400	98.0	1.155	1.760	36.33	17.54	36.33	3.0
1	1.63	2.608	127.0	1.365	1.873	34.00	17.30	35.50	8.0
2	1.60	3.040	104.0	1.247	1.534	27.80	13.38	29.00	10.0
3	1.33	2.168	108.0	1.115	1.654	37.00	18.00	37.00	6.0
4	1.33	2.168	108.0	1.115	1.654	37.00	18.00	37.00	2.0

	regular_fall	fast_fall	weight	walk_speed	run_speed	FULL_HOP_HEIGHT_RENAME	short_hop_height	double_hop_height	before
0	0.176091	0.380211	1.991226	0.062582	0.245513	1.560265	1.244030	1.560265	0.477121
1	0.212188	0.416308	2.103804	0.135133	0.272538	1.531479	1.238046	1.550228	0.903090
2	0.204120	0.482874	2.017033	0.095866	0.185825	1.444045	1.126456	1.462398	1.000000
3	0.123852	0.336059	2.033424	0.047275	0.218536	1.568202	1.255273	1.568202	0.778151
4	0.123852	0.336059	2.033424	0.047275	0.218536	1.568202	1.255273	1.568202	0.301030

	regular_fall	fast_fall	weight	walk_speed	run_speed	FULL_HOP_HEIGHT_RENAME	short_hop_height	double_hop_height	before
0	7.50	12.00	490	5.775	8.800	181.65	87.7	181.65	15
1	8.15	13.04	635	6.825	9.365	170.00	86.5	177.50	40
2	8.00	15.20	520	6.235	7.670	139.00	66.9	145.00	50
3	6.65	10.84	540	5.575	8.270	185.00	90.0	185.00	30
4	6.65	10.84	540	5.575	8.270	185.00	90.0	185.00	10

	regular_fall	fast_fall	weight	walk_speed	run_speed	FULL_HOP_HEIGHT_RENAME	short_hop_height	double_hop_height	before
0	1.50	2.40	98.0	1.16	1.76	36.33	17.54	36.33	3.0
1	1.63	2.61	127.0	1.36	1.87	34.00	17.30	35.50	8.0
2	1.60	3.04	104.0	1.25	1.53	27.80	13.38	29.00	10.0
3	1.33	2.17	108.0	1.12	1.65	37.00	18.00	37.00	6.0
4	1.33	2.17	108.0	1.12	1.65	37.00	18.00	37.00	2.0

	regular_fall
game
Animal Crossing	1.310000
Banjo Kazooie	1.760000
Bayonetta	1.770000
Castlevania	1.850000
Donkey Kong	1.693333
Dragon Quest	1.570000
Duck Hunt	1.650000
Earthbound	1.310000
F-Zero	1.832500
Final Fantasy	1.680000
Fire Emblem	1.663333
Icarus	1.515000
Kirby	1.580000
Mega Man	1.800000
Metal Gear	1.730000
Metroid	1.535000
Mother	1.370000
Nintendo	1.240000
Pikmin	1.350000
Pokemon	1.610000
Punch Out	1.950000
Sonic	1.650000
Splatoon	1.580000
Stack Up	1.600000
Star Fox	1.950000
Street Fighter	1.600000
Super Mario	1.497000
Wii	1.552500
Xenoblade	1.580000
Zelda	1.588333

	character	game	regular_fall	fast_fall	weight	walk_speed	run_speed	FULL_HOP_HEIGHT_RENAME	short_hop_height	double_hop_height	before	after
64	simon	Castlevania	1.85	2.960	107	0.760	1.520	30.00	18.00	29.00	3	9
22	young link	Zelda	1.80	2.880	88	1.260	1.749	33.66	16.26	33.66	7	10
38	king dedede	Kirby	1.95	3.120	127	1.029	1.496	32.85	16.02	32.85	6	5
25	roy	Fire Emblem	1.80	2.880	95	1.208	2.145	30.97	13.00	28.00	3	6
71	banjo and kazooie	Banjo Kazooie	1.76	2.816	106	1.060	2.180	NaN	NaN	NaN	7	8

	regular_fall	fast_fall	weight
0	1.060291	1.114411	0.822010
1	0.843285	0.894801	-0.515164
2	1.494303	1.553631	2.229562
3	0.843285	0.894801	-0.022521
4	0.669681	0.719113	0.751633

	regular_fall	fast_fall	weight
0	0.192267	0.235971	-0.655919
1	0.669681	0.719113	1.455409
2	0.105465	0.148127	2.229562
3	-0.241745	-0.203249	-0.304031
4	-1.109769	-1.081689	-0.726297

	regular_fall	fast_fall	weight	game_Animal Crossing	game_Banjo Kazooie	game_Bayonetta	game_Castlevania	game_Donkey Kong	game_Dragon Quest	game_Duck Hunt	...	game_Punch Out	game_Sonic	game_Splatoon	game_Stack Up	game_Star Fox	game_Street Fighter	game_Super Mario	game_Wii	game_Xenoblade	game_Zelda
0	0.843285	-2.344446	-0.515164	0	0	0	0	0	0	0	...	0	0	0	0	0	0	1	0	0	0
1	3.013346	-1.520909	-1.430073	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	1