## Importing Libraries
import pandas as pd
from category_encoders import TargetEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier

## Importing Dataset
# Column names are supplied explicitly via `names=`, so every line of the
# downloaded file is read as a data row.
columns = [
    'class', 'age', 'menopause', 'tumor_size', 'inv_nodes',
    'node_caps', 'deg_malig', 'breast', 'breast_quad', 'irradiat',
]
df = pd.read_csv(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer/breast-cancer.data",
    names=columns,
)

## Data Processing
# Binarize the target: 0 for 'no-recurrence-events', 1 for any other label.
df['class'] = df['class'].apply(lambda x: 0 if x == 'no-recurrence-events' else 1)
# Snapshot of the object-dtype (string) feature columns.
categorical_data = df.select_dtypes(include=['object'])
# Encode on a copy so the raw frame `df` is left untouched.
df_te = df.copy()
# … (the source elides further steps at this point)
# Target-encode every object-dtype column of df_te in place. A fresh encoder
# is fitted per column against the binary 'class' target.
for col in df_te.select_dtypes(include=['object']).columns:
    target_encoder = TargetEncoder()
    df_te[col] = target_encoder.fit_transform(df_te[col], df_te['class'])

# Bare expression: displays the encoded frame when this is the last statement
# of a notebook cell; it has no effect in a plain script.
df_te
# Target-encode the single 'menopause' column against the 'class' target and
# show the result (the return value is not stored).
# NOTE(review): the preceding loop overwrites every object column of df_te
# with its encoded values, so if it has already run this column is no longer
# categorical — confirm whether this cell was meant to run on the raw column.
target_encoder = TargetEncoder()
target_encoder.fit_transform(df_te['menopause'], df_te['class'])
## Importing Libraries
import pandas as pd
from category_encoders import HashingEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier

## Importing Dataset
# Column names are supplied explicitly via `names=`, so every line of the
# downloaded file is read as a data row.
columns = [
    'class', 'age', 'menopause', 'tumor_size', 'inv_nodes',
    'node_caps', 'deg_malig', 'breast', 'breast_quad', 'irradiat',
]
df = pd.read_csv(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer/breast-cancer.data",
    names=columns,
)

## Data Processing
# Binarize the target: 0 for 'no-recurrence-events', 1 for any other label.
df['class'] = df['class'].apply(lambda x: 0 if x == 'no-recurrence-events' else 1)
# Snapshot of the object-dtype (string) feature columns; the hashing loop
# iterates over these column names.
categorical_data = df.select_dtypes(include=['object'])
# Encode on a copy so the raw frame `df` is left untouched.
df_he = df.copy()
# … (the source elides further steps at this point)
# Hash-encode each categorical column separately and append the resulting
# hash columns to df_he. The original categorical columns are kept.
for column_name in categorical_data.columns:
    # One hash bucket per distinct value observed in the column.
    unique_components = len(categorical_data[column_name].unique())
    hashencoder = HashingEncoder(cols=[column_name], n_components=unique_components)
    transformed_data = hashencoder.fit_transform(df_he[column_name])
    # Prefix the encoder's output column names with the source column name so
    # outputs from different source columns do not collide in df_he.
    transformed_data.columns = [column_name + col for col in transformed_data.columns]
    df_he[transformed_data.columns] = transformed_data

# Bare expression: displays the encoded frame when this is the last statement
# of a notebook cell; it has no effect in a plain script.
df_he