# One-hot encoding example in Python
# Loads the UCI breast-cancer data set, one-hot encodes every non-numeric
# column, trains a random forest on the result and prints a
# classification report for a held-out 10% test split.

# Importing Libraries
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# Download the Dataset
DATA_URL = (
    "https://archive.ics.uci.edu/ml/machine-learning-databases/"
    "breast-cancer/breast-cancer.data"
)
# The file is read without a header row, so the column names are supplied here.
columns = [
    'class',
    'age',
    'menopause',
    'tumor_size',
    'inv_nodes',
    'node_caps',
    'deg_malig',
    'breast',
    'breast_quad',
    'irradiat',
]
df = pd.read_csv(DATA_URL, names=columns)

# Data Processing
# Binary target: 0 for 'no-recurrence-events', 1 for anything else.
df['class'] = df['class'].apply(
    lambda x: 0 if x == 'no-recurrence-events' else 1
)
# Every non-numeric column is treated as categorical; selecting by
# "not a number" does not depend on how pandas stores text columns.
categorical_data = df.select_dtypes(exclude=["number"])

# Encoding
# NOTE(review): any missing-value marker present in the raw file (e.g. '?')
# is encoded as a category of its own -- confirm that is intended.
encoded_frames = []
for column_name in categorical_data.columns:
    onehotencoder = OneHotEncoder()
    transformed_data = onehotencoder.fit_transform(
        categorical_data[[column_name]]
    ).toarray()
    # Label the output from the encoder's own category order. The encoder
    # emits one column per entry of `categories_`, which is not the
    # first-appearance order returned by `Series.unique()`.
    encoded_names = [
        column_name + str(category)
        for category in onehotencoder.categories_[0]
    ]
    encoded_frames.append(
        pd.DataFrame(transformed_data, columns=encoded_names, index=df.index)
    )

# Replace the raw categorical columns with their one-hot indicator columns.
df_ohe = pd.concat(
    [df.drop(columns=categorical_data.columns), *encoded_frames],
    axis=1,
)

# Splitting Dataset
y = df_ohe.pop('class')
X_train, X_test, y_train, y_test = train_test_split(
    df_ohe, y, test_size=0.1, random_state=42
)

# Model Implementation
# Seed the forest as well as the split so repeated runs print the same report.
clf = RandomForestClassifier(random_state=42)
clf = clf.fit(X_train, y_train)
predictions = clf.predict(X_test)
# classification_report expects (y_true, y_pred); passing them the other way
# round swaps precision with recall and reports support for the predictions.
print(classification_report(y_test, predictions))