A low-complexity model tends to have low variance but high bias, while a high-complexity model flips that trade-off. The learning curves below compare polynomial degrees 1, 4, and 15 on the same synthetic dataset to make this visible.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.model_selection import learning_curve
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Ridge

# Generate a synthetic dataset
X, y = make_regression(n_samples=100, n_features=1, noise=20, random_state=42)

# Define the model complexities (degrees of polynomial features)
degrees = [1, 4, 15]

# Plot the learning curves for different model complexities
for degree in degrees:
    model = make_pipeline(PolynomialFeatures(degree), Ridge(alpha=0.1))
    train_sizes, train_scores, valid_scores = learning_curve(model, X, y, cv=5)
    plt.plot(train_sizes, np.mean(train_scores, axis=1), label=f'Degree {degree} (training)')
    plt.plot(train_sizes, np.mean(valid_scores, axis=1), label=f'Degree {degree} (validation)')

plt.xlabel('Training set size')
plt.ylabel('Score')
plt.legend(loc='best')
plt.show()
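To read the curves numerically rather than visually, here is a minimal follow-up sketch (assuming the same synthetic data, degrees, and Ridge pipeline as above) that prints the mean training and validation R² at the largest training-set size, along with the gap between them: a large gap suggests high variance (overfitting), while two low scores close together suggest high bias (underfitting).

import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import learning_curve
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Ridge

# Same synthetic dataset as in the plotting example above
X, y = make_regression(n_samples=100, n_features=1, noise=20, random_state=42)

for degree in [1, 4, 15]:
    model = make_pipeline(PolynomialFeatures(degree), Ridge(alpha=0.1))
    train_sizes, train_scores, valid_scores = learning_curve(model, X, y, cv=5)
    # Mean scores at the largest training-set size (last entry of train_sizes)
    train_final = np.mean(train_scores, axis=1)[-1]
    valid_final = np.mean(valid_scores, axis=1)[-1]
    print(f"Degree {degree}: train R^2 = {train_final:.3f}, "
          f"validation R^2 = {valid_final:.3f}, gap = {train_final - valid_final:.3f}")

This is only one way to summarize the curves; the plotted trajectories also show how quickly the two scores converge as more training data is added.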