# Features: every column of `datos` except the target column 'ganador'.
X = datos.loc[:, datos.columns != 'ganador']
# Re-wrap the selection as a DataFrame with the same column labels
# (kept as in the original notebook).
X = pd.DataFrame(X, columns=X.columns)
# Target: the 'ganador' column.
y = datos["ganador"]
# Preprocessing step shared by the model pipelines: one-hot encode the
# 'fecha', 'local' and 'away' columns and pass every other column through
# unchanged.
procesamiento = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(), ['fecha', 'local', 'away']),
    ],
    remainder='passthrough',
)
Modelado
Preparación
Modelos
KNN
# k-nearest-neighbours pipeline: preprocessing -> scaling -> classifier.
modelo_knn = Pipeline(steps=[
    ('preprocessor', procesamiento),
    # with_mean=False scales without centering, so a sparse one-hot matrix
    # coming out of the preprocessor is still accepted.
    ('scaler', StandardScaler(with_mean=False)),
    ('modelo', KNeighborsClassifier(n_neighbors=5, algorithm="brute")),
])
# 5-fold cross-validated predictions: each row is predicted by a model
# fitted on the folds that do not contain it.
pred_knn = cross_val_predict(modelo_knn, X, y, cv=5)
pred_knn
array(['local', 'local', 'local', ..., 'visita', 'local', 'local'],
dtype=object)
Árbol
# Decision-tree pipeline: preprocessing -> scaling -> classifier.
modelo_arbol = Pipeline(steps=[
    ('preprocessor', procesamiento),
    # with_mean=False scales without centering, so a sparse one-hot matrix
    # coming out of the preprocessor is still accepted.
    ('scaler', StandardScaler(with_mean=False)),
    # Shallow tree: Gini impurity, at most two levels of splits.
    ('modelo', DecisionTreeClassifier(criterion='gini', max_depth=2)),
])
# 5-fold cross-validated predictions: each row is predicted by a model
# fitted on the folds that do not contain it.
pred_arbol = cross_val_predict(modelo_arbol, X, y, cv=5)
pred_arbol
array(['visita', 'visita', 'local', ..., 'visita', 'local', 'local'],
dtype=object)
Bosques
# Random-forest pipeline: preprocessing -> scaling -> classifier.
modelo_bosques = Pipeline(steps=[
    ('preprocessor', procesamiento),
    # with_mean=False scales without centering, so a sparse one-hot matrix
    # coming out of the preprocessor is still accepted.
    ('scaler', StandardScaler(with_mean=False)),
    # 500 trees; a node is split only if it holds at least 50 samples.
    ('modelo', RandomForestClassifier(
        n_estimators=500,
        criterion="gini",
        min_samples_split=50,
    )),
])
# 5-fold cross-validated predictions: each row is predicted by a model
# fitted on the folds that do not contain it.
pred_bosques = cross_val_predict(modelo_bosques, X, y, cv=5)
pred_bosques
array(['visita', 'visita', 'local', ..., 'visita', 'local', 'local'],
dtype=object)
Potenciación
# Boosting pipeline: preprocessing -> scaling -> classifier.
# NOTE(review): the variable is named `modelo_ada`, but the estimator is
# GradientBoostingClassifier, not AdaBoost; the name is kept as in the
# original notebook.
modelo_ada = Pipeline(steps=[
    ('preprocessor', procesamiento),
    # with_mean=False scales without centering, so a sparse one-hot matrix
    # coming out of the preprocessor is still accepted.
    ('scaler', StandardScaler(with_mean=False)),
    ('modelo', GradientBoostingClassifier(
        n_estimators=500,
        max_depth=2,
        learning_rate=0.1,
        min_samples_split=10,
    )),
])
# 5-fold cross-validated predictions: each row is predicted by a model
# fitted on the folds that do not contain it.
pred_ada = cross_val_predict(modelo_ada, X, y, cv=5)
pred_ada
array(['visita', 'visita', 'local', ..., 'visita', 'local', 'local'],
dtype=object)
SVM
# Support-vector-machine pipeline: preprocessing -> scaling -> classifier.
modelo_svm = Pipeline(steps=[
    ('preprocessor', procesamiento),
    # with_mean=False scales without centering, so a sparse one-hot matrix
    # coming out of the preprocessor is still accepted.
    ('scaler', StandardScaler(with_mean=False)),
    # RBF kernel with scikit-learn's 'scale' heuristic for gamma.
    ('modelo', SVC(kernel='rbf', gamma='scale')),
])
# 5-fold cross-validated predictions: each row is predicted by a model
# fitted on the folds that do not contain it.
pred_svm = cross_val_predict(modelo_svm, X, y, cv=5)
pred_svm
array(['visita', 'visita', 'local', ..., 'visita', 'local', 'local'],
dtype=object)