Modelado

Preparación

# Split the data: every column except 'ganador' is a feature; 'ganador' is
# the target.
X = datos.drop(columns="ganador")
y = datos["ganador"]

# One-hot encode the categorical columns and pass every other column through
# unchanged.
# handle_unknown="ignore" matters under cross-validation: a category that
# shows up only in a validation fold is encoded as all zeros instead of
# raising an error at transform time.
procesamiento = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore"), ["fecha", "local", "away"]),
    ],
    remainder="passthrough",
)

Modelos

KNN

# k-nearest-neighbours pipeline: encode -> scale -> classify with the 5
# closest neighbours (exhaustive "brute" search).
# with_mean=False scales without centering, which keeps the scaler usable
# if the column transformer returns a sparse matrix.
modelo_knn = Pipeline(
    steps=[
        ("preprocessor", procesamiento),
        ("scaler", StandardScaler(with_mean=False)),
        ("modelo", KNeighborsClassifier(n_neighbors=5, algorithm="brute")),
    ]
)

# Out-of-fold predictions: each row is predicted by a model fitted on the
# other folds of a 5-fold split.
pred_knn = cross_val_predict(estimator=modelo_knn, X=X, y=y, cv=5)
pred_knn
array(['local', 'local', 'local', ..., 'visita', 'local', 'local'],
      dtype=object)

Árbol

# Shallow decision tree pipeline: encode -> scale -> depth-2 Gini tree.
# The scaling step is kept so the pipeline has the same steps as the other
# models.
# random_state is fixed because the tree permutes features at every split,
# so ties between equally good splits could otherwise resolve differently
# from run to run.
modelo_arbol = Pipeline(
    steps=[
        ("preprocessor", procesamiento),
        ("scaler", StandardScaler(with_mean=False)),
        (
            "modelo",
            DecisionTreeClassifier(criterion="gini", max_depth=2, random_state=0),
        ),
    ]
)

# Out-of-fold predictions from a 5-fold split.
pred_arbol = cross_val_predict(modelo_arbol, X, y, cv=5)
pred_arbol
array(['visita', 'visita', 'local', ..., 'visita', 'local', 'local'],
      dtype=object)

Bosques

# Random forest pipeline: encode -> scale -> 500 Gini trees, each node
# needing at least 50 samples to be split.
# random_state is fixed so the bootstrap samples and feature subsets, and
# therefore the predictions, are the same on every run.
modelo_bosques = Pipeline(
    steps=[
        ("preprocessor", procesamiento),
        ("scaler", StandardScaler(with_mean=False)),
        (
            "modelo",
            RandomForestClassifier(
                n_estimators=500,
                criterion="gini",
                min_samples_split=50,
                random_state=0,
            ),
        ),
    ]
)

# Out-of-fold predictions from a 5-fold split.
pred_bosques = cross_val_predict(modelo_bosques, X, y, cv=5)
pred_bosques
array(['visita', 'visita', 'local', ..., 'visita', 'local', 'local'],
      dtype=object)

Potenciación

# Boosting pipeline: encode -> scale -> gradient boosting with 500 depth-2
# trees and a 0.1 learning rate.
# NOTE: despite the "ada" in the variable name, the estimator is
# GradientBoostingClassifier, not AdaBoost. The name is kept so other cells
# that reference it keep working.
# random_state is fixed so the fitted ensemble is reproducible across runs.
modelo_ada = Pipeline(
    steps=[
        ("preprocessor", procesamiento),
        ("scaler", StandardScaler(with_mean=False)),
        (
            "modelo",
            GradientBoostingClassifier(
                n_estimators=500,
                max_depth=2,
                learning_rate=0.1,
                min_samples_split=10,
                random_state=0,
            ),
        ),
    ]
)

# Out-of-fold predictions from a 5-fold split.
pred_ada = cross_val_predict(modelo_ada, X, y, cv=5)
pred_ada
array(['visita', 'visita', 'local', ..., 'visita', 'local', 'local'],
      dtype=object)

SVM

# Support vector machine pipeline: encode -> scale -> RBF-kernel SVC with
# gamma chosen by the "scale" heuristic.
# with_mean=False scales without centering, which keeps the scaler usable
# if the column transformer returns a sparse matrix.
modelo_svm = Pipeline(
    steps=[
        ("preprocessor", procesamiento),
        ("scaler", StandardScaler(with_mean=False)),
        ("modelo", SVC(kernel="rbf", gamma="scale")),
    ]
)

# Out-of-fold predictions from a 5-fold split.
pred_svm = cross_val_predict(estimator=modelo_svm, X=X, y=y, cv=5)
pred_svm
array(['visita', 'visita', 'local', ..., 'visita', 'local', 'local'],
      dtype=object)