import os
import math
import joblib
import warnings
import pandas as pd
import numpy as np
from datetime import date
import plotly.graph_objects as go
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_predict
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
Introducción
Problema de clasificación utilizando datos que contienen estadísticas de partidos de la NBA, incluyendo información sobre el lugar, fecha, equipos involucrados y estadísticas individuales para los equipos local y visitante. Se va a predecir si el equipo local o el visitante gana el encuentro.
Paquetes
Carga de datos
# Load the NBA game data from a semicolon-delimited CSV that uses "." as the
# decimal separator.
datos = pd.read_csv('data/nba_juegos.csv', delimiter=';', decimal=".")
# Bare expression: in a notebook cell this renders the DataFrame preview.
datos
fecha | lugar | local | away | id | pts | MIN_local | FG_local | PT3_local | TL_local | ... | TL_visita | OREB_visita | DREB_visita | REB_visita | AST_visita | STL_visita | TOV_visita | PF_visita | PM_visita | PTS_visita | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2023-01-01 | TN | Grizzlies | Kings | 401468702 | 226 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
1 | 2023-01-01 | WI | Bucks | Wizards | 401468703 | 213 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
2 | 2023-01-01 | CO | Nuggets | Celtics | 401468704 | 234 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
3 | 2023-01-02 | NY | Knicks | Suns | 401468705 | 185 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
4 | 2023-01-02 | NC | Hornets | Lakers | 401468706 | 236 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2729 | 2025-01-03 | OK | Thunder | Knicks | 401705039 | 224 | 229.333333 | 5.419823 | 4.313889 | 2.575397 | ... | 5.009524 | 12.300000 | 35.100000 | 47.400000 | 31.900000 | 7.900000 | 18.200000 | 20.000000 | -34.900000 | 125.900000 |
2730 | 2025-01-03 | TX | Mavericks | Cavaliers | 401705040 | 256 | 181.333333 | 5.469890 | 3.153846 | 2.025000 | ... | 4.495833 | 13.333333 | 41.000000 | 54.333333 | 42.166667 | 4.666667 | 19.500000 | 18.833333 | -25.333333 | 142.333333 |
2731 | 2025-01-03 | CO | Nuggets | Spurs | 401705041 | 223 | 206.833333 | 3.859577 | 2.543737 | 5.183333 | ... | 7.263968 | 12.766667 | 49.566667 | 62.333333 | 44.466667 | 10.616667 | 21.483333 | 22.683333 | -31.566667 | 168.683333 |
2732 | 2025-01-03 | CA | Kings | Grizzlies | 401705042 | 271 | 221.833333 | 3.705222 | 3.760185 | 4.192460 | ... | 4.520370 | 9.783333 | 29.683333 | 39.466667 | 12.066667 | 5.983333 | 13.000000 | 21.950000 | -13.466667 | 95.733333 |
2733 | 2025-01-03 | CA | Lakers | Hawks | 401705043 | 221 | 277.416667 | 4.964283 | 3.246483 | 3.343254 | ... | 2.880952 | 13.416667 | 34.666667 | 48.083333 | 26.416667 | 10.916667 | 13.000000 | 20.166667 | -33.166667 | 121.000000 |
2734 rows × 32 columns