Hi Team,
I followed the requirements from the question, first using only the numerical data and comparing the result with a dummy baseline. None of my results matched the available options, so I got the answer wrong. No problem …
I then went through the solution your team provided and ran it, and I got NaN for all 10 folds (is that expected, since the data contains string, non-numerical columns?).
# Your team's solution
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_validate
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
# Load the dataset and split it into the feature table and the target column.
ames_housing = pd.read_csv("../datasets/ames_housing_no_missing.csv")
target_name = "SalePrice"
data = ames_housing.drop(columns=target_name)
target = ames_housing[target_name]

# StandardScaler only accepts numerical input. `data` still holds string
# columns, so fitting the pipeline on it fails in every fold, and
# cross_validate's default error_score=np.nan reports each failure as a NaN
# score. Evaluate on the numeric columns only; `data` itself is left untouched
# so later cells can still select from the full table.
# NOTE(review): select_dtypes keeps every numeric-dtype column, which may be a
# wider set than the exercise intends -- confirm against the question.
numeric_data = data.select_dtypes(include="number")

# NOTE(review): LogisticRegression is a classifier while "SalePrice" looks like
# a continuous price -- confirm whether the target should be binarized first
# or a regressor used instead.
model = make_pipeline(StandardScaler(), LogisticRegression())
cv_results = cross_validate(
    model, numeric_data, target, cv=10, return_estimator=True
)

# One score per fold; the bare expression displays it in a notebook.
test_score_lr = cv_results["test_score"]
test_score_lr
# My first attempt (none of the quiz options matched): cross-validate the same
# pipeline on a hand-picked list of numerical columns.
numerical_features = [
    "LotFrontage",
    "LotArea",
    "MasVnrArea",
    "BsmtFinSF1",
    "BsmtFinSF2",
    "BsmtUnfSF",
    "TotalBsmtSF",
    "1stFlrSF",
    "2ndFlrSF",
    "LowQualFinSF",
    "GrLivArea",
    "BedroomAbvGr",
    "KitchenAbvGr",
    "TotRmsAbvGrd",
    "Fireplaces",
    "GarageCars",
    "GarageArea",
    "WoodDeckSF",
    "OpenPorchSF",
    "EnclosedPorch",
    "3SsnPorch",
    "ScreenPorch",
    "PoolArea",
    "MiscVal",
]

# Keep every row, restricted to the columns listed above.
data_numerical = data.loc[:, numerical_features]

# `model` and `target` come from the earlier snippet.
cv_results = cross_validate(
    model,
    X=data_numerical,
    y=target,
    cv=10,
    return_estimator=True,
)

# One score per fold; the bare expression displays it in a notebook.
test_score_lr = cv_results["test_score"]
test_score_lr