Still facing issue in M05 final Quiz question 4.
Change the code and get an error when building the pipeline to integrate categorical values (not sure also about OneHotEncoder or OrdinalEncoder)
My code:
import pandas as pd
ames_housing = pd.read_csv("../datasets/house_prices.csv", na_values="?")
target_name = "SalePrice"
data = ames_housing.drop(columns=target_name)
target = ames_housing[target_name]
numerical_columns = [
"LotFrontage", "LotArea", "MasVnrArea", "BsmtFinSF1", "BsmtFinSF2",
"BsmtUnfSF", "TotalBsmtSF", "1stFlrSF", "2ndFlrSF", "LowQualFinSF",
"GrLivArea", "BedroomAbvGr", "KitchenAbvGr", "TotRmsAbvGrd", "Fireplaces",
"GarageCars", "GarageArea", "WoodDeckSF", "OpenPorchSF", "EnclosedPorch",
"3SsnPorch", "ScreenPorch", "PoolArea", "MiscVal",
]
from sklearn.compose import make_column_selector as selector
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.impute import SimpleImputer
data_numerical = data[numerical_features]
categorical_columns_selector = selector(dtype_include=object)
categorical_columns = categorical_columns_selector(data)
categorical_preprocessor = OrdinalEncoder()
numerical_preprocessor = StandardScaler()
from sklearn.compose import ColumnTransformer
preprocessor = ColumnTransformer([
('Ordinal Encoder', categorical_preprocessor, categorical_columns),
('standard-scaler', numerical_preprocessor, numerical_columns)])
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeRegressor
model = Pipeline([
("preprocessor", preprocessor),
("classifier", SimpleImputer(), DecisionTreeRegressor(random_state=0, max_depth=3))])
from sklearn.model_selection import cross_validate
cv_results_model = cross_validate(
model, data, target, cv=10, return_estimator=True,
n_jobs=2
)
cv_results_model["test_score"].mean()