Thanks for your answer.
I tried this :
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import make_column_transformer
make_column_transformer(
(StandardScaler(), data[numerical_features]),
(OneHotEncoder(handle_unknown="ignore"), data.drop(columns=numerical_features)))
model2 = make_pipeline(
make_column_transformer, LogisticRegression(max_iter=500))
cv_results2 = cross_validate(model2, data, target, cv=10)
cv_results2
Here is the traceback error :
ValueError Traceback (most recent call last)
Cell In[12], line 4
1 model2 = make_pipeline(
2 make_column_transformer, LogisticRegression(max_iter=500))
----> 4 cv_results2 = cross_validate(model2, data, target, cv=10)
5 cv_results2
File /opt/conda/lib/python3.10/site-packages/sklearn/model_selection/_validation.py:285, in cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score, return_estimator, error_score)
265 parallel = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)
266 results = parallel(
267 delayed(_fit_and_score)(
268 clone(estimator),
(...)
282 for train, test in cv.split(X, y, groups)
283 )
--> 285 _warn_or_raise_about_fit_failures(results, error_score)
287 # For callabe scoring, the return type is only know after calling. If the
288 # return type is a dictionary, the error scores can now be inserted with
289 # the correct key.
290 if callable(scoring):
File /opt/conda/lib/python3.10/site-packages/sklearn/model_selection/_validation.py:367, in _warn_or_raise_about_fit_failures(results, error_score)
360 if num_failed_fits == num_fits:
361 all_fits_failed_message = (
362 f"\nAll the {num_fits} fits failed.\n"
363 "It is very likely that your model is misconfigured.\n"
364 "You can try to debug the error by setting error_score='raise'.\n\n"
365 f"Below are more details about the failures:\n{fit_errors_summary}"
366 )
--> 367 raise ValueError(all_fits_failed_message)
369 else:
370 some_fits_failed_message = (
371 f"\n{num_failed_fits} fits failed out of a total of {num_fits}.\n"
372 "The score on these train-test partitions for these parameters"
(...)
376 f"Below are more details about the failures:\n{fit_errors_summary}"
377 )
ValueError:
All the 10 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.
Below are more details about the failures:
--------------------------------------------------------------------------------
10 fits failed with the following error:
Traceback (most recent call last):
File "/opt/conda/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 686, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "/opt/conda/lib/python3.10/site-packages/sklearn/pipeline.py", line 402, in fit
Xt = self._fit(X, y, **fit_params_steps)
File "/opt/conda/lib/python3.10/site-packages/sklearn/pipeline.py", line 340, in _fit
self._validate_steps()
File "/opt/conda/lib/python3.10/site-packages/sklearn/pipeline.py", line 231, in _validate_steps
raise TypeError(
TypeError: All intermediate steps should be transformers and implement fit and transform or be the string 'passthrough' '<function make_column_transformer at 0x7f3d217ac700>' (type <class 'function'>) doesn't