Hi all,
I am getting the following error when I call `cross_validate` on my pipeline:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
in
----> 1 cv_result4 = cross_validate(model4, data, target, error_score='raise')
/opt/conda/lib/python3.9/site-packages/sklearn/utils/validation.py in inner_f(*args, **kwargs)
61 extra_args = len(args) - len(all_args)
62 if extra_args <= 0:
---> 63 return f(*args, **kwargs)
64
65 # extra_args > 0
/opt/conda/lib/python3.9/site-packages/sklearn/model_selection/_validation.py in cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score, return_estimator, error_score)
248 parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
249 pre_dispatch=pre_dispatch)
--> 250 results = parallel(
251 delayed(_fit_and_score)(
252 clone(estimator), X, y, scorers, train, test, verbose, None,
/opt/conda/lib/python3.9/site-packages/joblib/parallel.py in __call__(self, iterable)
1039 # remaining jobs.
1040 self._iterating = False
-> 1041 if self.dispatch_one_batch(iterator):
1042 self._iterating = self._original_iterator is not None
1043
/opt/conda/lib/python3.9/site-packages/joblib/parallel.py in dispatch_one_batch(self, iterator)
857 return False
858 else:
--> 859 self._dispatch(tasks)
860 return True
861
/opt/conda/lib/python3.9/site-packages/joblib/parallel.py in _dispatch(self, batch)
775 with self._lock:
776 job_idx = len(self._jobs)
--> 777 job = self._backend.apply_async(batch, callback=cb)
778 # A job can complete so quickly than its callback is
779 # called before we get here, causing self._jobs to
/opt/conda/lib/python3.9/site-packages/joblib/_parallel_backends.py in apply_async(self, func, callback)
206 def apply_async(self, func, callback=None):
207 """Schedule a func to be run"""
--> 208 result = ImmediateResult(func)
209 if callback:
210 callback(result)
/opt/conda/lib/python3.9/site-packages/joblib/_parallel_backends.py in __init__(self, batch)
570 # Don't delay the application, to avoid keeping the input
571 # arguments in memory
--> 572 self.results = batch()
573
574 def get(self):
/opt/conda/lib/python3.9/site-packages/joblib/parallel.py in __call__(self)
260 # change the default number of processes to -1
261 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 262 return [func(*args, **kwargs)
263 for func, args, kwargs in self.items]
264
/opt/conda/lib/python3.9/site-packages/joblib/parallel.py in <listcomp>(.0)
260 # change the default number of processes to -1
261 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 262 return [func(*args, **kwargs)
263 for func, args, kwargs in self.items]
264
/opt/conda/lib/python3.9/site-packages/sklearn/utils/fixes.py in __call__(self, *args, **kwargs)
220 def __call__(self, *args, **kwargs):
221 with config_context(**self.config):
--> 222 return self.function(*args, **kwargs)
/opt/conda/lib/python3.9/site-packages/sklearn/model_selection/_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, split_progress, candidate_progress, error_score)
596 estimator.fit(X_train, **fit_params)
597 else:
--> 598 estimator.fit(X_train, y_train, **fit_params)
599
600 except Exception as e:
/opt/conda/lib/python3.9/site-packages/sklearn/pipeline.py in fit(self, X, y, **fit_params)
339 """
340 fit_params_steps = self._check_fit_params(**fit_params)
--> 341 Xt = self._fit(X, y, **fit_params_steps)
342 with _print_elapsed_time('Pipeline',
343 self._log_message(len(self.steps) - 1)):
/opt/conda/lib/python3.9/site-packages/sklearn/pipeline.py in _fit(self, X, y, **fit_params_steps)
301 cloned_transformer = clone(transformer)
302 # Fit or load from cache the current transformer
--> 303 X, fitted_transformer = fit_transform_one_cached(
304 cloned_transformer, X, y, None,
305 message_clsname='Pipeline',
/opt/conda/lib/python3.9/site-packages/joblib/memory.py in __call__(self, *args, **kwargs)
350
351 def __call__(self, *args, **kwargs):
--> 352 return self.func(*args, **kwargs)
353
354 def call_and_shelve(self, *args, **kwargs):
/opt/conda/lib/python3.9/site-packages/sklearn/pipeline.py in _fit_transform_one(transformer, X, y, weight, message_clsname, message, **fit_params)
752 with _print_elapsed_time(message_clsname, message):
753 if hasattr(transformer, 'fit_transform'):
--> 754 res = transformer.fit_transform(X, y, **fit_params)
755 else:
756 res = transformer.fit(X, y, **fit_params).transform(X)
/opt/conda/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py in fit_transform(self, X, y)
503 self._validate_transformers()
504 self._validate_column_callables(X)
--> 505 self._validate_remainder(X)
506
507 result = self._fit_transform(X, y, _fit_transform_one)
/opt/conda/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py in _validate_remainder(self, X)
322
323 # Make it possible to check for reordered named columns on transform
--> 324 self._has_str_cols = any(_determine_key_type(cols) == 'str'
325 for cols in self._columns)
326 if hasattr(X, 'columns'):
/opt/conda/lib/python3.9/site-packages/sklearn/compose/_column_transformer.py in <genexpr>(.0)
322
323 # Make it possible to check for reordered named columns on transform
--> 324 self._has_str_cols = any(_determine_key_type(cols) == 'str'
325 for cols in self._columns)
326 if hasattr(X, 'columns'):
/opt/conda/lib/python3.9/site-packages/sklearn/utils/__init__.py in _determine_key_type(key, accept_slice)
266 except KeyError:
267 raise ValueError(err_msg)
--> 268 raise ValueError(err_msg)
269
270
ValueError: No valid specification of the columns. Only a scalar, list or slice of all integers or all strings, or boolean mask is allowed
I have ensured that `numerical_fea` and `categorical_fea` are in DataFrame format. Here is my code:
from sklearn.compose import make_column_selector as selector, ColumnTransformer
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_validate
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer

# Build the column-name lists by dtype. Calling a selector on the DataFrame
# returns a list of column names, which is what ColumnTransformer accepts
# as a column specifier.
categorical = selector(dtype_include=object)
categorical_fea = categorical(data)
numerical = selector(dtype_exclude=object)
numerical_fea = numerical(data)  # closing parenthesis was missing here

# Preview frames for visual inspection only. They must NOT be handed to
# ColumnTransformer: a DataFrame is not a valid column specification.
numerical_fea_df = pd.DataFrame(data[numerical_fea])
numerical_fea_df.head(2)
categorical_fea_df = pd.DataFrame(data[categorical_fea])
categorical_fea_df.head(2)

numerical_preprocessor = make_pipeline(StandardScaler(), SimpleImputer())
categorical_preprocessor = make_pipeline(
    SimpleImputer(strategy='most_frequent'),
    OneHotEncoder(handle_unknown='ignore'),
)

# Fix for "ValueError: No valid specification of the columns": the third
# item of each transformer tuple must be a scalar, list, slice, boolean mask
# (or callable) that selects columns -- here, the lists of column names --
# rather than the sub-DataFrames themselves.
preprocessor = ColumnTransformer(transformers=[
    ('nume', numerical_preprocessor, numerical_fea),
    ('cate', categorical_preprocessor, categorical_fea),
])

model4 = make_pipeline(preprocessor, LogisticRegression(max_iter=1000))
cv_result4 = cross_validate(model4, data, target, error_score='raise')
Can anyone let me know where the error is and how I can fix my code so that the cross-validation step runs?