Overfit-generalization-underfit

When I tried to (re)do the M2 sessions, I received a memory error
on entry 5:

import pandas as pd
from sklearn.model_selection import cross_validate, ShuffleSplit

cv = ShuffleSplit(n_splits=30, test_size=0.2)
cv_results = cross_validate(regressor, data, target,
                            cv=cv, scoring="neg_mean_absolute_error",
                            return_train_score=True, n_jobs=2)
cv_results = pd.DataFrame(cv_results)

The error message is:
---------------------------------------------------------------------------
OSError                                   Traceback (most recent call last)
<ipython-input-5-c5f76dcd5607> in <module>
      3 
      4 cv = ShuffleSplit(n_splits=30, test_size=0.2)
----> 5 cv_results = cross_validate(regressor, data, target,
      6                             cv=cv, scoring="neg_mean_absolute_error",
      7                             return_train_score=True, n_jobs=2)

/opt/conda/lib/python3.9/site-packages/sklearn/utils/validation.py in inner_f(*args, **kwargs)
     61             extra_args = len(args) - len(all_args)
     62             if extra_args <= 0:
---> 63                 return f(*args, **kwargs)
     64 
     65             # extra_args > 0

/opt/conda/lib/python3.9/site-packages/sklearn/model_selection/_validation.py in cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score, return_estimator, error_score)
    248     parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
    249                         pre_dispatch=pre_dispatch)
--> 250     results = parallel(
    251         delayed(_fit_and_score)(
    252             clone(estimator), X, y, scorers, train, test, verbose, None,

/opt/conda/lib/python3.9/site-packages/joblib/parallel.py in __call__(self, iterable)
   1039             # remaining jobs.
   1040             self._iterating = False
-> 1041             if self.dispatch_one_batch(iterator):
   1042                 self._iterating = self._original_iterator is not None
   1043 

/opt/conda/lib/python3.9/site-packages/joblib/parallel.py in dispatch_one_batch(self, iterator)
    857                 return False
    858             else:
--> 859                 self._dispatch(tasks)
    860                 return True
    861 

/opt/conda/lib/python3.9/site-packages/joblib/parallel.py in _dispatch(self, batch)
    775         with self._lock:
    776             job_idx = len(self._jobs)
--> 777             job = self._backend.apply_async(batch, callback=cb)
    778             # A job can complete so quickly than its callback is
    779             # called before we get here, causing self._jobs to

/opt/conda/lib/python3.9/site-packages/joblib/_parallel_backends.py in apply_async(self, func, callback)
    529     def apply_async(self, func, callback=None):
    530         """Schedule a func to be run"""
--> 531         future = self._workers.submit(SafeFunction(func))
    532         future.get = functools.partial(self.wrap_future_result, future)
    533         if callback is not None:

/opt/conda/lib/python3.9/site-packages/joblib/externals/loky/reusable_executor.py in submit(self, fn, *args, **kwargs)
    175     def submit(self, fn, *args, **kwargs):
    176         with self._submit_resize_lock:
--> 177             return super(_ReusablePoolExecutor, self).submit(
    178                 fn, *args, **kwargs)
    179 

/opt/conda/lib/python3.9/site-packages/joblib/externals/loky/process_executor.py in submit(self, fn, *args, **kwargs)
   1120             self._executor_manager_thread_wakeup.wakeup()
   1121 
-> 1122             self._ensure_executor_running()
   1123             return f
   1124     submit.__doc__ = _base.Executor.submit.__doc__

/opt/conda/lib/python3.9/site-packages/joblib/externals/loky/process_executor.py in _ensure_executor_running(self)
   1094         with self._processes_management_lock:
   1095             if len(self._processes) != self._max_workers:
-> 1096                 self._adjust_process_count()
   1097             self._start_executor_manager_thread()
   1098 

/opt/conda/lib/python3.9/site-packages/joblib/externals/loky/process_executor.py in _adjust_process_count(self)
   1085                 p = self._context.Process(target=_process_worker, args=args)
   1086             p._worker_exit_lock = worker_exit_lock
-> 1087             p.start()
   1088             self._processes[p.pid] = p
   1089         mp.util.debug('Adjust process count : {}'.format(self._processes))

/opt/conda/lib/python3.9/multiprocessing/process.py in start(self)
    119                'daemonic processes are not allowed to have children'
    120         _cleanup()
--> 121         self._popen = self._Popen(self)
    122         self._sentinel = self._popen.sentinel
    123         # Avoid a refcycle if the target function holds an indirect

/opt/conda/lib/python3.9/site-packages/joblib/externals/loky/backend/process.py in _Popen(process_obj)
     37         else:
     38             from .popen_loky_posix import Popen
---> 39         return Popen(process_obj)
     40 
     41     if sys.version_info < (3, 3):

/opt/conda/lib/python3.9/site-packages/joblib/externals/loky/backend/popen_loky_posix.py in __init__(self, process_obj)
     50             self.returncode = None
     51             self._fds = []
---> 52             self._launch(process_obj)
     53 
     54         if sys.version_info < (3, 4):

/opt/conda/lib/python3.9/site-packages/joblib/externals/loky/backend/popen_loky_posix.py in _launch(self, process_obj)
    127             set_spawning_popen(self)
    128             try:
--> 129                 prep_data = spawn.get_preparation_data(
    130                     process_obj._name,
    131                     getattr(process_obj, "init_main_module", True))

/opt/conda/lib/python3.9/site-packages/joblib/externals/loky/backend/spawn.py in get_preparation_data(name, init_main_module)
    108         # process is created (othewise the child won't be able to use it if it
    109         # is created later on)
--> 110         mp_resource_tracker.ensure_running()
    111         d["mp_tracker_args"] = {
    112             'fd': mp_resource_tracker._fd, 'pid': mp_resource_tracker._pid

/opt/conda/lib/python3.9/multiprocessing/resource_tracker.py in ensure_running(self)
    119                     if _HAVE_SIGMASK:
    120                         signal.pthread_sigmask(signal.SIG_BLOCK, _IGNORED_SIGNALS)
--> 121                     pid = util.spawnv_passfds(exe, args, fds_to_pass)
    122                 finally:
    123                     if _HAVE_SIGMASK:

/opt/conda/lib/python3.9/multiprocessing/util.py in spawnv_passfds(path, args, passfds)
    450     errpipe_read, errpipe_write = os.pipe()
    451     try:
--> 452         return _posixsubprocess.fork_exec(
    453             args, [os.fsencode(path)], True, passfds, None, None,
    454             -1, -1, -1, -1, -1, -1, errpipe_read, errpipe_write,

OSError: [Errno 12] Cannot allocate memory

We recently had an issue with the Docker system and the memory management on the server.
Could you force n_jobs=1 temporarily? It will work, but it will be a bit slower.

1 Like

Can you try to open the “running notebooks” entry in the “File” menu and shut down any other running notebooks you do not need anymore? This should free some memory, and then you should be able to retry running this notebook with n_jobs=2 everywhere.

Thanks. The memory issue is solved.