def setup_common_training_handlers(
    trainer,
    train_sampler=None,
    to_save=None,
    save_every_iters=1000,
    output_path=None,
    lr_scheduler=None,
    with_gpu_stats=False,
    output_names=None,
    with_pbars=True,
    with_pbar_on_iters=True,
    log_every_iters=100,
    device=None,
    stop_on_nan=True,
    clear_cuda_cache=True,
):
    """Attach a standard set of training handlers to ``trainer``.

    Works for both single-process and distributed runs: when
    ``idist.get_world_size()`` is greater than one the distributed helper is
    used, otherwise the plain helper is used. Depending on the arguments, the
    following may be attached:

    - :class:`~ignite.handlers.TerminateOnNan`
    - a learning rate scheduling handler
    - :class:`~ignite.handlers.Checkpoint`
    - :class:`~ignite.metrics.RunningAverage` on the `update_function` output
    - progress bars on epochs and, optionally, on iterations

    Args:
        trainer (Engine): trainer engine. Output of trainer's `update_function` should be a dictionary
            or sequence or a single tensor.
        train_sampler (torch.utils.data.DistributedSampler, optional): distributed sampler whose
            `set_epoch` method is called on every epoch start. Only used when world size is > 1;
            otherwise a distributed sampler triggers a warning and is ignored.
        to_save (dict, optional): objects to store in the checkpoint. Passed to
            :class:`~ignite.handlers.Checkpoint`.
        save_every_iters (int, optional): checkpointing interval in iterations. Default, 1000.
        output_path (str, optional): directory where `to_save` objects are written.
        lr_scheduler (ParamScheduler or subclass of `torch.optim.lr_scheduler._LRScheduler`): learning rate
            scheduler, either a native torch scheduler or an ignite parameter scheduler.
        with_gpu_stats (bool, optional): if True, :class:`~ignite.contrib.metrics.handlers.GpuInfo` is attached
            to the trainer. This requires `pynvml` package to be installed.
        output_names (list/tuple, optional): names associated with the entries of the `update_function` output.
        with_pbars (bool, optional): if True, progress bars are attached. Default, True.
        with_pbar_on_iters (bool, optional): if True, a progress bar on iterations is attached (only
            when `with_pbars` is also True). Default, True.
        log_every_iters (int, optional): interval in iterations for
            :class:`~ignite.contrib.metrics.handlers.GpuInfo` and for the iteration progress bar. Default, 100.
        stop_on_nan (bool, optional): if True, :class:`~ignite.handlers.TerminateOnNan` is added to the
            trainer. Default, True.
        clear_cuda_cache (bool, optional): if True and CUDA is available, `torch.cuda.empty_cache()` is
            called at the end of every epoch. Default, True.
        device (str or torch.device, optional): deprecated and unused; a non-None value only emits a
            warning. It will be removed in v0.5.0.
    """
    if device is not None:
        warnings.warn("Argument device is unused and deprecated. It will be removed in v0.5.0")

    # Options shared by the single-process and the distributed helper.
    handler_options = {
        "to_save": to_save,
        "save_every_iters": save_every_iters,
        "output_path": output_path,
        "lr_scheduler": lr_scheduler,
        "with_gpu_stats": with_gpu_stats,
        "output_names": output_names,
        "with_pbars": with_pbars,
        "with_pbar_on_iters": with_pbar_on_iters,
        "log_every_iters": log_every_iters,
        "stop_on_nan": stop_on_nan,
        "clear_cuda_cache": clear_cuda_cache,
    }

    if idist.get_world_size() > 1:
        _setup_common_distrib_training_handlers(trainer, train_sampler=train_sampler, **handler_options)
        return

    # Single-process run: a distributed sampler cannot be used here, so tell the user.
    # (isinstance() is already False for None, no separate None check is needed.)
    if isinstance(train_sampler, DistributedSampler):
        warnings.warn(
            "Argument train_sampler is a distributed sampler,"
            " but either there is no distributed setting or world size is < 2. "
            "Train sampler argument will be ignored",
            UserWarning,
        )
    _setup_common_training_handlers(trainer, **handler_options)
# Backward-compatible alias: the public entry point handles both configurations.
setup_common_distrib_training_handlers = setup_common_training_handlers


def _setup_common_training_handlers(
    trainer,
    to_save=None,
    save_every_iters=1000,
    output_path=None,
    lr_scheduler=None,
    with_gpu_stats=False,
    output_names=None,
    with_pbars=True,
    with_pbar_on_iters=True,
    log_every_iters=100,
    stop_on_nan=True,
    clear_cuda_cache=True,
):
    """Attach the common handlers to ``trainer`` for a single process.

    Handlers are registered in a fixed order: NaN termination, learning rate
    scheduling, CUDA cache clearing, checkpointing, GPU statistics, running
    averages of the outputs and finally the progress bars.

    Raises:
        ValueError: if ``to_save`` is given without ``output_path``.
    """
    if stop_on_nan:
        trainer.add_event_handler(Events.ITERATION_COMPLETED, TerminateOnNan())

    if lr_scheduler is not None:
        if isinstance(lr_scheduler, torch.optim.lr_scheduler._LRScheduler):
            # Native torch schedulers are not event handlers: wrap the step() call.
            def _step_torch_scheduler(engine):
                return lr_scheduler.step()

            trainer.add_event_handler(Events.ITERATION_COMPLETED, _step_torch_scheduler)
        elif isinstance(lr_scheduler, LRScheduler):
            trainer.add_event_handler(Events.ITERATION_COMPLETED, lr_scheduler)
        else:
            # Any other scheduler object is attached at iteration start.
            trainer.add_event_handler(Events.ITERATION_STARTED, lr_scheduler)

    if torch.cuda.is_available() and clear_cuda_cache:
        trainer.add_event_handler(Events.EPOCH_COMPLETED, empty_cuda_cache)

    if to_save is not None:
        if output_path is None:
            raise ValueError("If to_save argument is provided then output_path argument should be also defined")

        disk_saver = DiskSaver(dirname=output_path, require_empty=False)
        training_checkpoint = Checkpoint(to_save, disk_saver, filename_prefix="training")
        trainer.add_event_handler(Events.ITERATION_COMPLETED(every=save_every_iters), training_checkpoint)

    if with_gpu_stats:
        gpu_info = GpuInfo()
        gpu_info.attach(trainer, name="gpu", event_name=Events.ITERATION_COMPLETED(every=log_every_iters))

    if output_names is not None:

        def _pick_output(x, index, name):
            # Select the entry to average: by key for mappings, by position for
            # sequences; a bare tensor or number is used as is.
            if isinstance(x, Mapping):
                return x[name]
            if isinstance(x, Sequence):
                return x[index]
            if isinstance(x, (torch.Tensor, numbers.Number)):
                return x
            raise ValueError(
                "Unhandled type of update_function's output. "
                "It should either mapping or sequence, but given {}".format(type(x))
            )

        for position, metric_name in enumerate(output_names):
            running_average = RunningAverage(
                output_transform=partial(_pick_output, index=position, name=metric_name), epoch_bound=False
            )
            running_average.attach(trainer, metric_name)

    if with_pbars:
        if with_pbar_on_iters:
            iteration_bar = ProgressBar(persist=False)
            iteration_bar.attach(
                trainer, metric_names="all", event_name=Events.ITERATION_COMPLETED(every=log_every_iters)
            )

        epoch_bar = ProgressBar(persist=True, bar_format="")
        epoch_bar.attach(trainer, event_name=Events.EPOCH_STARTED, closing_event_name=Events.COMPLETED)


def _setup_common_distrib_training_handlers(
    trainer,
    train_sampler=None,
    to_save=None,
    save_every_iters=1000,
    output_path=None,
    lr_scheduler=None,
    with_gpu_stats=False,
    output_names=None,
    with_pbars=True,
    with_pbar_on_iters=True,
    log_every_iters=100,
    stop_on_nan=True,
    clear_cuda_cache=True,
):
    """Attach the common handlers to ``trainer`` in a distributed configuration.

    Delegates to the single-process helper, with progress bars enabled on rank 0
    only, then registers a handler that calls ``train_sampler.set_epoch`` on
    every epoch start.

    Raises:
        TypeError: if ``train_sampler`` is given but is not a ``DistributedSampler``.
    """
    # Progress bars are shown by the rank 0 process only.
    pbars_enabled = (idist.get_rank() == 0) and with_pbars

    _setup_common_training_handlers(
        trainer,
        to_save=to_save,
        output_path=output_path,
        save_every_iters=save_every_iters,
        lr_scheduler=lr_scheduler,
        with_gpu_stats=with_gpu_stats,
        output_names=output_names,
        with_pbars=pbars_enabled,
        with_pbar_on_iters=with_pbar_on_iters,
        log_every_iters=log_every_iters,
        stop_on_nan=stop_on_nan,
        clear_cuda_cache=clear_cuda_cache,
    )

    if train_sampler is None:
        return

    if not isinstance(train_sampler, DistributedSampler):
        raise TypeError("Train sampler should be torch DistributedSampler and have `set_epoch` method")

    @trainer.on(Events.EPOCH_STARTED)
    def distrib_set_epoch(engine):
        # The sampler is given the engine's epoch number minus one.
        train_sampler.set_epoch(engine.state.epoch - 1)


def empty_cuda_cache(_):
    """Event handler: empty the CUDA cache, then run the garbage collector."""
    import gc

    torch.cuda.empty_cache()
    gc.collect()


def setup_any_logging(logger, logger_module, trainer, optimizers, evaluators, log_every_iters):
    """Deprecated since 0.4.0: always raises ``DeprecationWarning``."""
    message = (
        "ignite.contrib.engines.common.setup_any_logging is deprecated since 0.4.0. "
        "Please use ignite.contrib.engines.common._setup_logging instead."
    )
    raise DeprecationWarning(message)


def _setup_logging(logger, trainer, optimizers, evaluators, log_every_iters):
    """Attach training, optimizer and evaluation logging handlers to ``logger``.

    Args:
        logger: experiment logger to attach the handlers to.
        trainer (Engine): trainer engine.
        optimizers (torch.optim.Optimizer or dict, optional): single optimizer or mapping of
            tag to optimizer. The "lr" parameter of each is logged.
        evaluators (Engine or dict, optional): single evaluator (tagged "validation") or
            mapping of tag to evaluator.
        log_every_iters (int, optional): interval for iteration-based handlers. None means 1.

    Raises:
        TypeError: if ``optimizers`` or ``evaluators`` has an unsupported type.
    """
    from torch.optim.optimizer import Optimizer

    # Validate both arguments before attaching anything.
    if optimizers is not None and not isinstance(optimizers, (Optimizer, Mapping)):
        raise TypeError("Argument optimizers should be either a single optimizer or a dictionary or optimizers")

    if evaluators is not None and not isinstance(evaluators, (Engine, Mapping)):
        raise TypeError("Argument evaluators should be either a single engine or a dictionary or engines")

    interval = 1 if log_every_iters is None else log_every_iters

    logger.attach_output_handler(
        trainer, event_name=Events.ITERATION_COMPLETED(every=interval), tag="training", metric_names="all"
    )

    if optimizers is not None:
        # Log optimizer parameters
        optimizers_by_tag = {None: optimizers} if isinstance(optimizers, Optimizer) else optimizers

        for opt_tag, opt in optimizers_by_tag.items():
            logger.attach_opt_params_handler(
                trainer, Events.ITERATION_STARTED(every=interval), opt, param_name="lr", tag=opt_tag
            )

    if evaluators is not None:
        # Log evaluation metrics
        evaluators_by_tag = {"validation": evaluators} if isinstance(evaluators, Engine) else evaluators

        # Only the WandB logger gets an explicit custom event for the global step.
        if isinstance(logger, WandBLogger):
            step_event = Events.ITERATION_COMPLETED
        else:
            step_event = None
        step_transform = global_step_from_engine(trainer, custom_event_name=step_event)

        for eval_tag, eval_engine in evaluators_by_tag.items():
            logger.attach_output_handler(
                eval_engine,
                event_name=Events.COMPLETED,
                tag=eval_tag,
                metric_names="all",
                global_step_transform=step_transform,
            )
def setup_tb_logging(output_path, trainer, optimizers=None, evaluators=None, log_every_iters=100, **kwargs):
    """Create a TensorBoard logger and wire it to the trainer and evaluators.

    Logged values are:

    - Training metrics, e.g. running average loss values
    - Learning rate(s)
    - Evaluation metrics

    Args:
        output_path (str): logging directory path, passed to the logger as `log_dir`.
        trainer (Engine): trainer engine.
        optimizers (torch.optim.Optimizer or dict of torch.optim.Optimizer, optional): single optimizer or
            dictionary of optimizers. Dictionary keys are used as logging tags.
        evaluators (Engine or dict of Engine, optional): single evaluator or dictionary of evaluators.
            Dictionary keys are used as logging tags.
        log_every_iters (int, optional): interval for handlers attached to iteration events. Use 1 or
            None to log every iteration.
        **kwargs: extra keyword arguments forwarded to the logger constructor.

    Returns:
        TensorboardLogger
    """
    tb_logger = TensorboardLogger(log_dir=output_path, **kwargs)
    _setup_logging(
        logger=tb_logger,
        trainer=trainer,
        optimizers=optimizers,
        evaluators=evaluators,
        log_every_iters=log_every_iters,
    )
    return tb_logger
def setup_visdom_logging(trainer, optimizers=None, evaluators=None, log_every_iters=100, **kwargs):
    """Create a Visdom logger and wire it to the trainer and evaluators.

    Logged values are:

    - Training metrics, e.g. running average loss values
    - Learning rate(s)
    - Evaluation metrics

    Args:
        trainer (Engine): trainer engine.
        optimizers (torch.optim.Optimizer or dict of torch.optim.Optimizer, optional): single optimizer or
            dictionary of optimizers. Dictionary keys are used as logging tags.
        evaluators (Engine or dict of Engine, optional): single evaluator or dictionary of evaluators.
            Dictionary keys are used as logging tags.
        log_every_iters (int, optional): interval for handlers attached to iteration events. Use 1 or
            None to log every iteration.
        **kwargs: extra keyword arguments forwarded to the logger constructor.

    Returns:
        VisdomLogger
    """
    visdom_logger = VisdomLogger(**kwargs)
    _setup_logging(
        logger=visdom_logger,
        trainer=trainer,
        optimizers=optimizers,
        evaluators=evaluators,
        log_every_iters=log_every_iters,
    )
    return visdom_logger
def setup_mlflow_logging(trainer, optimizers=None, evaluators=None, log_every_iters=100, **kwargs):
    """Create an MLflow logger and wire it to the trainer and evaluators.

    Logged values are:

    - Training metrics, e.g. running average loss values
    - Learning rate(s)
    - Evaluation metrics

    Args:
        trainer (Engine): trainer engine.
        optimizers (torch.optim.Optimizer or dict of torch.optim.Optimizer, optional): single optimizer or
            dictionary of optimizers. Dictionary keys are used as logging tags.
        evaluators (Engine or dict of Engine, optional): single evaluator or dictionary of evaluators.
            Dictionary keys are used as logging tags.
        log_every_iters (int, optional): interval for handlers attached to iteration events. Use 1 or
            None to log every iteration.
        **kwargs: extra keyword arguments forwarded to the logger constructor.

    Returns:
        MLflowLogger
    """
    mlflow_logger = MLflowLogger(**kwargs)
    _setup_logging(
        logger=mlflow_logger,
        trainer=trainer,
        optimizers=optimizers,
        evaluators=evaluators,
        log_every_iters=log_every_iters,
    )
    return mlflow_logger
def setup_neptune_logging(trainer, optimizers=None, evaluators=None, log_every_iters=100, **kwargs):
    """Create a Neptune logger and wire it to the trainer and evaluators.

    Logged values are:

    - Training metrics, e.g. running average loss values
    - Learning rate(s)
    - Evaluation metrics

    Args:
        trainer (Engine): trainer engine.
        optimizers (torch.optim.Optimizer or dict of torch.optim.Optimizer, optional): single optimizer or
            dictionary of optimizers. Dictionary keys are used as logging tags.
        evaluators (Engine or dict of Engine, optional): single evaluator or dictionary of evaluators.
            Dictionary keys are used as logging tags.
        log_every_iters (int, optional): interval for handlers attached to iteration events. Use 1 or
            None to log every iteration.
        **kwargs: extra keyword arguments forwarded to the logger constructor.

    Returns:
        NeptuneLogger
    """
    neptune_logger = NeptuneLogger(**kwargs)
    _setup_logging(
        logger=neptune_logger,
        trainer=trainer,
        optimizers=optimizers,
        evaluators=evaluators,
        log_every_iters=log_every_iters,
    )
    return neptune_logger
def setup_wandb_logging(trainer, optimizers=None, evaluators=None, log_every_iters=100, **kwargs):
    """Create a WandB logger and wire it to the trainer and evaluators.

    Logged values are:

    - Training metrics, e.g. running average loss values
    - Learning rate(s)
    - Evaluation metrics

    Args:
        trainer (Engine): trainer engine.
        optimizers (torch.optim.Optimizer or dict of torch.optim.Optimizer, optional): single optimizer or
            dictionary of optimizers. Dictionary keys are used as logging tags.
        evaluators (Engine or dict of Engine, optional): single evaluator or dictionary of evaluators.
            Dictionary keys are used as logging tags.
        log_every_iters (int, optional): interval for handlers attached to iteration events. Use 1 or
            None to log every iteration.
        **kwargs: extra keyword arguments forwarded to the logger constructor.

    Returns:
        WandBLogger
    """
    wandb_logger = WandBLogger(**kwargs)
    _setup_logging(
        logger=wandb_logger,
        trainer=trainer,
        optimizers=optimizers,
        evaluators=evaluators,
        log_every_iters=log_every_iters,
    )
    return wandb_logger
def setup_plx_logging(trainer, optimizers=None, evaluators=None, log_every_iters=100, **kwargs):
    """Create a Polyaxon logger and wire it to the trainer and evaluators.

    Logged values are:

    - Training metrics, e.g. running average loss values
    - Learning rate(s)
    - Evaluation metrics

    Args:
        trainer (Engine): trainer engine.
        optimizers (torch.optim.Optimizer or dict of torch.optim.Optimizer, optional): single optimizer or
            dictionary of optimizers. Dictionary keys are used as logging tags.
        evaluators (Engine or dict of Engine, optional): single evaluator or dictionary of evaluators.
            Dictionary keys are used as logging tags.
        log_every_iters (int, optional): interval for handlers attached to iteration events. Use 1 or
            None to log every iteration.
        **kwargs: extra keyword arguments forwarded to the logger constructor.

    Returns:
        PolyaxonLogger
    """
    plx_logger = PolyaxonLogger(**kwargs)
    _setup_logging(
        logger=plx_logger,
        trainer=trainer,
        optimizers=optimizers,
        evaluators=evaluators,
        log_every_iters=log_every_iters,
    )
    return plx_logger
def setup_trains_logging(trainer, optimizers=None, evaluators=None, log_every_iters=100, **kwargs):
    """Create a Trains logger and wire it to the trainer and evaluators.

    Logged values are:

    - Training metrics, e.g. running average loss values
    - Learning rate(s)
    - Evaluation metrics

    Args:
        trainer (Engine): trainer engine.
        optimizers (torch.optim.Optimizer or dict of torch.optim.Optimizer, optional): single optimizer or
            dictionary of optimizers. Dictionary keys are used as logging tags.
        evaluators (Engine or dict of Engine, optional): single evaluator or dictionary of evaluators.
            Dictionary keys are used as logging tags.
        log_every_iters (int, optional): interval for handlers attached to iteration events. Use 1 or
            None to log every iteration.
        **kwargs: extra keyword arguments forwarded to the logger constructor.

    Returns:
        TrainsLogger
    """
    trains_logger = TrainsLogger(**kwargs)
    _setup_logging(
        logger=trains_logger,
        trainer=trainer,
        optimizers=optimizers,
        evaluators=evaluators,
        log_every_iters=log_every_iters,
    )
    return trains_logger
def save_best_model_by_val_score(output_path, evaluator, model, metric_name, n_saved=3, trainer=None, tag="val"):
    """Attach a handler to `evaluator` that keeps the best models by score.

    The score is the metric named `metric_name` provided by `evaluator`.

    Args:
        output_path (str): directory where the best models are saved.
        evaluator (Engine): evaluation engine used to provide the score.
        model (nn.Module): model to store.
        metric_name (str): metric name to use for score evaluation. This metric should be present in
            `evaluator.state.metrics`.
        n_saved (int, optional): number of best models to keep.
        trainer (Engine, optional): trainer engine used to fetch the global step when saving the best model.
        tag (str, optional): score name prefix: `{tag}_{metric_name}`. By default, tag is "val".

    Returns:
        A :class:`~ignite.handlers.checkpoint.Checkpoint` handler.
    """
    # Without a trainer no global step transform is passed to the checkpoint.
    step_transform = global_step_from_engine(trainer) if trainer is not None else None

    objects_to_save = {"model": model}
    disk_saver = DiskSaver(dirname=output_path, require_empty=False)
    # The metric name is lower-cased in the score name.
    score_label = "{}_{}".format(tag, metric_name.lower())
    score_fn = get_default_score_fn(metric_name)

    best_model_handler = Checkpoint(
        objects_to_save,
        disk_saver,
        filename_prefix="best",
        n_saved=n_saved,
        global_step_transform=step_transform,
        score_name=score_label,
        score_function=score_fn,
    )
    evaluator.add_event_handler(Events.COMPLETED, best_model_handler)

    return best_model_handler
def add_early_stopping_by_val_score(patience, evaluator, trainer, metric_name):
    """Attach an early stopping handler driven by an evaluation metric.

    The score is the metric named `metric_name` provided by `evaluator`.

    Args:
        patience (int): number of events to wait if no improvement and then stop the training.
        evaluator (Engine): evaluation engine used to provide the score.
        trainer (Engine): trainer engine to stop the run if no improvement.
        metric_name (str): metric name to use for score evaluation. This metric should be present in
            `evaluator.state.metrics`.

    Returns:
        A :class:`~ignite.handlers.early_stopping.EarlyStopping` handler.
    """
    score_fn = get_default_score_fn(metric_name)
    early_stopper = EarlyStopping(patience=patience, score_function=score_fn, trainer=trainer)
    # The handler is triggered each time the evaluator completes a run.
    evaluator.add_event_handler(Events.COMPLETED, early_stopper)
    return early_stopper