Shortcuts

Source code for ignite.utils

import collections.abc as collections
import functools
import hashlib
import logging
import numbers
import random
import shutil
import warnings
from pathlib import Path
from typing import Any, Callable, cast, Dict, List, Optional, TextIO, Tuple, Type, TypeVar, Union

import torch

# Names exported by ``from ignite.utils import *``. ``_to_str_list`` is listed explicitly, so a
# star-import exports it despite its leading underscore.
__all__ = [
    "convert_tensor",
    "apply_to_tensor",
    "apply_to_type",
    "_to_str_list",
    "to_onehot",
    "setup_logger",
    "manual_seed",
    "hash_checkpoint",
]


def convert_tensor(
    x: Union[torch.Tensor, collections.Sequence, collections.Mapping, str, bytes],
    device: Optional[Union[str, torch.device]] = None,
    non_blocking: bool = False,
) -> Union[torch.Tensor, collections.Sequence, collections.Mapping, str, bytes]:
    """Transfer every tensor found in ``x`` to ``device``.

    Args:
        x: input tensor or mapping, or sequence of tensors.
        device: device to move the tensors of ``x`` to. If None, tensors are handed back without
            calling ``.to``.
        non_blocking: forwarded to ``torch.Tensor.to``: convert a CPU Tensor with pinned memory to a
            CUDA Tensor asynchronously with respect to the host if possible.

    Returns:
        The result of :func:`apply_to_tensor` called on ``x`` with the per-tensor move function.
    """

    def _move(tensor: torch.Tensor) -> torch.Tensor:
        # Without a target device the tensor is returned untouched.
        if device is None:
            return tensor
        return tensor.to(device=device, non_blocking=non_blocking)

    return apply_to_tensor(x, _move)
def apply_to_tensor(
    x: Union[torch.Tensor, collections.Sequence, collections.Mapping, str, bytes], func: Callable
) -> Union[torch.Tensor, collections.Sequence, collections.Mapping, str, bytes]:
    """Run ``func`` on a tensor, or on each tensor held by a mapping or sequence.

    This is :func:`apply_to_type` with the input type fixed to ``torch.Tensor``.

    Args:
        x: input tensor or mapping, or sequence of tensors.
        func: the function to apply on the tensors of ``x``.
    """
    return apply_to_type(x, input_type=torch.Tensor, func=func)
def apply_to_type(
    x: Union[Any, collections.Sequence, collections.Mapping, str, bytes],
    input_type: Union[Type, Tuple[Type[Any], Any]],
    func: Callable,
) -> Union[Any, collections.Sequence, collections.Mapping, str, bytes]:
    """Run ``func`` on an object of ``input_type``, or on every such object nested in a mapping or sequence.

    Containers are rebuilt with their own type (``type(x)``); ``str`` and ``bytes`` objects that are not
    of ``input_type`` are returned unchanged.

    Args:
        x: object or mapping or sequence.
        input_type: type (or tuple of types) of the objects ``func`` is applied to.
        func: the function to apply on the matching objects.

    Raises:
        TypeError: if ``x`` (or a nested item) is neither of ``input_type``, a string/bytes object,
            a mapping nor a sequence.
    """

    def _recurse(item: Any) -> Any:
        return apply_to_type(item, input_type, func)

    # Leaves first: matching objects are transformed, text-like objects pass through.
    if isinstance(x, input_type):
        return func(x)
    if isinstance(x, (str, bytes)):
        return x

    rebuild = cast(Callable, type(x))
    if isinstance(x, collections.Mapping):
        return rebuild({name: _recurse(item) for name, item in x.items()})
    if isinstance(x, tuple) and hasattr(x, "_fields"):
        # namedtuple: its constructor takes the fields as positional arguments
        return rebuild(*map(_recurse, x))
    if isinstance(x, collections.Sequence):
        return rebuild([_recurse(item) for item in x])

    raise TypeError(f"x must contain {input_type}, dicts or lists; found {type(x)}")
def _tree_map(
    func: Callable, x: Union[Any, collections.Sequence, collections.Mapping], key: Optional[Union[int, str]] = None
) -> Union[Any, collections.Sequence, collections.Mapping]:
    """Apply ``func`` to every leaf of a nested structure and rebuild the structure.

    Mappings, namedtuples and sequences are traversed recursively and re-created with ``type(x)``.
    Any other object, and any ``str``, is a leaf and is replaced by ``func(x, key=key)``.

    Args:
        func: callable invoked as ``func(leaf, key=key)``.
        x: leaf object, or (possibly nested) mapping/sequence of leaf objects.
        key: mapping key or sequence index under which ``x`` is stored in its parent. It is None for
            the root object and for namedtuple fields.

    Returns:
        A structure of the same container types as ``x`` holding the values returned by ``func``.
    """
    if isinstance(x, str):
        # A str is a Sequence whose items are themselves str, so descending into it never
        # terminates; handle it as a leaf.
        return func(x, key=key)
    if isinstance(x, collections.Mapping):
        return cast(Callable, type(x))({k: _tree_map(func, sample, key=k) for k, sample in x.items()})
    if isinstance(x, tuple) and hasattr(x, "_fields"):  # namedtuple
        return cast(Callable, type(x))(*(_tree_map(func, sample) for sample in x))
    if isinstance(x, collections.Sequence):
        return cast(Callable, type(x))([_tree_map(func, sample, key=i) for i, sample in enumerate(x)])
    return func(x, key=key)


def _to_str_list(data: Any) -> List[str]:
    """
    Recursively flattens and formats complex data structures, including keys for
    dictionaries, into a list of human-readable strings.

    This function processes nested dictionaries, lists, tuples, numbers, and
    PyTorch tensors, formatting numbers to four decimal places and handling
    tensors with special formatting rules.

    The function handles the following types:

    - Numbers: Formatted to four decimal places.
    - PyTorch tensors:
        - Scalars are formatted to four decimal places.
        - 1D tensors with more than 10 elements show the first 10 elements
          followed by an ellipsis.
        - 1D tensors with 10 or fewer elements are fully listed.
        - Multi-dimensional tensors display their shape.
    - Dictionaries: Each key-value pair is included in the output with the key
      as a prefix.
    - Lists and tuples: Their formatted items are joined with ``", "`` into a single
      entry. An empty list/tuple stored under a dictionary key is represented by the
      key prefix alone; without a key it produces no entry.
    - None values: Represented by the key prefix alone when stored under a dictionary
      key, otherwise they produce no entry.

    Objects of any other type produce no entry.

    Args:
        data: The input data to be flattened and formatted. It can be a nested
            combination of dictionaries, lists, tuples, numbers, and PyTorch
            tensors, or a single number or tensor.

    Returns:
        A list of formatted strings, each representing a part of the input data
        structure.
    """
    formatted_items: List[str] = []

    def format_item(item: Any, prefix: str = "") -> Optional[str]:
        # Numbers and tensors are returned to the caller as a string; containers and None
        # append their own entries to ``formatted_items`` and return None.
        if isinstance(item, numbers.Number):
            return f"{prefix}{item:.4f}"
        elif torch.is_tensor(item):
            if item.dim() == 0:
                return f"{prefix}{item.item():.4f}"  # Format scalar tensor without brackets
            elif item.dim() == 1 and item.size(0) > 10:
                return f"{prefix}[" + ", ".join(f"{x.item():.4f}" for x in item[:10]) + ", ...]"
            elif item.dim() == 1:
                return f"{prefix}[" + ", ".join(f"{x.item():.4f}" for x in item) + "]"
            else:
                return f"{prefix}Shape{list(item.shape)}"
        elif isinstance(item, dict):
            for key, value in item.items():
                formatted_value = format_item(value, f"{key}: ")
                if formatted_value is not None:
                    formatted_items.append(formatted_value)
        elif isinstance(item, (list, tuple)):
            if not item:
                if prefix:
                    formatted_items.append(f"{prefix}")
            else:
                values = [format_item(x) for x in item]
                values_str = [v for v in values if v is not None]
                if values_str:
                    formatted_items.append(f"{prefix}" + ", ".join(values_str))
        elif item is None:
            if prefix:
                formatted_items.append(f"{prefix}")
        return None

    # A top-level number or tensor is returned by ``format_item`` instead of being appended,
    # so collect it here; otherwise a bare tensor input would yield an empty list.
    top_level = format_item(data)
    if top_level is not None:
        formatted_items.append(top_level)
    return formatted_items


class _CollectionItem:
    """Reference to one slot (``collection[key]``) of a dict or list.

    It lets a callback read or overwrite a value in place inside its parent container.
    """

    # Value types that :meth:`wrap` replaces by a ``_CollectionItem`` (in addition to None).
    types_as_collection_item: Tuple = (int, float, torch.Tensor)

    def __init__(self, collection: Union[Dict, List], key: Union[int, str]) -> None:
        """
        Args:
            collection: the dict or list that owns the slot.
            key: dict key or list index of the slot.

        Raises:
            TypeError: if ``collection`` is neither a dict nor a list.
            ValueError: if ``collection`` is a list and ``key`` is a string.
        """
        if not isinstance(collection, (dict, list)):
            raise TypeError(
                f"Input type is expected to be a mapping or list, but got {type(collection)} "
                f"for input key '{key}'."
            )
        if isinstance(collection, list) and isinstance(key, str):
            raise ValueError("Key should be int for collection of type list")

        self.collection = collection
        self.key = key

    def load_value(self, value: Any) -> None:
        """Store ``value`` in the referenced slot."""
        self.collection[self.key] = value  # type: ignore[index]

    def value(self) -> Any:
        """Return the value currently held by the referenced slot."""
        return self.collection[self.key]  # type: ignore[index]

    @staticmethod
    def wrap(object: Union[Dict, List], key: Union[int, str], value: Any) -> Union[Any, "_CollectionItem"]:
        """Return a ``_CollectionItem`` for ``object[key]`` or ``value`` itself.

        A ``_CollectionItem`` is created when ``value`` is None or an instance of one of
        ``types_as_collection_item``; any other ``value`` is returned unchanged.

        Args:
            object: the dict or list that holds ``value`` (the parameter name shadows the builtin
                and is kept so that keyword callers keep working).
            key: dict key or list index of ``value`` in ``object``.
            value: the value stored at ``object[key]``.
        """
        return (
            _CollectionItem(object, key)
            if value is None or isinstance(value, _CollectionItem.types_as_collection_item)
            else value
        )


def _tree_apply2(
    func: Callable,
    x: Union[Any, List, Dict],
    y: Union[Any, collections.Sequence, collections.Mapping],
) -> None:
    """Walk ``x`` and ``y`` in parallel and call ``func`` on every pair of matching leaves.

    A dict ``x`` is matched with a mapping ``y`` key by key, a list ``x`` with a sequence ``y``
    item by item. Items of ``x`` that are None, int, float or tensor are handed to ``func``
    as :class:`_CollectionItem`, so ``func`` can overwrite them inside ``x``.

    Args:
        func: callable invoked as ``func(x_leaf, y_leaf)``.
        x: dict, list (possibly nested) or leaf object.
        y: mapping, sequence (possibly nested) or leaf object with the same structure as ``x``.

    Returns:
        The result of ``func(x, y)`` when ``x`` and ``y`` are not a matching dict/mapping or
        list/sequence pair, otherwise None.

    Raises:
        ValueError: if a key of ``x`` is missing in ``y`` or if the list sizes differ.
    """
    if isinstance(x, dict) and isinstance(y, collections.Mapping):
        for k, v in x.items():
            if k not in y:
                raise ValueError(f"Key '{k}' from x is not found in y: {y.keys()}")
            _tree_apply2(func, _CollectionItem.wrap(x, k, v), y[k])
    elif isinstance(x, list) and isinstance(y, collections.Sequence):
        if len(x) != len(y):
            raise ValueError(f"Size of y: {len(y)} does not match the size of x: '{len(x)}'")
        for i, (v1, v2) in enumerate(zip(x, y)):
            _tree_apply2(func, _CollectionItem.wrap(x, i, v1), v2)
    else:
        return func(x, y)
def to_onehot(indices: torch.Tensor, num_classes: int) -> torch.Tensor:
    """Turn a tensor of class indices of shape `(N, ...)` into one-hot indicators of shape
    `(N, num_classes, ...)` and of type uint8. The output lives on the same device as the input.

    Args:
        indices: input tensor to convert.
        num_classes: number of classes for one-hot tensor.

    .. versionchanged:: 0.4.3
        This function is now torchscriptable.
    """
    # The class axis is inserted right after the first dimension.
    expanded_shape = (indices.shape[0], num_classes) + indices.shape[1:]
    class_index = indices.unsqueeze(1)
    indicators = torch.zeros(expanded_shape, dtype=torch.uint8, device=indices.device)
    # In-place scatter writes a 1 at each index position along the class axis.
    indicators.scatter_(1, class_index, 1)
    return indicators
def setup_logger(
    name: Optional[str] = "ignite",
    level: int = logging.INFO,
    stream: Optional[TextIO] = None,
    format: str = "%(asctime)s %(name)s %(levelname)s: %(message)s",
    filepath: Optional[str] = None,
    distributed_rank: Optional[int] = None,
    reset: bool = False,
    encoding: Optional[str] = "utf-8",
) -> logging.Logger:
    """Set up a logger: name, level, format etc.

    Args:
        name: new name for the logger. If None, the standard logger is used.
        level: logging level, e.g. CRITICAL, ERROR, WARNING, INFO, DEBUG.
        stream: logging stream. If None, the standard stream is used (sys.stderr).
        format: logging format. By default, `%(asctime)s %(name)s %(levelname)s: %(message)s`.
        filepath: Optional logging file path. If not None, logs are written to the file.
        distributed_rank: Optional, rank in distributed configuration to avoid logger setup for workers.
            If None, distributed_rank is initialized to the rank of process.
        reset: if True, reset an existing logger rather than keep format, handlers, and level.
        encoding: open the file with the encoding. By default, 'utf-8'.

    Returns:
        logging.Logger

    Examples:
        Improve logs readability when training with a trainer and evaluator:

        .. code-block:: python

            from ignite.utils import setup_logger

            trainer = ...
            evaluator = ...

            trainer.logger = setup_logger("trainer")
            evaluator.logger = setup_logger("evaluator")

            trainer.run(data, max_epochs=10)

            # Logs will look like
            # 2020-01-21 12:46:07,356 trainer INFO: Engine run starting with max_epochs=10.
            # 2020-01-21 12:46:07,358 trainer INFO: Epoch[1] Complete. Time taken: 00:05:23
            # 2020-01-21 12:46:07,358 evaluator INFO: Engine run starting with max_epochs=1.
            # 2020-01-21 12:46:07,358 evaluator INFO: Epoch[1] Complete. Time taken: 00:01:02
            # ...

        Every existing logger can be reset if needed

        .. code-block:: python

            logger = setup_logger(name="my-logger", format="=== %(name)s %(message)s")
            logger.info("first message")
            setup_logger(name="my-logger", format="+++ %(name)s %(message)s", reset=True)
            logger.info("second message")

            # Logs will look like
            # === my-logger first message
            # +++ my-logger second message

        Change the level of an existing internal logger

        .. code-block:: python

            setup_logger(
                name="ignite.distributed.launcher.Parallel",
                level=logging.WARNING
            )

    .. versionchanged:: 0.4.3
        Added ``stream`` parameter.

    .. versionchanged:: 0.4.5
        Added ``reset`` parameter.

    .. versionchanged:: 0.5.1
        Argument ``encoding`` added to correctly handle special characters in the file, default "utf-8".
    """
    # The root logger (name is None) and any already registered name count as existing.
    already_exists = name is None or name in logging.root.manager.loggerDict

    # Fetches the registered logger, or creates it on first use.
    logger = logging.getLogger(name)

    if distributed_rank is None:
        import ignite.distributed as idist

        distributed_rank = idist.get_rank()

    is_worker = distributed_rank > 0

    # Workers and explicit resets start from a logger without handlers.
    if (is_worker or reset) and logger.hasHandlers():
        for stale_handler in list(logger.handlers):
            logger.removeHandler(stale_handler)

    if is_worker:
        # A null handler avoids multiple parallel messages.
        logger.addHandler(logging.NullHandler())

    # An existing logger keeps its configuration unless a reset was requested.
    if already_exists and not reset:
        return logger

    if distributed_rank == 0:
        logger.setLevel(level)
        formatter = logging.Formatter(format)

        def _attach(handler: logging.Handler) -> None:
            handler.setLevel(level)
            handler.setFormatter(formatter)
            logger.addHandler(handler)

        _attach(logging.StreamHandler(stream=stream))
        if filepath is not None:
            _attach(logging.FileHandler(filepath, encoding=encoding))

    # don't propagate to ancestors
    # the problem here is to attach handlers to loggers
    # should we provide a default configuration less open ?
    if name is not None:
        logger.propagate = False

    return logger
def manual_seed(seed: int) -> None:
    """Seed the random state of `torch`, `random` and, when they can be imported, `torch_xla` and `numpy`.

    Args:
        seed: Random state seed

    .. versionchanged:: 0.4.3
        Added ``torch.cuda.manual_seed_all(seed)``.

    .. versionchanged:: 0.4.5
        Added ``torch_xla.core.xla_model.set_rng_state(seed)``.
    """
    torch.manual_seed(seed)
    random.seed(seed)

    # Optional backends: a missing package is simply skipped.
    try:
        import torch_xla.core.xla_model as xla_model

        xla_model.set_rng_state(seed)
    except ImportError:
        pass

    try:
        import numpy

        numpy.random.seed(seed)
    except ImportError:
        pass
def deprecated(
    deprecated_in: str, removed_in: str = "", reasons: Tuple[str, ...] = (), raise_exception: bool = False
) -> Callable:
    """Build a decorator that marks a function as deprecated.

    The decorated function emits a ``DeprecationWarning`` on every call and then runs
    normally, or raises the ``DeprecationWarning`` instead of running when ``raise_exception``
    is True. Its docstring is prefixed with a deprecation banner and extended with a
    ``.. deprecated::`` directive listing ``reasons``.

    Args:
        deprecated_in: version in which the function was deprecated.
        removed_in: version in which the function will be removed; omitted from the message if empty.
        reasons: reasons for the deprecation, appended to the docstring as a bullet list.
        raise_exception: if True, calling the function raises instead of warning.

    Returns:
        The decorator to apply on the deprecated function.
    """
    F = TypeVar("F", bound=Callable[..., Any])

    # Both texts depend only on the decorator arguments, so they are built once up front.
    removal_note = f" and will be removed in version {removed_in}" if removed_in else ""
    message = (
        f"This function has been deprecated since version {deprecated_in}"
        + removal_note
        + ".\n Please refer to the documentation for more details."
    )
    directive = f".. deprecated:: {deprecated_in}"
    if len(reasons) > 0:
        directive += "\n\n\t"
    directive += "".join("\n\t- " + reason for reason in reasons)

    def decorator(func: F) -> F:
        original_doc = func.__doc__ or ""

        @functools.wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            if raise_exception:
                raise DeprecationWarning(message)
            # stacklevel=2 attributes the warning to the caller of the deprecated function.
            warnings.warn(message, DeprecationWarning, stacklevel=2)
            return func(*args, **kwargs)

        wrapper.__doc__ = f"**Deprecated function**.\n\n {original_doc}{directive}"
        return cast(F, wrapper)

    return decorator
def hash_checkpoint(checkpoint_path: Union[str, Path], output_dir: Union[str, Path]) -> Tuple[Path, str]:
    """
    Move the checkpoint file to ``output_dir`` under the name ``<filename>-<hash>.pt``, where
    ``<hash>`` is the first 8 characters of the file's SHA256 hex digest, to be used with
    ``check_hash`` of :func:`torch.hub.load_state_dict_from_url`.

    Args:
        checkpoint_path: Path to the checkpoint file. The file is moved, not copied.
        output_dir: Output directory to store the hashed checkpoint file
            (will be created if not exist).

    Returns:
        Path to the hashed checkpoint file, the SHA256 hex digest of the checkpoint file.

    Raises:
        FileNotFoundError: if ``checkpoint_path`` does not exist.

    .. versionadded:: 0.4.8
    """
    source = Path(checkpoint_path) if isinstance(checkpoint_path, str) else checkpoint_path
    if not source.exists():
        raise FileNotFoundError(f"{source.name} does not exist in {source.parent}.")

    target_dir = Path(output_dir) if isinstance(output_dir, str) else output_dir
    target_dir.mkdir(parents=True, exist_ok=True)

    # taken from https://github.com/pytorch/vision/blob/main/references/classification/utils.py
    digest = hashlib.sha256()
    with source.open("rb") as stream:
        # Feed the hash in blocks of 4KB until the file is exhausted
        while True:
            chunk = stream.read(4096)
            if not chunk:
                break
            digest.update(chunk)
    sha_hash = digest.hexdigest()

    hashed_path = target_dir / f"{source.stem}-{sha_hash[:8]}.pt"
    shutil.move(str(source), hashed_path)

    return hashed_path, sha_hash

© Copyright 2024, PyTorch-Ignite Contributors. Last updated on 10/02/2024, 2:46:15 PM.

Built with Sphinx using a theme provided by Read the Docs.