Source code for schola.scripts.ray.settings

# Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved.
"""
This module contains the settings dataclasses for the RLlib script
"""
import argparse
from typing import Any, Dict, List, Optional, Type, Union
from dataclasses import dataclass, field
from ray.rllib.algorithms.appo.appo import APPOConfig
from ray.rllib.algorithms.impala.impala import IMPALAConfig
from ray.rllib.algorithms.ppo.ppo import PPOConfig
from schola.scripts.common import (
    ActivationFunctionEnum,
    ScriptArgs,
    RLLibLauncherExtension,
)


class RLLibAlgorithmSpecificSettings():
    """
    Base class for RLlib algorithm specific settings.

    This class is intended to be inherited by specific algorithm settings classes
    (e.g., PPOSettings, IMPALASettings, etc.).
    """

    def get_settings_dict(self) -> Dict[str, Any]:
        """
        Get the settings as a dictionary keyed by the correct parameter name in Ray.
        """
        ...

    @classmethod
    def get_parser(cls):
        """
        Add the settings to the parser or subparser.
        """
        ...

@dataclass
class PPOSettings(RLLibAlgorithmSpecificSettings):
    """
    Dataclass for PPO (Proximal Policy Optimization) algorithm specific settings.

    This class defines the parameters used in the PPO algorithm, including GAE lambda,
    clip parameter, and whether to use GAE.
    """

    gae_lambda: float = 0.95  #: The lambda parameter for Generalized Advantage Estimation (GAE). This controls the trade-off between bias and variance in the advantage estimation.

    clip_param: float = 0.2  #: The clip parameter for the PPO algorithm. This is the epsilon value used in the clipped surrogate objective function. It helps to limit the policy update step size to prevent large changes that could lead to performance collapse.

    use_gae: bool = True  #: Whether to use Generalized Advantage Estimation (GAE) for advantage calculation. GAE is a method to reduce the variance of the advantage estimates while keeping bias low. If set to False, the standard advantage calculation will be used instead.

    @property
    def rllib_config(self) -> Type[PPOConfig]:
        return PPOConfig

    @property
    def name(self) -> str:
        return "PPO"

    def get_settings_dict(self):
        return {
            "lambda_": self.gae_lambda,
            "use_gae": self.use_gae,
            "clip_param": self.clip_param,
        }

    @classmethod
    def get_parser(cls):
        parser = argparse.ArgumentParser(add_help=False)
        parser.add_argument("--disable-gae", dest="use_gae", action="store_false", help="Disable the Generalized Advantage Estimation (GAE) for the PPO algorithm")
        parser.add_argument("--gae-lambda", type=float, default=0.95, help="The GAE lambda value for the PPO algorithm")
        parser.add_argument("--clip-param", type=float, default=0.2, help="The clip range for the PPO algorithm")
        parser.set_defaults(algorithm_settings_class=PPOSettings)
        return parser

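As a usage sketch (not part of the module source), these settings are meant to be splatted into the config class returned by rllib_config; this assumes the keys returned by get_settings_dict() match the keyword arguments of PPOConfig.training(), which can vary between Ray versions:

    # Sketch: apply PPO-specific settings to an RLlib config (values are illustrative).
    ppo_settings = PPOSettings(gae_lambda=0.9, clip_param=0.3)
    config = ppo_settings.rllib_config()  # instantiates PPOConfig
    config = config.training(**ppo_settings.get_settings_dict())
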
@dataclass
class IMPALASettings(RLLibAlgorithmSpecificSettings):
    """
    Dataclass for IMPALA (Importance Weighted Actor-Learner Architecture) algorithm specific settings.

    This class defines the parameters used in the IMPALA algorithm, including V-trace
    settings for off-policy correction.
    """

    vtrace: bool = True  #: Whether to use the V-trace algorithm for off-policy correction in the IMPALA algorithm. V-trace is a method to correct the bias introduced by using off-policy data for training. It helps to ensure that the value estimates are more accurate and stable.

    vtrace_clip_rho_threshold: float = 1.0  #: The clip threshold for V-trace rho values.

    vtrace_clip_pg_rho_threshold: float = 1.0  #: The clip threshold for V-trace rho values in the policy gradient.

    @property
    def rllib_config(self) -> Type[IMPALAConfig]:
        return IMPALAConfig

    @property
    def name(self) -> str:
        return "IMPALA"

    def get_settings_dict(self):
        return {
            "vtrace": self.vtrace,
            "vtrace_clip_rho_threshold": self.vtrace_clip_rho_threshold,
            "vtrace_clip_pg_rho_threshold": self.vtrace_clip_pg_rho_threshold,
        }

    @classmethod
    def get_parser(cls):
        parser = argparse.ArgumentParser(add_help=False)
        parser.add_argument("--disable-vtrace", dest="vtrace", action="store_false", help="Disable the V-trace algorithm")
        parser.add_argument("--vtrace-clip-rho-threshold", type=float, default=1.0, help="The clip threshold for V-trace rho values")
        parser.add_argument("--vtrace-clip-pg-rho-threshold", type=float, default=1.0, help="The clip threshold for V-trace rho values in the policy gradient")
        parser.set_defaults(algorithm_settings_class=IMPALASettings)
        return parser

@dataclass
class APPOSettings(IMPALASettings, PPOSettings):
    """
    Dataclass for APPO (Asynchronous Proximal Policy Optimization) algorithm specific settings.

    This class inherits from both IMPALASettings and PPOSettings to combine the settings
    for both algorithms. This allows for the use of both V-trace for off-policy correction
    and PPO for policy optimization in a single algorithm.
    """

    @property
    def rllib_config(self) -> Type[APPOConfig]:
        return APPOConfig

    @property
    def name(self) -> str:
        return "APPO"

    def get_settings_dict(self):
        base_impala_dict = IMPALASettings.get_settings_dict(self)
        base_ppo_dict = PPOSettings.get_settings_dict(self)
        return {**base_impala_dict, **base_ppo_dict}

    @classmethod
    def get_parser(cls):
        parser = argparse.ArgumentParser(parents=[IMPALASettings.get_parser(), PPOSettings.get_parser()], add_help=False)
        parser.set_defaults(algorithm_settings_class=APPOSettings)
        return parser

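For illustration, because APPOSettings composes its parents via argparse's parents mechanism, a single parser accepts both the IMPALA and PPO flags; a sketch with hypothetical flag values:

    # Sketch: one parser exposes both parents' options; defaults resolve to APPOSettings.
    parser = APPOSettings.get_parser()
    args = parser.parse_args(["--gae-lambda", "0.9", "--disable-vtrace"])
    # args.gae_lambda -> 0.9, args.vtrace -> False, args.use_gae -> True
    # args.algorithm_settings_class -> APPOSettings (from set_defaults)
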
@dataclass
class TrainingSettings():
    """
    Dataclass for generic training settings used in the RLlib training process.

    This class defines the parameters for training, including the number of timesteps,
    learning rate, minibatch size, and other hyperparameters that control the training
    process. These settings are applicable to any RLlib algorithm and can be customized
    based on the specific requirements of the training job.
    """

    timesteps: int = 3000  #: The number of timesteps to train for. This is the total number of timesteps to run during training.

    learning_rate: float = 0.0003  #: The learning rate for any chosen algorithm. This controls how much to adjust the model weights in response to the estimated error each time the model weights are updated. A smaller value means slower learning, while a larger value means faster learning.

    minibatch_size: int = 128  #: The size of the minibatch for training. This is the number of samples used in each iteration of training to update the model weights. A larger batch size can lead to more stable estimates of the gradient, but requires more memory and can slow down training if too large.

    train_batch_size_per_learner: int = 256  #: The number of samples given to each learner during training. Must be divisible by minibatch_size.

    num_sgd_iter: int = 5  #: The number of stochastic gradient descent (SGD) iterations for each batch. This is the number of times to update the model weights using the samples in the minibatch. More iterations can lead to better convergence, but also increase the training time.

    gamma: float = 0.99  #: The discount factor for the reinforcement learning algorithm. This is used to calculate the present value of future rewards. A value of 0.99 means that future rewards are discounted by 1% for each time step into the future. This helps to balance the importance of immediate versus future rewards in the training process. A value closer to 1.0 will prioritize future rewards more heavily, while a value closer to 0 will prioritize immediate rewards.

    @property
    def name(self) -> str:
        return "Training Settings"

    @classmethod
    def populate_arg_group(cls, args_group):
        args_group.add_argument("-t", "--timesteps", type=int, default=3000, help="Number of timesteps to train for")
        args_group.add_argument("--learning-rate", type=float, default=0.0003, help="Learning rate for the chosen algorithm")
        args_group.add_argument("--minibatch-size", type=int, default=128, help="The size of the minibatch for training. Taken from the train batch given to each learner")
        args_group.add_argument("--train-batch-size-per-learner", type=int, default=256, help="Size of the train batch given to each learner")
        args_group.add_argument("--num-sgd-iter", type=int, default=5, help="The number of SGD iterations for each batch")
        args_group.add_argument("--gamma", type=float, default=0.99, help="The discount factor for the chosen algorithm")

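A sketch of how populate_arg_group is expected to be wired into a parser; the parser construction below is assumed for illustration and is not part of this module:

    # Sketch: attach the training options to an argument group on a caller-owned parser.
    parser = argparse.ArgumentParser()
    group = parser.add_argument_group(TrainingSettings().name)
    TrainingSettings.populate_arg_group(group)
    args = parser.parse_args(["--timesteps", "10000", "--gamma", "0.98"])
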
@dataclass
class ResourceSettings():
    """
    Dataclass for resource settings used in the RLlib training process.

    This class defines the parameters for allocating computational resources, including
    the number of GPUs and CPUs to use for the training job. These settings help to
    control how resources are allocated for the training process, which can impact
    performance and training times. This is especially important when running on a
    cluster or in a distributed environment.
    """

    num_gpus: Optional[int] = 0  #: The number of GPUs to use for the training process. This specifies how many GPUs are available for the RLlib training job. If set to 0, it will default to CPU training. This can be used to leverage GPU acceleration for faster training times if available.

    num_cpus: Optional[int] = 1  #: The total number of CPUs to use for the training process. This specifies how many CPU cores are available for the RLlib training job. This can be used to parallelize the training process across multiple CPU cores, which can help to speed up training times.

    num_learners: Optional[int] = 0  #: The number of learner processes to use for the training job. This specifies how many parallel learner processes will be used to train the model. Each learner will process a portion of the training data and update the model weights independently. This can help to speed up training times by leveraging multiple CPU cores or GPUs.

    num_cpus_for_main_process: Optional[int] = 1  #: The number of CPUs to allocate for the main process. This is the number of CPU cores that will be allocated to the main process that manages the training job. This can be used to ensure that the main process has enough resources to handle the workload and manage the learner processes effectively.

    num_cpus_per_learner: Optional[int] = 1  #: The number of CPUs to allocate for each learner process. This specifies how many CPU cores will be allocated to each individual learner process that is used for training. This can be used to ensure that each learner has enough resources to handle its workload and process the training data efficiently.

    num_gpus_per_learner: Optional[int] = 0  #: The number of GPUs to allocate for each learner process. This specifies how many GPUs will be allocated to each individual learner process that is used for training.

    using_cluster: Optional[bool] = False  #: Whether Ray is running on a cluster.

    @property
    def name(self) -> str:
        return "Resource Settings"

    @classmethod
    def populate_arg_group(cls, args_group):
        args_group.add_argument("--num-gpus", type=int, default=0, help="Number of GPUs to use")
        args_group.add_argument("--num-cpus", type=int, default=1, help="Number of CPUs to use")
        args_group.add_argument("--num-cpus-per-learner", type=int, default=1, help="Number of CPUs to use per learner process")
        args_group.add_argument("--num-gpus-per-learner", type=int, default=0, help="Number of GPUs to use per learner process")
        args_group.add_argument("--num-learners", type=int, default=0, help="Number of learner processes to use")
        args_group.add_argument("--num-cpus-for-main-process", type=int, default=1, help="Number of CPUs to use for the main process")
        args_group.add_argument("--using-cluster", action="store_true", help="Whether Ray is running on a cluster")

@dataclass
class LoggingSettings():
    """
    Dataclass for logging settings used in the RLlib training process.

    This class defines the verbosity levels for logging in both the Schola environment
    and RLlib. These settings help to control the amount of logging information generated
    during the training process, which can be useful for debugging and understanding the
    training process. Adjusting these settings can help to balance the amount of
    information logged against performance and readability of the logs.
    """

    schola_verbosity: int = 0  #: Verbosity level for the Schola environment. This controls the level of detail in the logs generated by the Schola environment. A higher value will produce more detailed logs, which can be useful for debugging and understanding the training process. Default is 0 (no additional logging).

    rllib_verbosity: int = 1  #: Verbosity level for RLlib. This controls the level of detail in the logs generated by RLlib. A higher value will produce more detailed logs, which can be useful for debugging and understanding the training process. Default is 1 (standard logging).

    @property
    def name(self) -> str:
        return "Logging Settings"

    @classmethod
    def populate_arg_group(cls, args_group):
        args_group.add_argument("-scholav", "--schola-verbosity", type=int, default=0, help="Verbosity level for the Schola environment")
        args_group.add_argument("-rllibv", "--rllib-verbosity", type=int, default=1, help="Verbosity level for RLlib")

@dataclass
class ResumeSettings():
    """
    Dataclass for resume settings used in the RLlib training process.

    This class defines the parameters for resuming training from a saved checkpoint.
    This allows you to continue training from a previously saved model checkpoint
    instead of starting from scratch. This is useful for long training jobs or if you
    want to experiment with different hyperparameters without losing progress.
    """

    resume_from: Optional[str] = None  #: Path to a checkpoint to resume training from. This allows you to continue training from a previously saved model checkpoint instead of starting from scratch. This is useful for long training jobs or if you want to experiment with different hyperparameters without losing progress. If set to None, training will start from scratch.

    @property
    def name(self) -> str:
        return "Resume Settings"

    @classmethod
    def populate_arg_group(cls, args_group):
        args_group.add_argument("--resume-from", type=str, default=None, help="Path to checkpoint to resume from")

@dataclass
class NetworkArchitectureSettings():
    """
    Dataclass for network architecture settings used in the RLlib training process.

    This class defines the parameters for the neural network architecture used for policy
    and value function approximation. This includes the hidden layer sizes, activation
    functions, and whether to use an attention mechanism. These settings help to control
    the complexity and capacity of the neural network model used in the training process.
    """

    fcnet_hiddens: List[int] = field(default_factory=lambda: [512, 512])  #: The hidden layer architecture for the fully connected network. This specifies the number of neurons in each hidden layer of the neural network used for the policy and value function approximation. The default is [512, 512], which means two hidden layers with 512 neurons each. This can be adjusted based on the complexity of the problem and the size of the input state space.

    activation: ActivationFunctionEnum = ActivationFunctionEnum.ReLU  #: The activation function to use for the fully connected network. This specifies the non-linear activation function applied to each neuron in the hidden layers of the neural network. The default is ReLU (Rectified Linear Unit), which is a commonly used activation function in deep learning due to its simplicity and effectiveness. Other options may include Tanh, Sigmoid, etc. This can be adjusted based on the specific requirements of the problem and the architecture of the neural network.

    use_attention: bool = False  #: Whether to use an attention mechanism in the model. This specifies whether to include an attention layer in the neural network architecture. Note that this does not attend over the inputs but rather over the timestep dimension.

    attention_dim: int = 64  #: The dimension of the attention layer. This specifies the size of the output from the attention mechanism if `use_attention` is set to True. The attention dimension determines how many features will be used to represent the output of the attention layer. A larger value may allow for more complex representations but will also increase the computational cost. The default is 64, which is a common choice for many applications.

    @property
    def name(self) -> str:
        return "Network Architecture Settings"

    @classmethod
    def populate_arg_group(cls, args_group):
        args_group.add_argument(
            "--fcnet-hiddens",
            nargs="+",
            type=int,
            default=[512, 512],
            help="Hidden layer architecture for the fully connected network",
        )
        args_group.add_argument(
            "--activation",
            type=ActivationFunctionEnum,
            default=ActivationFunctionEnum.ReLU,
            help="Activation function for the fully connected network",
        )
        args_group.add_argument(
            "--use-attention",
            action="store_true",
            help="Whether to use attention in the model",
        )
        args_group.add_argument(
            "--attention-dim",
            type=int,
            default=64,
            help="The dimension of the attention layer",
        )

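For illustration, --fcnet-hiddens collects a variable-length list of layer widths via nargs="+"; the values below are hypothetical and the parser wiring is assumed as in the earlier sketch:

    # Sketch: parse a three-layer architecture and enable attention.
    parser = argparse.ArgumentParser()
    group = parser.add_argument_group("Network Architecture Settings")
    NetworkArchitectureSettings.populate_arg_group(group)
    args = parser.parse_args(["--fcnet-hiddens", "256", "256", "128", "--use-attention"])
    # args.fcnet_hiddens -> [256, 256, 128], args.use_attention -> True
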
@dataclass
class RLlibScriptArgs(ScriptArgs):
    """
    Top level dataclass for RLlib script arguments.

    This class aggregates all the settings required for configuring the RLlib training
    process. It includes settings for training, algorithms, logging, resuming from
    checkpoints, network architecture, and resource allocation. This allows for a
    comprehensive configuration of the RLlib training job in a structured manner.
    """

    # Training Arguments
    training_settings: TrainingSettings = field(default_factory=TrainingSettings)

    # Training Algorithm Arguments
    algorithm_settings: Union[PPOSettings, APPOSettings, IMPALASettings] = field(default_factory=PPOSettings)

    # Logging Arguments
    logging_settings: LoggingSettings = field(default_factory=LoggingSettings)

    # Resume Arguments
    resume_settings: ResumeSettings = field(default_factory=ResumeSettings)

    # Network Architecture Arguments
    network_architecture_settings: NetworkArchitectureSettings = field(default_factory=NetworkArchitectureSettings)

    # Resource Arguments
    resource_settings: ResourceSettings = field(default_factory=ResourceSettings)

    # List of plugins
    plugins: List[RLLibLauncherExtension] = field(default_factory=lambda: [])

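Finally, when bypassing the command line, the aggregate dataclass can be constructed directly in code. This sketch assumes the inherited ScriptArgs fields (defined in schola.scripts.common, not shown here) all have defaults; the values are illustrative only:

    # Sketch: build the top-level arguments object, overriding a few nested settings.
    script_args = RLlibScriptArgs(
        training_settings=TrainingSettings(timesteps=100_000, learning_rate=1e-4),
        algorithm_settings=APPOSettings(gae_lambda=0.9, vtrace=True),
        resource_settings=ResourceSettings(num_gpus=1, num_learners=2),
    )
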
