# Isaac Gym Legged Robot Environment

This project provides reinforcement learning environments for training legged robots to walk on rough terrain using NVIDIA's Isaac Gym physics simulator. Developed by ETH Zurich's Robotic Systems Lab, it enables sim-to-real transfer through realistic simulation with actuator modeling, domain randomization (friction, mass, noise), and dynamics that match real-world robot behavior. The framework supports multiple legged robots, including the quadrupeds ANYmal-C, ANYmal-B, and Unitree A1, as well as the bipedal Cassie.

The environments implement a modular architecture based on config-driven customization and inheritance. Each robot environment consists of an environment class inheriting from `LeggedRobot` and two configuration classes (`LeggedRobotCfg` for environment parameters, `LeggedRobotCfgPPO` for training parameters). The framework integrates with the RSL-RL library for PPO training and provides terrain generation, reward shaping, observation handling, and policy deployment capabilities.

## Training a Robot Policy

Train a legged robot to walk using PPO reinforcement learning.

```bash
# Train ANYmal-C on flat terrain
python legged_gym/scripts/train.py --task=anymal_c_flat

# Train on rough terrain with 8000 parallel environments
python legged_gym/scripts/train.py --task=anymal_c_rough --num_envs=8000

# Train headless (no rendering) with CPU simulation and GPU RL
python legged_gym/scripts/train.py --task=a1 --headless --sim_device=cpu --rl_device=cuda:0

# Resume training from a checkpoint
python legged_gym/scripts/train.py --task=anymal_c_flat --resume --load_run=Dec01_10-30-45_experiment --checkpoint=500

# Custom experiment configuration
python legged_gym/scripts/train.py \
    --task=cassie \
    --experiment_name=my_experiment \
    --run_name=test_run \
    --max_iterations=1000 \
    --seed=42
```

## Evaluating a Trained Policy

Load and visualize a trained policy in the simulator.

```bash
# Play the most recent trained model
python legged_gym/scripts/play.py --task=anymal_c_flat

# Play a specific checkpoint
python legged_gym/scripts/play.py --task=anymal_c_rough --load_run=Dec15_14-22-30_rough_terrain --checkpoint=800
```

```python
# Example play.py workflow
from legged_gym.envs import *
from legged_gym.utils import task_registry, get_args

args = get_args()

# Get configurations and override them for testing
env_cfg, train_cfg = task_registry.get_cfgs(name=args.task)
env_cfg.env.num_envs = 50
env_cfg.terrain.curriculum = False
env_cfg.noise.add_noise = False

# Create environment
env, _ = task_registry.make_env(name=args.task, args=args, env_cfg=env_cfg)
obs = env.get_observations()

# Load trained policy
train_cfg.runner.resume = True
ppo_runner, train_cfg = task_registry.make_alg_runner(env=env, name=args.task, args=args, train_cfg=train_cfg)
policy = ppo_runner.get_inference_policy(device=env.device)

# Run simulation loop
for i in range(1000):
    actions = policy(obs.detach())
    obs, privileged_obs, rewards, dones, infos = env.step(actions.detach())
```
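During evaluation it is often useful to pin the velocity command instead of letting the environment resample it. The following is a minimal variant of the loop above; the command layout `[lin_vel_x, lin_vel_y, ang_vel_yaw, ...]` is an assumption that should be checked against the robot's command configuration, and the assignment is repeated every step because commands may be periodically resampled.

```python
# Variant of the play loop: hold a fixed forward-velocity command.
# Assumes env.commands is ordered [lin_vel_x, lin_vel_y, ang_vel_yaw, ...];
# verify against the robot's command configuration before relying on it.
for i in range(1000):
    env.commands[:, 0] = 1.0  # forward velocity target [m/s]
    env.commands[:, 1] = 0.0  # lateral velocity target [m/s]
    env.commands[:, 2] = 0.0  # yaw rate target [rad/s]
    actions = policy(obs.detach())
    obs, _, rewards, dones, infos = env.step(actions.detach())
```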
## Registering a Custom Environment

Register a new robot environment for training.

```python
from legged_gym.envs.base.legged_robot import LeggedRobot
from legged_gym.envs.base.legged_robot_config import LeggedRobotCfg, LeggedRobotCfgPPO
from legged_gym.utils.task_registry import task_registry

# Define environment configuration
class MyRobotCfg(LeggedRobotCfg):
    class env(LeggedRobotCfg.env):
        num_envs = 4096
        num_observations = 48
        num_actions = 12

    class asset(LeggedRobotCfg.asset):
        file = '{LEGGED_GYM_ROOT_DIR}/resources/robots/my_robot/urdf/my_robot.urdf'
        name = "my_robot"
        foot_name = "foot"
        penalize_contacts_on = ["thigh", "shank"]
        terminate_after_contacts_on = ["base"]
        self_collisions = 1

    class init_state(LeggedRobotCfg.init_state):
        pos = [0.0, 0.0, 0.5]
        default_joint_angles = {
            "hip_joint": 0.0,
            "thigh_joint": 0.8,
            "calf_joint": -1.5,
        }

    class control(LeggedRobotCfg.control):
        stiffness = {'hip_joint': 80.0, 'thigh_joint': 80.0, 'calf_joint': 80.0}
        damping = {'hip_joint': 2.0, 'thigh_joint': 2.0, 'calf_joint': 2.0}
        action_scale = 0.5
        decimation = 4

    class rewards(LeggedRobotCfg.rewards):
        class scales:
            tracking_lin_vel = 1.0
            tracking_ang_vel = 0.5
            lin_vel_z = -2.0
            ang_vel_xy = -0.05
            orientation = -5.0
            torques = -0.00001
            dof_vel = -0.0
            feet_air_time = 1.0
            collision = -1.0
            action_rate = -0.01

# Define training configuration
class MyRobotCfgPPO(LeggedRobotCfgPPO):
    class algorithm(LeggedRobotCfgPPO.algorithm):
        value_loss_coef = 1.0
        use_clipped_value_loss = True
        clip_param = 0.2
        entropy_coef = 0.01
        num_learning_epochs = 5
        num_mini_batches = 4
        learning_rate = 1.e-3

    class runner(LeggedRobotCfgPPO.runner):
        experiment_name = 'my_robot_experiment'
        run_name = ''
        max_iterations = 500

# Register the environment
task_registry.register("my_robot", LeggedRobot, MyRobotCfg(), MyRobotCfgPPO())
```
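Once registered, the new task name resolves like any built-in one, both through the registry API (detailed in the Task Registry API section below) and through the CLI scripts, e.g. `python legged_gym/scripts/train.py --task=my_robot`. A small sketch of the lookup:

```python
from legged_gym.utils.task_registry import task_registry

# The registered name now resolves like any built-in task
env_cfg, train_cfg = task_registry.get_cfgs(name="my_robot")
print(env_cfg.env.num_actions)           # 12, from MyRobotCfg above
print(train_cfg.runner.experiment_name)  # 'my_robot_experiment'
```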
## Creating a Custom Environment with Custom Rewards

Implement custom reward functions and environment behavior.

```python
from legged_gym.envs.base.legged_robot import LeggedRobot
from legged_gym.envs.base.legged_robot_config import LeggedRobotCfg
import torch

class MyCustomRobot(LeggedRobot):
    def __init__(self, cfg, sim_params, physics_engine, sim_device, headless):
        super().__init__(cfg, sim_params, physics_engine, sim_device, headless)

    def _reward_custom_stability(self):
        """Penalize roll and pitch deviations from upright (used with a negative scale)"""
        # The xy components of the projected gravity vector vanish when the base is level
        return torch.sum(torch.square(self.projected_gravity[:, :2]), dim=1)

    def _reward_foot_clearance(self):
        """Reward for lifting feet during swing phase"""
        # Foot height above the ground (assumes roughly flat terrain at z = 0)
        foot_heights = self.rigid_body_state[:, self.feet_indices, 2]
        in_contact = self.contact_forces[:, self.feet_indices, 2] > 1.0
        # Feet in contact earn no clearance reward
        clearance = torch.where(in_contact, torch.zeros_like(foot_heights), foot_heights)
        # Reward clearance above a 0.1 m target
        return torch.mean(torch.clamp(clearance - 0.1, min=0.0), dim=1)

    def _reward_smooth_gait(self):
        """Reward for smooth velocity changes"""
        # Compare world-frame velocities (base_lin_vel is expressed in the base frame,
        # so use root_states directly to match last_root_vel)
        base_vel_change = torch.sum(torch.abs(self.root_states[:, 7:10] - self.last_root_vel[:, :3]), dim=1)
        return torch.exp(-2.0 * base_vel_change)

    def compute_observations(self):
        """Add custom observations"""
        # Call parent implementation
        super().compute_observations()
        # Append extra observation terms if needed; num_observations in the config
        # must account for everything concatenated here.
        # Example: add terrain height measurements (if the base class does not already append them)
        if self.cfg.terrain.measure_heights:
            heights = self._get_heights()
            self.obs_buf = torch.cat([self.obs_buf, heights], dim=-1)

# Configuration with custom reward scales: each scale name must match a
# _reward_<name> method on the environment class
class MyCustomRobotCfg(LeggedRobotCfg):
    class rewards(LeggedRobotCfg.rewards):
        class scales:
            custom_stability = -2.0
            foot_clearance = 1.0
            smooth_gait = 0.5
            tracking_lin_vel = 1.0
            tracking_ang_vel = 0.5
```

How these scale names are combined into a total reward is sketched after the Task Registry API section below.

## Task Registry API

Core API for environment and algorithm management.

```python
from legged_gym.utils.task_registry import TaskRegistry, task_registry
from legged_gym.utils import get_args

# Get command line arguments
args = get_args()

# Retrieve environment configurations
env_cfg, train_cfg = task_registry.get_cfgs(name="anymal_c_flat")
print(f"Number of observations: {env_cfg.env.num_observations}")
print(f"Max iterations: {train_cfg.runner.max_iterations}")

# Create environment instance
env, env_cfg = task_registry.make_env(
    name="anymal_c_flat",
    args=args,
    env_cfg=None  # or pass a custom config to override
)

# Create PPO algorithm runner
ppo_runner, train_cfg = task_registry.make_alg_runner(
    env=env,
    name="anymal_c_flat",
    args=args,
    train_cfg=None,     # or pass a custom config
    log_root="default"  # default is LEGGED_GYM_ROOT/logs/experiment_name
)

# Train the policy
ppo_runner.learn(num_learning_iterations=train_cfg.runner.max_iterations, init_at_random_ep_len=True)

# Get inference policy for deployment
policy = ppo_runner.get_inference_policy(device=env.device)
obs = env.get_observations()
actions = policy(obs)
```
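For intuition, the mapping from scale names to reward methods (referenced in the custom rewards section above) can be written out explicitly. This is a simplified conceptual sketch, not the framework's actual implementation; the `dt` factor reflects how legged_gym folds the simulation timestep into the scales at config-parse time:

```python
import torch

def compute_total_reward(env, scales: dict) -> torch.Tensor:
    """Conceptual sketch: combine named reward terms into a total reward.
    scales maps reward names to weights, e.g. {"foot_clearance": 1.0}."""
    total = torch.zeros(env.num_envs, device=env.device)
    for name, weight in scales.items():
        if weight == 0.0:
            continue  # zero-scale terms are skipped entirely
        # Each scale name resolves to a _reward_<name> method on the env
        reward_fn = getattr(env, f"_reward_{name}")
        total += weight * env.dt * reward_fn()
    return total
```

A negative weight turns a positive term into a penalty, which is why `custom_stability = -2.0` penalizes the (always non-negative) orientation error above.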
## Terrain Generation

Configure procedural terrain generation for training robustness.

```python
from legged_gym.envs.base.legged_robot_config import LeggedRobotCfg

class RoughTerrainCfg(LeggedRobotCfg):
    class terrain(LeggedRobotCfg.terrain):
        mesh_type = 'trimesh'   # 'none', 'plane', 'heightfield', 'trimesh'
        horizontal_scale = 0.1  # [m] resolution
        vertical_scale = 0.005  # [m] height resolution
        border_size = 25        # [m] border around terrain

        # Curriculum learning
        curriculum = True
        max_init_terrain_level = 5

        # Terrain generation
        terrain_length = 8.0  # [m]
        terrain_width = 8.0   # [m]
        num_rows = 10  # difficulty levels
        num_cols = 20  # terrain types

        # Terrain type distribution:
        # [smooth_slope, rough_slope, stairs_up, stairs_down, discrete_obstacles]
        terrain_proportions = [0.1, 0.1, 0.35, 0.25, 0.2]

        # Physics properties
        static_friction = 1.0
        dynamic_friction = 1.0
        restitution = 0.0

        # Height measurements for observations
        measure_heights = True
        measured_points_x = [-0.8, -0.6, -0.4, -0.2, 0., 0.2, 0.4, 0.6, 0.8]
        measured_points_y = [-0.5, -0.3, -0.1, 0.1, 0.3, 0.5]

        # Trimesh specific
        slope_treshold = 0.75  # slopes above this threshold become vertical

# For flat terrain training
class FlatTerrainCfg(LeggedRobotCfg):
    class terrain(LeggedRobotCfg.terrain):
        mesh_type = 'plane'
        measure_heights = False
```

## Domain Randomization Configuration

Configure domain randomization for sim-to-real transfer.

```python
from legged_gym.envs.base.legged_robot_config import LeggedRobotCfg

class DomainRandCfg(LeggedRobotCfg):
    class domain_rand:
        randomize_friction = True
        friction_range = [0.5, 1.25]

        randomize_base_mass = True
        added_mass_range = [-1.0, 1.0]  # [kg]

        push_robots = True
        push_interval_s = 15
        max_push_vel_xy = 1.0  # [m/s]

        randomize_gains = True
        stiffness_multiplier_range = [0.9, 1.1]
        damping_multiplier_range = [0.9, 1.1]

        randomize_com = True
        com_displacement_range = [-0.05, 0.05]  # [m]

    class noise:
        add_noise = True
        noise_level = 1.0  # scales the individual values below

        class noise_scales:
            dof_pos = 0.01
            dof_vel = 1.5
            lin_vel = 0.1
            ang_vel = 0.2
            gravity = 0.05
            height_measurements = 0.1
```

## Exporting Policy for Deployment

Export a trained policy as TorchScript for C++ deployment.

```python
from legged_gym.utils.helpers import export_policy_as_jit
from legged_gym import LEGGED_GYM_ROOT_DIR
import os

# After training, export the policy
ppo_runner, train_cfg = task_registry.make_alg_runner(env=env, name="anymal_c_flat", args=args)

# Export path
export_path = os.path.join(
    LEGGED_GYM_ROOT_DIR, 'logs',
    train_cfg.runner.experiment_name,
    'exported', 'policies'
)

# Export as JIT-compiled model
export_policy_as_jit(ppo_runner.alg.actor_critic, export_path)
print(f'Exported policy to: {export_path}')

# For LSTM policies (recurrent networks), the exporter handles state
# management automatically; the exported model exposes a reset_memory() method.

# Load in C++:
# torch::jit::script::Module policy = torch::jit::load("policy_1.pt");
# torch::Tensor action = policy.forward({observation}).toTensor();
```
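Before deploying, it can be worth loading the exported file back in Python and comparing it against the live policy. A minimal sanity check, assuming the exporter writes `policy_1.pt` into `export_path` as the C++ snippet above suggests:

```python
import os
import torch

# Load the TorchScript module back, exactly as the C++ runtime would
exported = torch.jit.load(os.path.join(export_path, 'policy_1.pt'))
exported.eval()

# Compare against the in-memory inference policy on one observation batch
policy = ppo_runner.get_inference_policy(device=env.device)
obs = env.get_observations()
with torch.no_grad():
    jit_actions = exported(obs.cpu())
    live_actions = policy(obs).cpu()
print('max deviation:', (jit_actions - live_actions).abs().max().item())
```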
## Environment Step Interface

Core simulation step and observation interface.

```python
import torch

# Environment step returns 5 values
obs, privileged_obs, rewards, dones, infos = env.step(actions)

# obs:            (num_envs, num_observations) - clipped observations for the policy
# privileged_obs: (num_envs, num_privileged_obs) or None - critic observations for asymmetric training
# rewards:        (num_envs,) - reward for each environment
# dones:          (num_envs,) - reset flags for each environment
# infos:          dict - additional information

# Example training loop
num_steps = 1000
obs = env.get_observations()

for step in range(num_steps):
    # Policy forward pass
    actions = policy(obs)

    # Optionally clamp actions before stepping
    actions = torch.clamp(actions, -1.0, 1.0)

    # Step environment
    obs, privileged_obs, rewards, dones, infos = env.step(actions)

    # Episode statistics are reported through the info dict when available
    if "episode" in infos:
        for key, value in infos["episode"].items():
            print(f"{key}: {value}")

    # Environments are reset automatically when done=True; no manual reset is needed

# Direct observation access
base_lin_vel = env.base_lin_vel  # (num_envs, 3)
base_ang_vel = env.base_ang_vel  # (num_envs, 3)
dof_pos = env.dof_pos            # (num_envs, num_dof)
dof_vel = env.dof_vel            # (num_envs, num_dof)
commands = env.commands          # (num_envs, num_commands)
```

This framework enables efficient training of legged locomotion policies through massively parallel simulation, supporting thousands of environments running simultaneously on the GPU. The modular architecture allows researchers to customize robot morphology, terrain generation, reward functions, and training parameters for their specific applications. Domain randomization and noise injection during training help trained policies transfer to real robots, and the framework has been validated on multiple platforms, demonstrating robust locomotion on challenging terrain including stairs, slopes, and rough outdoor environments.

The typical workflow, sketched below, involves: (1) registering a robot environment with its URDF/MJCF asset and configuration parameters, (2) training a policy using PPO with curriculum learning on diverse terrains, (3) evaluating the policy in simulation under various test scenarios, and (4) exporting the policy as TorchScript for deployment on physical robots.
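Condensed into code, the four steps map directly onto the APIs shown in the sections above. This is a sketch reusing the hypothetical `MyRobotCfg` and `MyRobotCfgPPO` example classes from the registration section, not a ready-made script:

```python
from legged_gym.envs.base.legged_robot import LeggedRobot
from legged_gym.utils.task_registry import task_registry
from legged_gym.utils import get_args
from legged_gym.utils.helpers import export_policy_as_jit

# (1) Register the environment (MyRobotCfg/MyRobotCfgPPO are the example
#     configuration classes from the registration section)
task_registry.register("my_robot", LeggedRobot, MyRobotCfg(), MyRobotCfgPPO())

# (2) Train with PPO
args = get_args()
env, _ = task_registry.make_env(name="my_robot", args=args)
ppo_runner, train_cfg = task_registry.make_alg_runner(env=env, name="my_robot", args=args)
ppo_runner.learn(num_learning_iterations=train_cfg.runner.max_iterations, init_at_random_ep_len=True)

# (3) Evaluate in simulation
policy = ppo_runner.get_inference_policy(device=env.device)
obs = env.get_observations()
for _ in range(1000):
    obs, _, rewards, dones, infos = env.step(policy(obs.detach()).detach())

# (4) Export as TorchScript for deployment
export_policy_as_jit(ppo_runner.alg.actor_critic, './exported_policies')
```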