upload via upload_folder 2025-08-07T18:45:06.794303+00:00
Browse files- README.md +49 -0
- eval_result.json +6 -0
- full_model.pt +3 -0
- params.json +53 -0
- replay.mp4 +0 -0
- state_dict.pt +3 -0
- tensorboard/events.out.tfevents.1754505352.winkindeMacBook-Air.local.87385.0 +3 -0
README.md
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
env_name: Pusher-v5
|
| 3 |
+
tags:
|
| 4 |
+
- Pusher-v5
|
| 5 |
+
- ppo
|
| 6 |
+
- reinforcement-learning
|
| 7 |
+
- custom-implementation
|
| 8 |
+
- mujoco
|
| 9 |
+
- pytorch
|
| 10 |
+
- ddp
|
| 11 |
+
model-index:
|
| 12 |
+
- name: PPO-DDP-PusherV2
|
| 13 |
+
results:
|
| 14 |
+
- task:
|
| 15 |
+
type: reinforcement-learning
|
| 16 |
+
name: reinforcement-learning
|
| 17 |
+
dataset:
|
| 18 |
+
name: Pusher-v5
|
| 19 |
+
type: Pusher-v5
|
| 20 |
+
metrics:
|
| 21 |
+
- type: mean_reward
|
| 22 |
+
value: -34.84 +/- 4.74
|
| 23 |
+
name: mean_reward
|
| 24 |
+
verified: false
|
| 25 |
+
---
|
| 26 |
+
|
| 27 |
+
# **PPO** Agent playing **Pusher-v5**
|
| 28 |
+
This is a trained model of a **PPO** agent playing **Pusher-v5**.
|
| 29 |
+
|
| 30 |
+
## Usage
|
| 31 |
+
### Create the conda env (run inside a clone of https://github.com/GeneHit/drl_practice)
|
| 32 |
+
```bash
|
| 33 |
+
conda create -n drl python=3.12
|
| 34 |
+
conda activate drl
|
| 35 |
+
python -m pip install -r requirements.txt
|
| 36 |
+
```
|
| 37 |
+
|
| 38 |
+
### Play with the full model
|
| 39 |
+
```python
|
| 40 |
+
# load the full model
|
| 41 |
+
model = load_from_hub(repo_id="winkin119/PPO-DDP-PusherV2", filename="full_model.pt")
|
| 42 |
+
|
| 43 |
+
# Create the environment.
|
| 44 |
+
env = gym.make("Pusher-v5")
|
| 45 |
+
state, _ = env.reset()
|
| 46 |
+
action = model.action(state)
|
| 47 |
+
...
|
| 48 |
+
```
|
| 49 |
+
A state-dict version of the model (`state_dict.pt`) is also available; see the repo for the corresponding model definition.
|
eval_result.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"mean_reward": -34.83662994066047,
|
| 3 |
+
"std_reward": 4.737437200988468,
|
| 4 |
+
"datetime": "2025-08-06T18:39:37.074015+00:00",
|
| 5 |
+
"train_duration_min": "3.63"
|
| 6 |
+
}
|
full_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ae748b2efb7ae76625871fe4d09af9adad67544bd090a15c362eb58ea2f8dd07
|
| 3 |
+
size 91125
|
params.json
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"env_config": {
|
| 3 |
+
"env_id": "Pusher-v5",
|
| 4 |
+
"env_kwargs": {},
|
| 5 |
+
"max_steps": null,
|
| 6 |
+
"normalize_obs": false,
|
| 7 |
+
"use_image": false,
|
| 8 |
+
"vector_env_num": 2,
|
| 9 |
+
"use_multi_processing": false,
|
| 10 |
+
"image_shape": null,
|
| 11 |
+
"frame_stack": 1,
|
| 12 |
+
"frame_skip": 1,
|
| 13 |
+
"training_render_mode": null
|
| 14 |
+
},
|
| 15 |
+
"device": "cpu",
|
| 16 |
+
"learning_rate": 0.0001,
|
| 17 |
+
"gamma": 0.99,
|
| 18 |
+
"checkpoint_pathname": "",
|
| 19 |
+
"max_grad_norm": 1.0,
|
| 20 |
+
"log_interval": 1,
|
| 21 |
+
"eval_episodes": 100,
|
| 22 |
+
"eval_random_seed": 42,
|
| 23 |
+
"eval_video_num": 10,
|
| 24 |
+
"timesteps": 813,
|
| 25 |
+
"rollout_len": 512,
|
| 26 |
+
"gae_lambda": 0.95,
|
| 27 |
+
"entropy_coef": {
|
| 28 |
+
"_type": "LinearSchedule",
|
| 29 |
+
"_module": "practice.utils_for_coding.scheduler_utils",
|
| 30 |
+
"_start_e": 0.01,
|
| 31 |
+
"_end_e": 0.001,
|
| 32 |
+
"_duration": 731,
|
| 33 |
+
"_start_t": 0
|
| 34 |
+
},
|
| 35 |
+
"value_loss_coef": 1.0,
|
| 36 |
+
"critic_lr": 0.0001,
|
| 37 |
+
"num_epochs": 6,
|
| 38 |
+
"minibatch_num": 8,
|
| 39 |
+
"clip_coef": 0.2,
|
| 40 |
+
"value_clip_range": 1.0,
|
| 41 |
+
"reward_configs": [],
|
| 42 |
+
"action_scale": 1,
|
| 43 |
+
"action_bias": 0,
|
| 44 |
+
"log_std_min": -10,
|
| 45 |
+
"log_std_max": 2,
|
| 46 |
+
"use_layer_norm": true,
|
| 47 |
+
"hidden_sizes": [
|
| 48 |
+
128,
|
| 49 |
+
128
|
| 50 |
+
],
|
| 51 |
+
"log_std_state_dependent": false,
|
| 52 |
+
"world_size": 3
|
| 53 |
+
}
|
replay.mp4
ADDED
|
Binary file (16.1 kB). View file
|
|
|
state_dict.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ef6f20605fecef8974a1c6226a1636745c56bb4019b8c8c10caf0ac7cdbbad09
|
| 3 |
+
size 88629
|
tensorboard/events.out.tfevents.1754505352.winkindeMacBook-Air.local.87385.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bb021fe8cc6a715f03b13a04929f0e7235c8e2c0dae5ff7676e58fd82d948e15
|
| 3 |
+
size 1166876
|