VerlTool/SWE-Qwen3-8B-VT-grpo-n32-b256-t1.0-lr2e-6
8B
•
Updated
•
2
VerlTool/pixel_reasoner-7b-grpo-n8-b128-t1.0-lr1e-6-complex-reward-new_global_step_50
8B
•
Updated
•
4
VerlTool/deepsearch-qwen_qwen3-8b-grpo-n16-b128-t1.0-lr1e-6-new_global_step_70
8B
•
Updated
•
4
VerlTool/torl_qwen2.5-math-7b-grpo-n16-b128-t1.0-lr1e-6acc-only-global_step_200
8B
•
Updated
•
4
VerlTool/pixel_reasoner-7b-grpo-n8-b128-t1.0-lr1e-6-complex-reward_global_step_90
8B
•
Updated
•
4
VerlTool/pixel-reaoner-3b-grpo-n8-b128-t1.0-lr1e-6-complex-reward_global_step_100
4B
•
Updated
•
4
VerlTool/torl_qwen2.5-math-1.5b-grpo-n16-b128-t1.0-lr1e-6-acc-only_global_step_340
2B
•
Updated
•
3
VerlTool/sqlcoder-qwen2.5-coder-7b-instruct-grpo-n5-b256-t0.6-lr1e-6_global_step_60
8B
•
Updated
•
7
VerlTool/search_r1_qa_em-qwen_qwen2.5-7b-grpo-n16-b512-64-t1.0-lr1e-6-dapo_global_step_140
8B
•
Updated
•
4
VerlTool/pixel_reasoner-7b-grpo-n8-b128-t1.0-lr1e-6_global_step_80
8B
•
Updated
•
5
VerlTool/pixel_reasoner-3b-grpo-n8-b128-t1.0-lr1e-6_global_step_90
4B
•
Updated
•
8
VerlTool/search_r1_qa_em-qwen_qwen2.5-7b-grpo-n16-b512-64-t1.0-lr1e-6-dapo_global_step_40
8B
•
Updated
•
9
VerlTool/deepsearch-qwen_qwen3-8b-grpo-n16-b128-t1.0-lr1e-6-512-64_global_step_40
8B
•
Updated
•
4
VerlTool/search_r1_qa_em-qwen_qwen2.5-7b-grpo-n16-b512-64-t1.0-lr1e-6_global_step_100
8B
•
Updated
•
9
VerlTool/search_r1_qa_em-qwen_qwen2.5-3b-grpo-n16-b512-64-t1.0-lr1e-6-dapo_global_step_160
3B
•
Updated
•
4
VerlTool/search_r1_qa_em-qwen_qwen2.5-3b-grpo-n16-b512-64-t1.0-lr1e-6debug_global_step_100
3B
•
Updated
•
8
VerlTool/torl-qwen_qwen2.5-math-1.5b-grpo-n16-b128-t1.0-lr1e-6dapo-with-penalty_global_step_650
2B
•
Updated
•
7
VerlTool/torl-qwen_qwen2.5-math-1.5b-grpo-n16-b128-t1.0-lr1e-6-with-penalty_global_step_580
2B
•
Updated
•
5
VerlTool/torl-qwen_qwen2.5-math-7b-grpo-n16-b128-t1.0-lr1e-6dapo-with-penalty_global_step_440
8B
•
Updated
•
8
VerlTool/pixel-reaoner-qwen2-5vl-3b-sft
Image-to-Text
•
4B
•
Updated
•
465
VerlTool/torl-fsdp-agent-qwen_qwen2.5-math-7b-dapo-step-230
8B
•
Updated
•
4
VerlTool/torl-fsdp2-agent-qwen_qwen2.5-math-7b-grpo-100-step-no-env
8B
•
Updated
•
6
VerlTool/acecoder-fsdp_agent-qwen_qwen2.5-coder-1.5b-instruct-grpo-69k-sys12-mtrl-d1fo-535-step
2B
•
Updated
•
7
VerlTool/acecoder-fsdp_agent-qwen_qwen2.5-coder-1.5b-instruct-grpo-69k-sys12-mtrl-d1fo-280-step
2B
•
Updated
•
10
VerlTool/torl-deep_math-fsdp_agent-qwen2.5-math-7b-grpo-n16-b128-t1.0-lr1e-6-310-step
8B
•
Updated
•
4
VerlTool/torl-deep_math-fsdp_agent-qwen2.5-math-1.5b-grpo-n16-b128-t1.0-lr1e-6-320-step
2B
•
Updated
•
77
VerlTool/torl-deep_math-fsdp-qwen2.5-math-1.5b-grpo-n16-b128-t1.0-lr1e-6-830-step
2B
•
Updated
•
14
VerlTool/acecoder-fsdp_agent-mimo-7b-base-grpo-n16-b128-t1.0-lr1e-6-69k-mtrl-sys9-new2-debug-120-step
8B
•
Updated
•
9
VerlTool/acecoder-fsdp_agent-xiaomimimo_mimo-7b-base-grpo-n16-b128-t1.0-lr1e-6-69k-2turn-sys4-120-step
8B
•
Updated
•
3
VerlTool/torl-fsdp_agent-qwen_qwen2.5-math-7b-grpo-n16-b128-t1.0-lr1e-6-mtrl-v6-330-step
8B
•
Updated
•
9