We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
2 parents d2c2cc1 + 5b5e034 · commit 337557c · Copy full SHA for 337557c
unsloth/models/rl.py
@@ -537,6 +537,9 @@ def _patch_trl_rl_trainers(trainer_file = "grpo_trainer"):
537
"loss_type" : "bnpo", # Default GRPO paper
538
"beta" : 0.001, # Recommended as seen in verl
539
"auto_find_batch_size" : False, # Cannot work on GRPO
540
+ # [TODO] See https://fengyao.notion.site/off-policy-rl
541
+ # https://github.com/huggingface/trl/pull/3867 (August 7th)
542
+ "vllm_importance_sampling_correction" : False,
543
}
544
for k, v in replacements.items():
545
x = f"{k}( = [^,\n]{{1,}})?,\n"
0 commit comments