@@ -94,7 +94,6 @@ struct slot_params
     bool stream       = true;
     bool cache_prompt = false; // remember the prompt to avoid reprocessing all prompt

-    uint32_t seed = -1; // RNG seed
     int32_t n_keep = 0; // number of tokens to keep from initial prompt
     int32_t n_discard =
         0; // number of tokens after n_keep that may be discarded when shifting context, 0 defaults to half
@@ -1100,7 +1099,7 @@ struct server_context
                 sampler_names.emplace_back(sampler_name);
             }
         }
-        slot.sparams.samplers_sequence = sampler_types_from_names(sampler_names, false);
+        slot.sparams.samplers_sequence = llama_sampling_types_from_names(sampler_names, false);
     }
     else
     {
@@ -1120,7 +1119,6 @@ struct server_context
                 send_error(task, "Failed to parse grammar", ERROR_TYPE_INVALID_REQUEST);
                 return false;
             }
-            llama_set_rng_seed(ctx, slot.params.seed);
         }

         slot.command = SLOT_COMMAND_LOAD_PROMPT;
@@ -1374,13 +1372,13 @@ struct server_context
         samplers_sequence.reserve(slot.sparams.samplers_sequence.size());
         for (const auto &sampler_type : slot.sparams.samplers_sequence)
         {
-            samplers_sequence.emplace_back(sampler_type_to_name_string(sampler_type));
+            samplers_sequence.emplace_back(llama_sampling_type_to_str(sampler_type));
         }

         return json{{"n_ctx", slot.n_ctx},
                     {"n_predict", slot.n_predict},
                     {"model", params.model_alias},
-                    {"seed", slot.params.seed},
+                    {"seed", slot.sparams.seed},
                     {"temperature", slot.sparams.temp},
                     {"dynatemp_range", slot.sparams.dynatemp_range},
                     {"dynatemp_exponent", slot.sparams.dynatemp_exponent},
@@ -2143,7 +2141,7 @@ struct server_context
                     slot.command = SLOT_COMMAND_NONE;
                     slot.release();
                     slot.print_timings();
-                    send_final_response(slot);
+                    send_error(slot, "input is too large to process. increase the physical batch size", ERROR_TYPE_SERVER);
                     continue;
                 }
             }
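Taken together, these hunks drop the server's own per-slot RNG seed: the seed field leaves slot_params, the llama_set_rng_seed() call on the shared llama_context goes away, and the seed is instead read from the sampling parameters (slot.sparams.seed), alongside the rename of the sampler-sequence helpers to the llama_sampling_* prefix. A minimal sketch of the resulting flow, assuming the llama_sampling_params / llama_sampling_init API from llama.cpp's common/sampling.h at the time of this change (treat the exact field and function names as assumptions, since they have since been reworked):

// Sketch only: llama_sampling_params, llama_sampling_init and
// llama_sampling_free are the common/sampling.h helpers assumed here.
llama_sampling_params sparams;
sparams.seed = 42; // the per-request seed now travels with the sampling params

// The sampler's RNG is seeded when the per-slot sampling context is
// created, so no separate llama_set_rng_seed(ctx, seed) call on the
// shared llama_context is needed anymore.
llama_sampling_context *ctx_sampling = llama_sampling_init(sparams);
// ... sample tokens for this slot via ctx_sampling ...
llama_sampling_free(ctx_sampling);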