Skip to content

Commit 9d245e5

Browse files
committed
upgrade to llama.cpp b2969
1 parent 50c85b7 commit 9d245e5

File tree

2 files changed

+6
-8
lines changed

2 files changed

+6
-8
lines changed

CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.12)
+cmake_minimum_required(VERSION 3.14)
 
 project(jllama CXX)

@@ -22,7 +22,7 @@ FetchContent_MakeAvailable(json)
 FetchContent_Declare(
     llama.cpp
     GIT_REPOSITORY https://github.com/ggerganov/llama.cpp.git
-    GIT_TAG b2885
+    GIT_TAG b2969
 )
 FetchContent_MakeAvailable(llama.cpp)

src/main/cpp/server.hpp

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,6 @@ struct slot_params
     bool stream = true;
     bool cache_prompt = false; // remember the prompt to avoid reprocessing all prompt
 
-    uint32_t seed = -1; // RNG seed
     int32_t n_keep = 0; // number of tokens to keep from initial prompt
     int32_t n_discard =
         0; // number of tokens after n_keep that may be discarded when shifting context, 0 defaults to half
@@ -1100,7 +1099,7 @@ struct server_context
             sampler_names.emplace_back(sampler_name);
         }
     }
-    slot.sparams.samplers_sequence = sampler_types_from_names(sampler_names, false);
+    slot.sparams.samplers_sequence = llama_sampling_types_from_names(sampler_names, false);
 }
 else
 {
@@ -1120,7 +1119,6 @@ struct server_context
         send_error(task, "Failed to parse grammar", ERROR_TYPE_INVALID_REQUEST);
         return false;
     }
-    llama_set_rng_seed(ctx, slot.params.seed);
 }
 
 slot.command = SLOT_COMMAND_LOAD_PROMPT;
@@ -1374,13 +1372,13 @@ struct server_context
 samplers_sequence.reserve(slot.sparams.samplers_sequence.size());
 for (const auto &sampler_type : slot.sparams.samplers_sequence)
 {
-    samplers_sequence.emplace_back(sampler_type_to_name_string(sampler_type));
+    samplers_sequence.emplace_back(llama_sampling_type_to_str(sampler_type));
 }
 
 return json{{"n_ctx", slot.n_ctx},
             {"n_predict", slot.n_predict},
             {"model", params.model_alias},
-            {"seed", slot.params.seed},
+            {"seed", slot.sparams.seed},
             {"temperature", slot.sparams.temp},
             {"dynatemp_range", slot.sparams.dynatemp_range},
             {"dynatemp_exponent", slot.sparams.dynatemp_exponent},
@@ -2143,7 +2141,7 @@ struct server_context
 slot.command = SLOT_COMMAND_NONE;
 slot.release();
 slot.print_timings();
-send_final_response(slot);
+send_error(slot, "input is too large to process. increase the physical batch size", ERROR_TYPE_SERVER);
 continue;
 }
 }

0 commit comments

Comments (0)