worker_full_main.sh
#!/bin/bash
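# Entry point for an Open-Assistant inference worker node (forked from
# LAION-AI/Open-Assistant): caches the Hugging Face token, downloads the
# model weights, then starts one text-generation server plus one worker
# process per visible CUDA device (or a single CPU-only pair when
# CUDA_VISIBLE_DEVICES is empty).

# Write the Hugging Face token where the hub libraries expect to find it.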
mkdir -p "$HOME/.cache/huggingface"
echo -n "$HF_TOKEN" > "$HOME/.cache/huggingface/token"
export MODEL_CONFIG_NAME=${MODEL_CONFIG_NAME:-"OA_SFT_Pythia_12B"}
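# Shards per text-generation server, and an optional delay (in seconds)
# between starting successive instances.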
num_shards=${NUM_SHARDS:-1}
load_sleep=${LOAD_SLEEP:-0}
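# Look up the model_id and quantization flag for $MODEL_CONFIG_NAME from the
# model config.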
export MODEL_ID=$(/opt/miniconda/envs/worker/bin/python /worker/get_model_config_prop.py model_id)
export QUANTIZE=$(/opt/miniconda/envs/worker/bin/python /worker/get_model_config_prop.py quantized)
quantize=${QUANTIZE:-"false"}
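# Pass --quantize to the launcher only when the config asks for it;
# quantize_args is left unquoted at the call sites below so it expands to
# nothing when empty.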
quantize_args=""
if [ "$quantize" = "true" ]; then
quantize_args="--quantize"
fi
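# Leave HF_HUB_ENABLE_HF_TRANSFER empty to disable hf_transfer, and point the
# hub client at the cache directory and token prepared above.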
export HF_HUB_ENABLE_HF_TRANSFER=
export HF_HOME=$HOME/.cache/huggingface
export HUGGING_FACE_HUB_TOKEN=$HF_TOKEN
echo "Downloading model $MODEL_ID"
CUDA_VISIBLE_DEVICES="" /opt/miniconda/envs/text-generation/bin/python /worker/download_model.py
# If no CUDA devices are visible, run a single CPU server/worker pair.
if [ -z "$CUDA_VISIBLE_DEVICES" ]; then
worker_port=8300
echo "Starting worker server on port $worker_port"
text-generation-launcher --model-id "$MODEL_ID" --num-shard "$num_shards" $quantize_args --port "$worker_port" &
export INFERENCE_SERVER_URL="http://localhost:$worker_port"
echo "Starting worker"
/opt/miniconda/envs/worker/bin/python /worker &
else
# Otherwise, start one text-generation server and one worker per visible CUDA device.
echo "CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES"
IFS=',' read -ra devices <<< "$CUDA_VISIBLE_DEVICES"
for i in "${!devices[@]}"; do
device="${devices[$i]}"
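# Give each instance its own HTTP port, torch master port, and shard unix
# socket path so the instances do not collide.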
worker_port=$((8300 + i))
master_port=$((29500 + i))
shard_uds_path="/tmp/text-generation-server-$i"
echo "Starting worker server $i on port $worker_port on device $device"
CUDA_VISIBLE_DEVICES=$device text-generation-launcher --model-id "$MODEL_ID" --num-shard "$num_shards" $quantize_args --port "$worker_port" --master-port "$master_port" --shard-uds-path "$shard_uds_path" &
echo "Starting worker $i"
CUDA_VISIBLE_DEVICES="" INFERENCE_SERVER_URL="http://localhost:$worker_port" /opt/miniconda/envs/worker/bin/python /worker &
sleep $load_sleep
done
fi
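# Exit as soon as any background process exits, propagating its exit status.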
wait -n
exit $?