diff --git a/contrib/fine-tuning/convert-gguf.sh b/contrib/fine-tuning/convert-gguf.sh
index fa0edf667..746af40e0 100755
--- a/contrib/fine-tuning/convert-gguf.sh
+++ b/contrib/fine-tuning/convert-gguf.sh
@@ -26,13 +26,21 @@ python convert_hf_to_gguf.py --outtype q8_0 --outfile ${CDXGEN_FT_PATH}/${HF_ORG
 cp ${CDXGEN_FT_PATH}/Modelfile ${GGUF_MODEL_Q8_0_PATH}/
 # cp ${FUSED_MODEL}/*.json ${FUSED_MODEL}/merges.txt ${GGUF_MODEL_Q8_0_PATH}/
 
-GGUF_MODEL_BF16_0_NAME=${HF_ORG}/${TOOL_BASE_MODEL}-gguf-BF16-GGUF
-GGUF_MODEL_BF16_0_PATH=${CDXGEN_FT_PATH}/${HF_ORG}/${TOOL_BASE_MODEL}-gguf-BF16-GGUF
-rm -rf ${GGUF_MODEL_BF16_0_PATH}
-mkdir -p ${GGUF_MODEL_BF16_0_PATH}
-python convert_hf_to_gguf.py --outtype bf16 --outfile ${CDXGEN_FT_PATH}/${HF_ORG}/${TOOL_BASE_MODEL}-gguf-BF16-GGUF/${TOOL_BASE_MODEL}-gguf-bf16.gguf --model-name ${GGUF_MODEL_BF16_0_NAME} ${FUSED_MODEL}
-cp ${CDXGEN_FT_PATH}/Modelfile ${GGUF_MODEL_BF16_0_PATH}/
-sed -i '' 's|./cdx1-gguf-q8_0.gguf|./cdx1-gguf-bf16.gguf|g' ${GGUF_MODEL_BF16_0_PATH}/Modelfile
+GGUF_MODEL_BF16_NAME=${HF_ORG}/${TOOL_BASE_MODEL}-gguf-BF16-GGUF
+GGUF_MODEL_BF16_PATH=${CDXGEN_FT_PATH}/${HF_ORG}/${TOOL_BASE_MODEL}-gguf-BF16-GGUF
+rm -rf ${GGUF_MODEL_BF16_PATH}
+mkdir -p ${GGUF_MODEL_BF16_PATH}
+python convert_hf_to_gguf.py --outtype bf16 --outfile ${CDXGEN_FT_PATH}/${HF_ORG}/${TOOL_BASE_MODEL}-gguf-BF16-GGUF/${TOOL_BASE_MODEL}-gguf-bf16.gguf --model-name ${GGUF_MODEL_BF16_NAME} ${FUSED_MODEL}
+cp ${CDXGEN_FT_PATH}/Modelfile ${GGUF_MODEL_BF16_PATH}/
+sed -i '' 's|./cdx1-gguf-q8_0.gguf|./cdx1-gguf-bf16.gguf|g' ${GGUF_MODEL_BF16_PATH}/Modelfile
+
+GGUF_MODEL_Q4_K_M_NAME=${HF_ORG}/${TOOL_BASE_MODEL}-gguf-Q4_K_M-GGUF
+GGUF_MODEL_Q4_K_M_PATH=${CDXGEN_FT_PATH}/${HF_ORG}/${TOOL_BASE_MODEL}-gguf-Q4_K_M-GGUF
+rm -rf ${GGUF_MODEL_Q4_K_M_PATH}
+mkdir -p ${GGUF_MODEL_Q4_K_M_PATH}
+llama-quantize ${CDXGEN_FT_PATH}/${HF_ORG}/${TOOL_BASE_MODEL}-gguf-BF16-GGUF/${TOOL_BASE_MODEL}-gguf-bf16.gguf ${GGUF_MODEL_Q4_K_M_PATH}/${TOOL_BASE_MODEL}-gguf-Q4_K_M.gguf Q4_K_M
+cp ${CDXGEN_FT_PATH}/Modelfile ${GGUF_MODEL_Q4_K_M_PATH}/
+sed -i '' 's|./cdx1-gguf-q8_0.gguf|./cdx1-gguf-Q4_K_M.gguf|g' ${GGUF_MODEL_Q4_K_M_PATH}/Modelfile
 
 ### Testing with ollama
 # cd ${GGUF_MODEL_Q8_0_PATH}
@@ -43,9 +51,20 @@ sed -i '' 's|./cdx1-gguf-q8_0.gguf|./cdx1-gguf-bf16.gguf|g' ${GGUF_MODEL_BF16_0_
 export HF_HUB_ENABLE_HF_TRANSFER=0
 huggingface-cli whoami
 huggingface-cli upload --quiet --repo-type model ${GGUF_MODEL_Q8_0_NAME} ${GGUF_MODEL_Q8_0_PATH} .
-huggingface-cli upload --quiet --repo-type model ${GGUF_MODEL_BF16_0_NAME} ${GGUF_MODEL_BF16_0_PATH} .
+huggingface-cli upload --quiet --repo-type model ${GGUF_MODEL_Q4_K_M_NAME} ${GGUF_MODEL_Q4_K_M_PATH} .
+huggingface-cli upload --quiet --repo-type model ${GGUF_MODEL_BF16_NAME} ${GGUF_MODEL_BF16_PATH} .
 
+ollama pull hf.co/${GGUF_MODEL_Q8_0_NAME}
 ollama cp hf.co/${GGUF_MODEL_Q8_0_NAME} ${GGUF_MODEL_Q8_0_NAME}
 ollama push ${GGUF_MODEL_Q8_0_NAME}
-ollama cp hf.co/${GGUF_MODEL_BF16_0_NAME} ${GGUF_MODEL_BF16_0_NAME}
-ollama push ${GGUF_MODEL_BF16_0_NAME}
+ollama rm hf.co/${GGUF_MODEL_Q8_0_NAME}
+
+ollama pull hf.co/${GGUF_MODEL_Q4_K_M_NAME}
+ollama cp hf.co/${GGUF_MODEL_Q4_K_M_NAME} ${GGUF_MODEL_Q4_K_M_NAME}
+ollama push ${GGUF_MODEL_Q4_K_M_NAME}
+ollama rm hf.co/${GGUF_MODEL_Q4_K_M_NAME}
+
+ollama pull hf.co/${GGUF_MODEL_BF16_NAME}
+ollama cp hf.co/${GGUF_MODEL_BF16_NAME} ${GGUF_MODEL_BF16_NAME}
+ollama push ${GGUF_MODEL_BF16_NAME}
+ollama rm hf.co/${GGUF_MODEL_BF16_NAME}