mirror of
https://git.freebsd.org/ports.git
synced 2026-06-02 11:08:52 +00:00
misc/llama-cpp: Multiple changes
1. Allow multiple models to be selected at run time. 2. Remove the leftover VULKAN option; VULKAN is enabled in misc/ggml. PR: 294795 (allow multiple models at run time). Requested by: Ivan Rozhuk <rozhuk.im@gmail.com>
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
PORTNAME= llama-cpp
|
||||
DISTVERSIONPREFIX= b
|
||||
DISTVERSION= 8895
|
||||
PORTREVISION= 1
|
||||
CATEGORIES= misc # machine-learning
|
||||
|
||||
MAINTAINER= yuri@FreeBSD.org
|
||||
@@ -36,8 +37,8 @@ CMAKE_TESTING_ON= LLAMA_BUILD_TESTS
|
||||
USER= nobody
|
||||
SUB_LIST= USER=${USER}
|
||||
|
||||
OPTIONS_DEFINE= CURL EXAMPLES VULKAN
|
||||
OPTIONS_DEFAULT= CURL VULKAN
|
||||
OPTIONS_DEFINE= CURL EXAMPLES
|
||||
OPTIONS_DEFAULT= CURL EXAMPLES
|
||||
OPTIONS_SUB= yes
|
||||
|
||||
CURL_DESCR= Use libcurl to download model from an URL
|
||||
@@ -47,11 +48,6 @@ CURL_LIB_DEPENDS= libcurl.so:ftp/curl
|
||||
|
||||
EXAMPLES_CMAKE_BOOL= LLAMA_BUILD_EXAMPLES
|
||||
|
||||
VULKAN_CMAKE_BOOL= GGML_VULKAN
|
||||
VULKAN_BUILD_DEPENDS= glslc:graphics/shaderc \
|
||||
vulkan-headers>0:graphics/vulkan-headers
|
||||
VULKAN_LIB_DEPENDS= libvulkan.so:graphics/vulkan-loader
|
||||
|
||||
BINARY_ALIAS= git=false \
|
||||
python=${PYTHON_CMD} # for tests
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
# llama_server_user (str): llama_server daemon user
|
||||
# Default: %%USER%%
|
||||
# llama_server_model (str): AI model that llama-server will use
|
||||
# Default: "" (required)
|
||||
# Default: "" (optional)
|
||||
# llama_server_args (str): Additional arguments for llama-server
|
||||
# Default: "" (optional)
|
||||
# llama_server_log (str): Log file that llama-server will write log to
|
||||
@@ -38,20 +38,19 @@ run_command="%%PREFIX%%/bin/llama-server"
|
||||
procname="${run_command}"
|
||||
pidfile=${llama_server_pidfile}
|
||||
command=/usr/sbin/daemon
|
||||
command_args="-f -t ${name} -p ${pidfile} -o ${llama_server_log} ${run_command} -m ${llama_server_model} ${llama_server_args}"
|
||||
command_args="-f -t ${name} -p ${pidfile} -o ${llama_server_log} ${run_command} ${llama_server_args}"
|
||||
start_precmd="llama_server_precmd"
|
||||
llama_server_chdir=/tmp
|
||||
|
||||
llama_server_precmd()
|
||||
{
|
||||
# check model
|
||||
if [ -z "${llama_server_model}" ]; then
|
||||
echo "llama_server_model isn't set, it is required"
|
||||
exit 1
|
||||
fi
|
||||
if [ ! -f "${llama_server_model}" ]; then
|
||||
echo "llama_server_model isn't a file"
|
||||
exit 1
|
||||
if [ -n "${llama_server_model}" ]; then
|
||||
if [ ! -f "${llama_server_model}" ]; then
|
||||
echo "llama_server_model isn't a file"
|
||||
exit 1
|
||||
fi
|
||||
command_args="${command_args} -m ${llama_server_model}"
|
||||
fi
|
||||
|
||||
# ensure that the log file exists and has right permissions
|
||||
|
||||
@@ -22,6 +22,15 @@ and navigate to http://localhost:8080:
|
||||
> llama_server_model=/path/to/models/llama-2-7b-chat.Q4_K_M.gguf
|
||||
> llama_server_args="--device Vulkan0 -ngl 27"
|
||||
|
||||
To use the multi-model feature, leave llama_server_model unset.
|
||||
Instead, add the argument "--models-preset /path/to/models.ini" to llama_server_args.
|
||||
Add pre-downloaded models into models.ini, for example:
|
||||
[Qwen3.5-35B-A3B-Uncensored]
|
||||
model = /path/to/Qwen3.5-35B-A3B-Uncensored-HauhauCS-Aggressive-Q4_K_M.gguf
|
||||
|
||||
You can switch to CPU-only operation by choosing the port option
|
||||
VULKAN=OFF in misc/ggml (not in llama-cpp).
|
||||
|
||||
EOM
|
||||
}
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user