misc/llama-cpp: Multiple changes

1. Allow multiple models to be selected at run time.
2. Remove the leftover VULKAN option. VULKAN is enabled in misc/ggml.
PR: 294795 (allow multiple models at run time)
Requested by: Ivan Rozhuk <rozhuk.im@gmail.com>
2026-06-02 11:08:52 +00:00 · 2026-04-28 12:11:06 -07:00
parent ad3e6bb0c2
commit d31fe59500
3 changed files with 20 additions and 16 deletions
@@ -1,6 +1,7 @@
 PORTNAME=	llama-cpp
 DISTVERSIONPREFIX=	b
 DISTVERSION=	8895
+PORTREVISION=	1
 CATEGORIES=	misc # machine-learning

 MAINTAINER=	yuri@FreeBSD.org
@@ -36,8 +37,8 @@ CMAKE_TESTING_ON=	LLAMA_BUILD_TESTS
 USER=		nobody
 SUB_LIST=	USER=${USER}

-OPTIONS_DEFINE=		CURL EXAMPLES VULKAN
-OPTIONS_DEFAULT=	CURL VULKAN
+OPTIONS_DEFINE=		CURL EXAMPLES
+OPTIONS_DEFAULT=	CURL EXAMPLES
 OPTIONS_SUB=		yes

 CURL_DESCR=		Use libcurl to download model from an URL
@@ -47,11 +48,6 @@ CURL_LIB_DEPENDS=	libcurl.so:ftp/curl

 EXAMPLES_CMAKE_BOOL=	LLAMA_BUILD_EXAMPLES

-VULKAN_CMAKE_BOOL=	GGML_VULKAN
-VULKAN_BUILD_DEPENDS=	glslc:graphics/shaderc \
-			vulkan-headers>0:graphics/vulkan-headers
-VULKAN_LIB_DEPENDS=	libvulkan.so:graphics/vulkan-loader
-
 BINARY_ALIAS=	git=false \
 		python=${PYTHON_CMD} # for tests

@@ -13,7 +13,7 @@
 # llama_server_user (str):	llama_server daemon user
 #				Default: %%USER%%
 # llama_server_model (str):	AI model that llama-server will use
-#				Default: "" (required)
+#				Default: "" (optional)
 # llama_server_args (str):	Additional arguments for llama-server
 #				Default: "" (optional)
 # llama_server_log (str):	Log file that llama-server will write log to
@@ -38,20 +38,19 @@ run_command="%%PREFIX%%/bin/llama-server"
 procname="${run_command}"
 pidfile=${llama_server_pidfile}
 command=/usr/sbin/daemon
-command_args="-f -t ${name} -p ${pidfile} -o ${llama_server_log} ${run_command} -m ${llama_server_model} ${llama_server_args}"
+command_args="-f -t ${name} -p ${pidfile} -o ${llama_server_log} ${run_command} ${llama_server_args}"
 start_precmd="llama_server_precmd"
 llama_server_chdir=/tmp

 llama_server_precmd()
 {
 	# check model
-	if [ -z "${llama_server_model}" ]; then
-		echo "llama_server_model isn't set, it is required"
-		exit 1
-	fi
-	if [ ! -f "${llama_server_model}" ]; then
-		echo "llama_server_model isn't a file"
-		exit 1
+	if [ -n "${llama_server_model}" ]; then
+		if [ ! -f "${llama_server_model}" ]; then
+			echo "llama_server_model isn't a file"
+			exit 1
+		fi
+		command_args="${command_args} -m ${llama_server_model}"
 	fi

 	# ensure that the log file exists and has right permissions
@@ -22,6 +22,15 @@ and navigate to http://localhost:8080:
 > llama_server_model=/path/to/models/llama-2-7b-chat.Q4_K_M.gguf
 > llama_server_args="--device Vulkan0 -ngl 27"

+To use the multi-model feature, leave llama_server_model unset and
+instead add "--models-preset /path/to/models.ini" to llama_server_args.
+List the pre-downloaded models in models.ini, for example:
+[Qwen3.5-35B-A3B-Uncensored]
+model = /path/to/Qwen3.5-35B-A3B-Uncensored-HauhauCS-Aggressive-Q4_K_M.gguf
+
+You can switch to CPU-only operation by disabling the VULKAN port
+option in misc/ggml (not in llama-cpp).
+
 EOM
 }
 ]