>>108809938
For natural language, start a llama.cpp server with Gemma 4 or Qwen 3.6 (an MoE model if you are VRAM-limited; the 31B/27B models if you have 24+ GB of VRAM), or heretic variants if you need NSFW.
Then use a script like this:
import requests
import json
import os
import base64
from pathlib import Path
def encode_image_to_base64(image_path):
    """Return the contents of the file at *image_path* as a base64 string.

    The file is read as raw bytes and the base64 result is decoded to a
    plain ``str`` so it can be embedded directly in a data URL.
    """
    raw_bytes = Path(image_path).read_bytes()
    return base64.b64encode(raw_bytes).decode('utf-8')
# Caption every image in `folder_path` by POSTing it to a local llama.cpp
# server (OpenAI-compatible chat-completions endpoint) and writing the
# returned caption to a sibling <name>.txt file.
folder_path = "pathtofolder"
url = "http://127.0.0.1:8080/v1/chat/completions"

# Constant for every request — build once, not per file. (requests would set
# this automatically when `json=` is used, but keep it explicit.)
headers = {
    "Content-Type": "application/json"
}

for filename in os.listdir(folder_path):
    # Case-insensitive so .PNG / .JPG / .JPEG files are not silently skipped.
    if not filename.lower().endswith((".png", ".jpeg", ".jpg")):
        continue

    base64_image = encode_image_to_base64(os.path.join(folder_path, filename))
    # Use the correct MIME type per file; the original hard-coded image/jpeg
    # even for PNG data, which violates the data-URL contract.
    mime_type = "image/png" if filename.lower().endswith(".png") else "image/jpeg"
    data_url = f"data:{mime_type};base64,{base64_image}"

    payload = {
        "messages": [
            {
                "role": "system",
                "content": "WRITE A CAPTIONING SYS PROMPT HERE"
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Provide a caption for this image based on the guidelines."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": data_url
                        }
                    }
                ]
            }
        ],
        "temperature": 0.65,
        "min_p": 0.01,
        # NOTE(review): "min_p" and "reasoning" are llama.cpp extensions, not
        # standard OpenAI fields — verify against the server version in use.
        "reasoning": {
            "effort": "medium"
        }
    }

    response = requests.post(url, headers=headers, json=payload)
    # Fail loudly on HTTP errors instead of raising an opaque KeyError on
    # "choices" when the server returns an error body.
    response.raise_for_status()
    caption = response.json()["choices"][0]["message"]["content"]

    # Write the caption next to the image as <stem>.txt; explicit UTF-8 so
    # non-ASCII captions don't crash on platforms with a legacy default codec.
    out_path = os.path.join(folder_path, filename.rpartition('.')[0] + ".txt")
    with open(out_path, "w", encoding="utf-8") as output:
        output.write(caption)