Files
swarm-zap/skills/searxng-local-search/scripts/search.clj

153 lines
5.0 KiB
Clojure
Executable File

#!/usr/bin/env bb
(ns search
(:require [babashka.http-client :as http]
[cheshire.core :as json]
[clojure.string :as str]
[clojure.java.io :as io]))
(def default-endpoints
  "Built-in SearXNG base URLs, listed in the order they are attempted:
  two loopback addresses first, then two fixed LAN hosts."
  ["http://localhost:8888"
   "http://127.0.0.1:8888"
   "http://192.168.153.113:18803"
   "http://192.168.153.117:18803"])
(def min-delay-ms
  "Minimum spacing, in milliseconds, enforced between two consecutive requests."
  1000)

(def timeout-ms
  "Value passed as the :timeout option of every HTTP request, in milliseconds."
  30000)

(def rate-file
  "Path of the file that stores the timestamp of the most recent request.
  It is a relative path, so it is resolved against the current working directory."
  ".searxng-last-request")
(defn parse-options
  "Turns the optional JSON options argument into Clojure data.

  s - JSON text, or nil / blank when no options were supplied.

  Returns {} for nil or blank input, otherwise the parsed JSON with keyword
  keys. When the text cannot be parsed, an error message and the parser's
  message are printed to stderr and the process exits with status 2."
  [s]
  ;; str/blank? already answers true for nil, so one check covers both cases.
  (if-not (str/blank? s)
    (try
      (json/parse-string s true)
      (catch Exception ex
        (binding [*out* *err*]
          (println "Error: invalid options JSON")
          (println (.getMessage ex)))
        (System/exit 2)))
    {}))
(defn now-ms
  "Returns the current time in milliseconds, as reported by
  System/currentTimeMillis."
  []
  (System/currentTimeMillis))
(defn last-request-ms
  "Reads the timestamp recorded in rate-file.

  Returns the stored value as a long, or nil when the file does not exist or
  anything goes wrong while reading or parsing it (the lookup is best-effort)."
  []
  (try
    (let [f (io/file rate-file)]
      (when (.exists f)
        (-> (slurp f)
            str/trim
            Long/parseLong)))
    (catch Exception _
      nil)))
(defn write-last-request!
  "Records ts (a millisecond timestamp) in rate-file, replacing any previous
  content.

  The write is best-effort: if the file cannot be written (for example the
  working directory is read-only) a warning is printed to stderr and nil is
  returned, so that the search itself is not aborted by rate-limit
  bookkeeping. Always returns nil."
  [ts]
  (try
    (spit rate-file (str ts))
    (catch Exception e
      (binding [*out* *err*]
        ;; getMessage may be nil, so fall back to the exception class name.
        (println (str "Warning: could not write " rate-file ": "
                      (or (not-empty (.getMessage e))
                          (.getName (class e))))))
      nil)))
(defn enforce-rate-limit!
  "Blocks until at least min-delay-ms milliseconds have passed since the
  timestamp returned by last-request-ms.

  Does nothing when no previous timestamp is available. The pause is never
  longer than min-delay-ms: a stored timestamp that is negative or lies in the
  future (clock adjustment, corrupted file) is treated as \"just now\" instead
  of producing an arbitrarily long sleep. Returns nil."
  []
  (when-let [last-ts (last-request-ms)]
    (let [now (now-ms)
          ;; Only trust timestamps in [0, now]; anything else counts as zero
          ;; elapsed time, which bounds the sleep below by 0 and above by
          ;; min-delay-ms and also avoids overflow in the subtraction.
          elapsed (if (<= 0 last-ts now) (- now last-ts) 0)
          remaining (- min-delay-ms elapsed)]
      (when (pos? remaining)
        (Thread/sleep (long remaining))))))
(defn endpoint-candidates
  "Returns the SearXNG base URLs to try, in order.

  When the SEARXNG_URL environment variable holds a non-blank value, its
  trimmed content is placed in front of default-endpoints; otherwise
  default-endpoints is returned unchanged."
  []
  (let [override (some-> "SEARXNG_URL" System/getenv str/trim)]
    ;; str/blank? is true for nil as well as for empty/whitespace strings.
    (if (str/blank? override)
      default-endpoints
      (cons override default-endpoints))))
(defn category->param
  "Builds the query-parameter map that selects a search category.

  Returns nil when category is nil/false or equal to \"general\"; otherwise a
  single-entry map whose key is the keyword category_<category> and whose
  value is \"1\"."
  [category]
  (when-not (or (not category) (= category "general"))
    (let [param-key (keyword (str "category_" category))]
      {param-key "1"})))
(defn build-params
  "Assembles the query-parameter map for a search request.

  query - the search text, stored under :q.
  opts  - parsed options; the keys :language, :time_range, :num_results and
          :category are read.

  The result always contains :q, :format \"json\" and :language (defaulting to
  \"en\"). :time_range is added when present, :pageno 1 and :count are added
  when :num_results is present, and the entry produced by category->param is
  merged in last."
  [query opts]
  (let [lang        (:language opts)
        time-range  (:time_range opts)
        wanted      (:num_results opts)
        base        {:q query
                     :format "json"
                     :language (or lang "en")}]
    (cond-> base
      time-range (assoc :time_range time-range)
      wanted     (assoc :pageno 1 :count wanted)
      ;; merging nil is a no-op, so this step is safe for the default category
      :always    (merge (category->param (:category opts))))))
(defn try-search
  "Performs one search request against a single SearXNG instance.

  base-url - instance base URL; one trailing slash is removed before
             \"/search\" is appended.
  params   - query-parameter map sent with the GET request.

  Never throws. Returns a map:
    on HTTP 200   {:ok true  :endpoint base-url :body <parsed JSON, keyword keys>}
    on HTTP 429   {:ok false :retryable true :endpoint base-url
                   :error \"Rate limit exceeded (429)\"}
    other status  {:ok false :retryable true :endpoint base-url
                   :error \"HTTP <status>\"}
    on exception  {:ok false :retryable true :endpoint base-url
                   :error <exception message, or its class name when the
                           exception carries no message>}
  Exceptions include failures while parsing the body of a 200 response."
  [base-url params]
  (let [url (str (str/replace base-url #"/$" "") "/search")]
    (try
      (let [resp   (http/get url
                             {:query-params params
                              :timeout timeout-ms
                              ;; inspect the status ourselves instead of
                              ;; letting the client throw on non-2xx
                              :throw false
                              :headers {"accept" "application/json"}})
            status (:status resp)]
        (cond
          (= 200 status)
          {:ok true
           :endpoint base-url
           :body (json/parse-string (:body resp) true)}

          (= 429 status)
          {:ok false :retryable true :endpoint base-url
           :error "Rate limit exceeded (429)"}

          :else
          {:ok false :retryable true :endpoint base-url
           :error (format "HTTP %s" status)}))
      (catch Exception e
        {:ok false :retryable true :endpoint base-url
         ;; Throwable.getMessage may be nil (or empty); without a fallback the
         ;; failure report would show "null" and hide what went wrong.
         :error (or (not-empty (.getMessage e))
                    (.getName (class e)))}))))
(defn top-results
  "Returns at most n entries of results, ordered by descending :score.

  A missing :score counts as 0.0. A nil results collection is treated as
  empty."
  [results n]
  (let [score-of (fn [entry] (double (or (:score entry) 0.0)))
        ranked   (sort-by score-of > (or results []))]
    (take n ranked)))
(defn fmt-engines
  "Formats the engines that produced result r as a comma-separated string.

  Uses :engines when that key holds a truthy value, otherwise the single
  :engine value; returns \"unknown\" when the chosen collection is empty."
  [r]
  (let [names (cond
                (:engines r) (:engines r)
                (:engine r)  [(:engine r)]
                :else        [])]
    (if (empty? names)
      "unknown"
      (str/join ", " names))))
(defn print-results
  "Prints a human-readable report of a search response to *out*.

  query       - the search text, echoed in the header.
  body        - parsed response; :results and :number_of_results are read.
  num-results - maximum number of entries to list (highest score first).
  endpoint    - base URL that served the response, echoed in the header.

  The header shows the total: the reported :number_of_results when it is a
  positive number, otherwise the number of entries in :results. Each listed
  entry shows its title, score, URL, content and engines, with placeholders
  for missing fields. Prints \"No results found.\" when there is nothing to
  list."
  [query body num-results endpoint]
  (let [all-results (:results body)
        reported    (:number_of_results body)
        ;; 0 is truthy in Clojure, so a plain `or` would print "Found 0 total
        ;; results" above a non-empty list whenever the response reports 0.
        ;; Only trust a positive reported total; otherwise count what we have.
        total       (if (and (number? reported) (pos? reported))
                      reported
                      (count all-results))
        results     (top-results all-results num-results)]
    (println (format "Search Results for \"%s\"" query))
    (println (format "Found %s total results" total))
    (println (format "Endpoint: %s" endpoint))
    (println)
    (if (seq results)
      (doseq [[idx r] (map-indexed vector results)]
        (println (format "%d. %s [Score: %.2f]"
                         (inc idx)
                         (or (:title r) "(untitled)")
                         (double (or (:score r) 0.0))))
        (println (str " URL: " (or (:url r) "N/A")))
        (println (str " " (or (:content r) "No description available.")))
        (println (str " Engines: " (fmt-engines r)))
        (println))
      (println "No results found."))))
(defn usage
  "Prints the command-line usage text to stderr: an example invocation, a blank
  line, and the list of recognised option keys. Returns nil."
  []
  (binding [*out* *err*]
    (doseq [line ["Usage: bb scripts/search.clj \"query\" '{\"category\":\"news\",\"time_range\":\"day\",\"num_results\":5}'"
                  ""
                  "Options JSON keys: category, time_range, language, num_results"]]
      (println line))))
(defn -main
  "Command-line entry point: `query [options-json]`.

  Flow: validate the arguments, wait out the rate limit, record the request
  time, then try each endpoint from endpoint-candidates in order and print the
  first successful response with print-results.

  Exit codes:
    1 - query missing or blank (usage is printed to stderr)
    2 - options are invalid (unparseable JSON is reported by parse-options;
        a non-numeric num_results is reported here)
    3 - every endpoint failed (each failure is listed on stderr)

  The number of results displayed is num_results clamped to 1..20, defaulting
  to 5."
  [& args]
  (let [[query opts-json] args]
    (when (or (nil? query) (str/blank? query))
      (usage)
      (System/exit 1))
    (let [opts        (parse-options opts-json)
          requested   (:num_results opts)
          ;; Reject values such as "5" up front: coercing them would throw an
          ;; uncaught ClassCastException instead of a readable error.
          _           (when (and requested (not (number? requested)))
                        (binding [*out* *err*]
                          (println "Error: num_results must be a number"))
                        (System/exit 2))
          ;; Clamp BEFORE coercing so huge values cannot overflow the cast.
          num-results (long (max 1 (min 20 (or requested 5))))
          params      (build-params query opts)]
      (enforce-rate-limit!)
      (write-last-request! (now-ms))
      ;; `remaining` rather than `rest`, to avoid shadowing clojure.core/rest.
      (loop [[endpoint & remaining] (endpoint-candidates)
             failures []]
        (if (nil? endpoint)
          (do
            (binding [*out* *err*]
              (println "Error: all SearXNG endpoints failed")
              (doseq [{:keys [endpoint error]} failures]
                (println (format "- %s -> %s" endpoint error))))
            (System/exit 3))
          (let [res (try-search endpoint params)]
            (if (:ok res)
              (print-results query (:body res) num-results endpoint)
              (recur remaining
                     (conj failures (select-keys res [:endpoint :error]))))))))))
;; Run -main only when this file is the script babashka was asked to execute.
;; Without the guard, merely loading or requiring this namespace (REPL, tests,
;; `bb -m search`) would immediately perform a search and possibly exit.
(when (= *file* (System/getProperty "babashka.file"))
  (apply -main *command-line-args*))