282 lines
10 KiB
Python
282 lines
10 KiB
Python
"""
|
|
TeamSpeak 6 Prometheus Exporter.
|
|
|
|
Polls the TS6 WebQuery HTTP API and exposes metrics
|
|
in Prometheus format on /metrics endpoint.
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import time
|
|
import signal
|
|
import logging
|
|
from prometheus_client import (
|
|
start_http_server,
|
|
Gauge,
|
|
Counter,
|
|
Info,
|
|
REGISTRY,
|
|
GC_COLLECTOR,
|
|
PLATFORM_COLLECTOR,
|
|
PROCESS_COLLECTOR,
|
|
)
|
|
from ts6_client import TS6Client
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Logging
|
|
# ---------------------------------------------------------------------------
|
|
# Log level is taken from the environment; names that are not attributes of
# the logging module fall back to INFO.
LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO").upper()

logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=getattr(logging, LOG_LEVEL, logging.INFO),
)

# Module-wide logger used by the collection and main-loop code below.
logger = logging.getLogger("ts6_exporter")
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Configuration
|
|
# ---------------------------------------------------------------------------
|
|
def _env_int(name, default, minimum=None):
    """Return the integer value of environment variable *name*.

    Falls back to *default* when the variable is unset. When the value is
    not a valid integer, or is smaller than *minimum*, an error is logged and
    the process exits with status 1 instead of surfacing a raw traceback at
    import time.
    """
    raw = os.getenv(name)
    if raw is None:
        return default
    try:
        value = int(raw)
    except ValueError:
        logger.error("%s must be an integer, got %r", name, raw)
        sys.exit(1)
    if minimum is not None and value < minimum:
        logger.error("%s must be >= %s, got %s", name, minimum, value)
        sys.exit(1)
    return value


TS6_HOST = os.getenv("TS6_HOST", "localhost")
TS6_QUERY_PORT = _env_int("TS6_QUERY_PORT", 10080)
TS6_API_KEY = os.getenv("TS6_API_KEY", "")
TS6_SERVER_ID = _env_int("TS6_SERVER_ID", 1)
EXPORTER_PORT = _env_int("EXPORTER_PORT", 9189)
# A poll interval below one second would either busy-loop (0) or make
# time.sleep() raise (negative), so it is rejected up front.
POLL_INTERVAL = _env_int("POLL_INTERVAL", 15, minimum=1)
METRIC_PREFIX = os.getenv("METRIC_PREFIX", "ts6")

# The exporter cannot authenticate against the WebQuery API without a key.
if not TS6_API_KEY:
    logger.error("TS6_API_KEY environment variable is required!")
    sys.exit(1)
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Optionally remove default Python collectors for cleaner output
|
|
# ---------------------------------------------------------------------------
|
|
# Only the exact (case-insensitive) value "true" enables removal; this is also
# the default when the variable is unset.
DISABLE_DEFAULT_COLLECTORS = os.getenv("DISABLE_DEFAULT_COLLECTORS", "true").lower() == "true"

if DISABLE_DEFAULT_COLLECTORS:
    for collector in (GC_COLLECTOR, PLATFORM_COLLECTOR, PROCESS_COLLECTOR):
        try:
            REGISTRY.unregister(collector)
        except Exception as exc:
            # Best effort: a collector that cannot be unregistered must not
            # stop the exporter, but the failure is recorded rather than
            # silently discarded.
            logger.debug("Could not unregister default collector %r: %s", collector, exc)
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Prometheus Metrics
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Every metric name below is built from METRIC_PREFIX so several exporters can
# coexist under different prefixes.

# --- Server ---------------------------------------------------------------
server_up = Gauge(
    f"{METRIC_PREFIX}_server_up",
    "Whether the TS6 server is reachable (1=up, 0=down)",
)
server_uptime = Gauge(
    f"{METRIC_PREFIX}_server_uptime_seconds",
    "Server uptime in seconds",
)
server_version_info = Info(
    f"{METRIC_PREFIX}_server_version",
    "TeamSpeak server version info",
)

# --- Clients --------------------------------------------------------------
clients_online = Gauge(
    f"{METRIC_PREFIX}_clients_online",
    "Number of clients currently online (excluding query clients)",
)
clients_max = Gauge(
    f"{METRIC_PREFIX}_clients_max",
    "Maximum allowed clients",
)
query_clients_online = Gauge(
    f"{METRIC_PREFIX}_query_clients_online",
    "Number of query clients online",
)

# --- Channels -------------------------------------------------------------
channels_total = Gauge(
    f"{METRIC_PREFIX}_channels_total",
    "Total number of channels",
)

# --- Bandwidth ------------------------------------------------------------
# NOTE(review): these carry a "_total" suffix but are Gauges whose value is
# overwritten with whatever the server reports on each poll.
bytes_sent = Gauge(
    f"{METRIC_PREFIX}_bytes_sent_total",
    "Total bytes sent by the server",
)
bytes_received = Gauge(
    f"{METRIC_PREFIX}_bytes_received_total",
    "Total bytes received by the server",
)
packets_sent = Gauge(
    f"{METRIC_PREFIX}_packets_sent_total",
    "Total packets sent",
)
packets_received = Gauge(
    f"{METRIC_PREFIX}_packets_received_total",
    "Total packets received",
)

# --- File transfer --------------------------------------------------------
ft_bytes_sent = Gauge(
    f"{METRIC_PREFIX}_file_transfer_bytes_sent_total",
    "Total file transfer bytes sent",
)
ft_bytes_received = Gauge(
    f"{METRIC_PREFIX}_file_transfer_bytes_received_total",
    "Total file transfer bytes received",
)

# --- Quality --------------------------------------------------------------
avg_ping = Gauge(
    f"{METRIC_PREFIX}_average_ping_seconds",
    "Average client ping in seconds",
)
avg_packet_loss = Gauge(
    f"{METRIC_PREFIX}_average_packet_loss",
    "Average client packet loss ratio",
)

# --- Bans -----------------------------------------------------------------
bans_total = Gauge(
    f"{METRIC_PREFIX}_bans_total",
    "Total number of active bans",
)

# --- Server groups --------------------------------------------------------
server_groups_total = Gauge(
    f"{METRIC_PREFIX}_server_groups_total",
    "Total number of server groups",
)

# --- Per-client info ------------------------------------------------------
# One labelled series per connected client; the sample value is always 1.
client_info_gauge = Gauge(
    f"{METRIC_PREFIX}_client_connected",
    "Connected client info (1 = connected)",
    labelnames=["client_id", "nickname", "platform", "version", "country", "channel_id"],
)

# --- Scrape bookkeeping ---------------------------------------------------
scrape_duration = Gauge(
    f"{METRIC_PREFIX}_scrape_duration_seconds",
    "Duration of the last scrape in seconds",
)
scrape_errors = Counter(
    f"{METRIC_PREFIX}_scrape_errors_total",
    "Total number of scrape errors",
)
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Collector
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def safe_int(value, default=0):
    """Convert *value* to ``int``, returning *default* when that is impossible.

    Args:
        value: Anything ``int()`` accepts (numbers, numeric strings, ...).
        default: Value returned when the conversion fails.

    Returns:
        ``int(value)``, or *default* if ``int()`` raises ``ValueError``
        (e.g. non-numeric text, NaN), ``TypeError`` (e.g. ``None``) or
        ``OverflowError`` (e.g. an infinite float).
    """
    try:
        return int(value)
    except (ValueError, TypeError, OverflowError):
        # OverflowError is raised by int(float("inf")); without catching it a
        # single odd value would escape this "safe" helper.
        return default
|
|
|
|
|
|
def safe_float(value, default=0.0):
    """Convert *value* to ``float``, returning *default* when that is impossible.

    Args:
        value: Anything ``float()`` accepts (numbers, numeric strings, ...).
        default: Value returned when the conversion fails.

    Returns:
        ``float(value)``, or *default* if ``float()`` raises ``ValueError``
        (e.g. non-numeric text), ``TypeError`` (e.g. ``None``) or
        ``OverflowError`` (an integer too large to represent as a float).
    """
    try:
        return float(value)
    except (ValueError, TypeError, OverflowError):
        # OverflowError is raised by float() for huge ints such as 10**400.
        return default
|
|
|
|
|
|
def collect_metrics(client: TS6Client):
    """Poll the TS6 server once and refresh every exported metric.

    The function never raises: each section (version, server info, bans,
    server groups, client list) has its own error handling so that one failing
    API call does not prevent the others from being updated, and any
    unexpected error marks the server as down and bumps the scrape-error
    counter. The scrape duration is always recorded and a summary line is
    always logged.

    Args:
        client: TS6 client providing ``is_alive()``, ``version()``,
            ``server_info()``, ``ban_list()``, ``server_group_list()`` and
            ``client_list()``.
    """
    start_time = time.time()

    # Values shown in the summary log line. They stay "?" unless server info
    # was successfully refreshed during *this* cycle, so the log never reports
    # stale numbers from an earlier scrape.
    online_for_log = "?"
    channels_for_log = "?"

    try:
        # Check if server is alive
        alive = client.is_alive()
        server_up.set(1 if alive else 0)

        if not alive:
            logger.warning("TS6 server is not reachable")
            scrape_errors.inc()
            return

        # Server version (only needs to be set once, but low cost)
        try:
            ver = client.version()
            if ver:
                server_version_info.info({
                    "version": ver.get("version", "unknown"),
                    "build": str(ver.get("build", "unknown")),
                    "platform": ver.get("platform", "unknown"),
                })
        except Exception as e:
            logger.debug("Could not get version: %s", e)

        # Server info
        try:
            info = client.server_info()
            if info:
                query_online = safe_int(info.get("virtualserver_queryclientsonline", 0))
                # The reported total is reduced by the query-client count so
                # the gauge matches its "excluding query clients" help text.
                online = safe_int(info.get("virtualserver_clientsonline", 0)) - query_online
                channels = safe_int(info.get("virtualserver_channelsonline", 0))

                server_uptime.set(safe_int(info.get("virtualserver_uptime", 0)))
                clients_online.set(online)
                clients_max.set(safe_int(info.get("virtualserver_maxclients", 0)))
                query_clients_online.set(query_online)
                channels_total.set(channels)

                # Bandwidth
                bytes_sent.set(safe_int(info.get("connection_bytes_sent_total", 0)))
                bytes_received.set(safe_int(info.get("connection_bytes_received_total", 0)))
                packets_sent.set(safe_int(info.get("connection_packets_sent_total", 0)))
                packets_received.set(safe_int(info.get("connection_packets_received_total", 0)))

                # File transfer
                ft_bytes_sent.set(safe_int(info.get("connection_filetransfer_bytes_sent_total", 0)))
                ft_bytes_received.set(safe_int(info.get("connection_filetransfer_bytes_received_total", 0)))

                # Quality
                # NOTE(review): the division assumes the API reports the ping
                # in milliseconds -- confirm against the WebQuery docs.
                avg_ping.set(safe_float(info.get("virtualserver_total_ping", 0.0)) / 1000.0)
                avg_packet_loss.set(safe_float(info.get("virtualserver_total_packetloss_total", 0.0)))

                online_for_log = online
                channels_for_log = channels
        except Exception as e:
            logger.error("Error collecting server info: %s", e)
            scrape_errors.inc()

        # Bans
        try:
            bans = client.ban_list()
            bans_total.set(len(bans))
        except Exception as e:
            logger.debug("Could not get ban list: %s", e)
            bans_total.set(0)

        # Server groups
        try:
            groups = client.server_group_list()
            server_groups_total.set(len(groups))
        except Exception as e:
            logger.debug("Could not get server groups: %s", e)

        # Per-client metrics
        try:
            # Fetch and filter *before* touching the gauge: the HTTP server
            # thread may render /metrics at any time, and clearing first would
            # expose an empty client set for the whole duration of the API call.
            clients = client.client_list()
            label_sets = [
                {
                    "client_id": c.get("clid", ""),
                    "nickname": c.get("client_nickname", "unknown"),
                    "platform": c.get("client_platform", "unknown"),
                    "version": c.get("client_version", "unknown"),
                    "country": c.get("client_country", ""),
                    "channel_id": c.get("cid", ""),
                }
                for c in clients
                # Skip query clients (client_type=1)
                if safe_int(c.get("client_type", 0)) != 1
            ]

            # Public, lock-protected way to drop all previous label sets.
            client_info_gauge.clear()
            for label_values in label_sets:
                client_info_gauge.labels(**label_values).set(1)
        except Exception as e:
            logger.debug("Could not get client list: %s", e)
            # Do not keep advertising clients from an earlier poll when the
            # current list could not be obtained.
            client_info_gauge.clear()

    except Exception as e:
        logger.error("Unexpected error during collection: %s", e)
        server_up.set(0)
        scrape_errors.inc()

    finally:
        duration = time.time() - start_time
        scrape_duration.set(duration)
        logger.info(
            "Scrape completed in %.3fs | clients=%s channels=%s",
            duration,
            online_for_log,
            channels_for_log,
        )
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Main
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def main():
    """Run the exporter: start the metrics HTTP server and poll until signalled.

    Logs the effective configuration, creates the TS6 client, starts the
    Prometheus HTTP server on ``EXPORTER_PORT`` and then calls
    ``collect_metrics`` every ``POLL_INTERVAL`` seconds. SIGTERM and SIGINT
    request a graceful stop, which takes effect after the collection in
    progress (if any) and at most about one second of waiting.
    """
    logger.info("=" * 60)
    logger.info("TeamSpeak 6 Prometheus Exporter")
    logger.info("=" * 60)
    logger.info("TS6 Host: %s:%s", TS6_HOST, TS6_QUERY_PORT)
    logger.info("Server ID: %s", TS6_SERVER_ID)
    logger.info("Exporter Port: %s", EXPORTER_PORT)
    logger.info("Poll Interval: %ss", POLL_INTERVAL)
    logger.info("=" * 60)

    client = TS6Client(
        host=TS6_HOST,
        port=TS6_QUERY_PORT,
        api_key=TS6_API_KEY,
        server_id=TS6_SERVER_ID,
    )

    # Start Prometheus HTTP server
    start_http_server(EXPORTER_PORT)
    logger.info("Metrics server started on http://0.0.0.0:%s/metrics", EXPORTER_PORT)

    # Graceful shutdown
    running = True

    def shutdown(signum, frame):
        nonlocal running
        logger.info("Received signal %s, shutting down...", signum)
        running = False

    signal.signal(signal.SIGTERM, shutdown)
    signal.signal(signal.SIGINT, shutdown)

    # Longest single sleep, i.e. the worst-case delay before a shutdown
    # request is noticed while idle.
    max_sleep_slice = 1.0

    # Main polling loop
    while running:
        collect_metrics(client)

        # time.sleep() resumes after a signal handler returns, so one long
        # sleep would postpone shutdown by up to POLL_INTERVAL seconds. Sleep
        # in short slices and re-check the flag between them instead.
        deadline = time.monotonic() + POLL_INTERVAL
        while running:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break
            time.sleep(min(remaining, max_sleep_slice))

    logger.info("Exporter stopped.")
|
|
|
|
|
|
# Run the exporter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|