feat(postgres): Postgres Connection Test score now has a script that provides more insight. Not quite working properly but easy to improve at this point.
Some checks failed
Run Check Script / check (pull_request) Failing after 43s

This commit is contained in:
2025-12-16 15:53:54 -05:00
parent c3ec7070ec
commit 9e8f3ce52f

View File

@@ -4,9 +4,7 @@ use k8s_openapi::api::core::v1::Secret;
use log::{debug, error, info, trace};
use serde::Serialize;
use std::collections::BTreeMap;
use std::io::Write;
use std::path::{Path, PathBuf};
use std::process::Stdio;
use tokio::process::Command;
use crate::data::Version;
@@ -16,6 +14,47 @@ use crate::score::Score;
use crate::topology::{K8sclient, Topology};
use harmony_types::id::Id;
/// PostgreSQLConnectionScore tests PostgreSQL database connectivity and performance metrics
/// for databases exposed via public endpoints. This score is specifically designed to verify
/// that PostgreSQL instances installed using the PublicPostgreSQLScore can be accessed by external clients.
///
/// The score performs the following tests:
/// 1. Verifies TLS/SSL connection using CA certificates from Kubernetes secrets
/// 2. Tests basic connectivity to the database
/// 3. (Optional, when db permissions are setup) Collects comprehensive performance metrics including :
/// - Database size and schema usage statistics
/// - Active connections and query activity
/// - Performance metrics (transactions per second, cache hit ratio)
/// - Index usage and table statistics
/// - Configuration parameters
///
/// The implementation uses a Docker container running PostgreSQL client tools to execute
/// the connection test, ensuring consistent behavior across different environments.
///
/// # Kubernetes Secrets Required
///
/// The score requires two Kubernetes secrets in the target namespace:
/// - `{cluster_name}-app`: Contains connection parameters (host, port, username, password, dbname)
/// - `{cluster_name}-ca`: Contains CA certificate (ca.crt) for TLS verification
///
/// # Usage
///
/// ```rust
/// use harmony::modules::postgresql::PostgreSQLConnectionScore;
///
/// let score = PostgreSQLConnectionScore::new(
/// "default",
/// "my-postgres-cluster",
/// None
/// );
/// ```
///
/// # Parameters
///
/// - `namespace`: Kubernetes namespace where the PostgreSQL secrets are located
/// - `cluster_name`: Name of the PostgreSQL cluster (used to construct secret names)
/// - `hostname_override`: Optional hostname override for connection testing
/// - `port_override`: Optional port override for connection testing
#[derive(Debug, Clone, Serialize)]
pub struct PostgreSQLConnectionScore {
pub name: String,
@@ -143,31 +182,13 @@ impl PostgreSQLConnectionInterpret {
.ok_or_else(|| InterpretError::new("Port not found in secret or override".to_string()))
}
fn create_test_script(
&self,
temp_dir: &Path,
ca_file: &Path,
username: &str,
password: &str,
dbname: &str,
host: &str,
port: u16,
) -> Result<PathBuf, InterpretError> {
fn create_test_script(&self, temp_dir: &Path) -> Result<PathBuf, InterpretError> {
let script_path = temp_dir.join("test_connection.sh");
let ca_file_in_container = Path::new("/tmp").join(ca_file.file_name().unwrap());
let script_content = format!(
"#!/bin/sh\n\\
psql \"host={} port={} user={} dbname={} sslmode=verify-ca sslrootcert={} sslnegotiation=direct\" -c \"SELECT 1\"",
host,
port,
username,
dbname,
ca_file_in_container.display()
);
debug!("Wrote script content : \n{script_content}");
let script_content = postgres_scipt_content();
std::fs::write(&script_path, script_content)
.map_err(|e| InterpretError::new(format!("Failed to write test script: {e}")))?;
debug!("Wrote script content : \n{script_content}");
#[cfg(unix)]
{
@@ -187,13 +208,15 @@ impl PostgreSQLConnectionInterpret {
async fn run_docker_test(
&self,
temp_dir: &Path,
script_path: &Path,
cmd: &str,
password: &str,
) -> Result<Outcome, InterpretError> {
info!("Running connection test in Docker container...");
let container_cmd = format!("PGPASSWORD={} /tmp/test_connection.sh {}", password, cmd);
debug!("Starting docker container with cmd : {container_cmd}");
let output = Command::new("docker")
.arg("run")
let mut cmd = Command::new("docker");
cmd.arg("run")
.arg("--rm")
.arg("-i")
.arg("-v")
@@ -204,10 +227,12 @@ impl PostgreSQLConnectionInterpret {
.arg("/bin/sh")
.arg("postgres:latest")
.arg("-c")
.arg(format!("PGPASSWORD={} /tmp/test_connection.sh", password))
.arg(container_cmd)
.env("PGPASSWORD", password)
.stdout(std::process::Stdio::inherit())
.stderr(std::process::Stdio::inherit())
.stderr(std::process::Stdio::inherit());
debug!("Running Command {cmd:?}");
let output = cmd
.spawn()
.map_err(|e| InterpretError::new(format!("Failed to spawn docker container: {e}")))?
.wait_with_output()
@@ -220,11 +245,11 @@ impl PostgreSQLConnectionInterpret {
info!("Successfully connected to PostgreSQL!");
Ok(Outcome::success("Connection successful".to_string()))
} else {
let stdout = String::from_utf8_lossy(&output.stdout);
let stderr = String::from_utf8_lossy(&output.stderr);
error!("Connection failed: {}", stderr);
error!("Connection failed: stdout:\n{stdout}\nstderr:\n{stderr}");
Err(InterpretError::new(format!(
"Connection failed: {}",
stderr
"Connection failed: stdout:\n{stdout}\nstderr:\n{stderr}",
)))
}
}
@@ -277,23 +302,141 @@ impl<T: Topology + K8sclient + Send + Sync> Interpret<T> for PostgreSQLConnectio
let host = self.get_host(&app_data)?;
let port = self.get_port(&app_data)?;
// Create test script
let script_path = self.create_test_script(
temp_dir_path,
&ca_file,
&username,
&password,
&dbname,
&host,
port,
)?;
let script_path = self.create_test_script(temp_dir_path)?;
let ca_file_in_container = Path::new("/tmp").join(ca_file.file_name().unwrap());
let script_cmd = format!(
"{host} {port} {username} {dbname} {}",
ca_file_in_container.display()
);
debug!("Prepared test script in {}", temp_dir_path.display());
tokio::time::sleep(std::time::Duration::from_secs(10)).await;
// Run connection test
self.run_docker_test(temp_dir_path, &script_path, &password)
self.run_docker_test(temp_dir_path, &script_cmd, &password)
.await
}
}
fn postgres_scipt_content() -> &'static str {
r#"
#!/bin/sh
# PostgreSQL connection test and metrics collection script
# Basic connectivity test
echo "=== CONNECTION TEST ==="
psql "host=$1 port=$2 user=$3 dbname=$4 sslmode=verify-ca sslrootcert=$5 sslnegotiation=direct" -c "SELECT 1" > /dev/null 2>&1
if [ $? -ne 0 ]; then
echo "ERROR: Connection failed"
exit 1
fi
echo "Connection successful"
# Database size metrics
echo -e "\n=== DATABASE SIZE METRICS ==="
echo "Total database size (MB):"
psql "host=$1 port=$2 user=$3 dbname=$4 sslmode=verify-ca sslrootcert=$5 sslnegotiation=direct" -c "SELECT pg_size_pretty(pg_database_size(current_database()))" -t -A
echo "Database size breakdown:"
psql "host=$1 port=$2 user=$3 dbname=$4 sslmode=verify-ca sslrootcert=$5 sslnegotiation=direct" -c "SELECT
schema_name,
pg_size_pretty(sum(table_size)) as total_size
FROM (
SELECT
n.nspname as schema_name,
c.relname as table_name,
pg_total_relation_size(c.oid) as table_size
FROM pg_class c
LEFT JOIN pg_namespace n ON n.oid = c.relnamespace
WHERE n.nspname NOT IN ('pg_catalog', 'information_schema')
AND c.relkind = 'r'
) t
GROUP BY schema_name
ORDER BY sum(table_size) DESC" -t
# Connection and activity metrics
echo -e "\n=== CONNECTION & ACTIVITY ==="
echo "Active connections:"
psql "host=$1 port=$2 user=$3 dbname=$4 sslmode=verify-ca sslrootcert=$5 sslnegotiation=direct" -c "SELECT count(*) FROM pg_stat_activity" -t -A
echo "Current queries (running longer than 1 second):"
psql "host=$1 port=$2 user=$3 dbname=$4 sslmode=verify-ca sslrootcert=$5 sslnegotiation=direct" -c "SELECT
pid,
usename,
query_start,
now() - query_start as duration,
state,
left(query, 50) as query_preview
FROM pg_stat_activity
WHERE state = 'active' AND now() - query_start > interval '1 second'
ORDER BY duration DESC" -t
# Performance metrics
echo -e "\n=== PERFORMANCE METRICS ==="
echo "Database load (transactions per second):"
psql "host=$1 port=$2 user=$3 dbname=$4 sslmode=verify-ca sslrootcert=$5 sslnegotiation=direct" -c "SELECT
tps,
tps_commit,
tps_rollback,
blks_read,
blks_hit,
hit_ratio
FROM (
SELECT
xact_commit as tps_commit,
xact_rollback as tps_rollback,
(xact_commit + xact_rollback) as tps,
blks_read,
blks_hit,
CASE WHEN blks_read + blks_hit = 0 THEN 0 ELSE (blks_hit * 100.0 / (blks_read + blks_hit))::numeric(5,2) END as hit_ratio
FROM pg_stat_database
WHERE datname = current_database()
) stats" -t
echo "Current locks:"
psql "host=$1 port=$2 user=$3 dbname=$4 sslmode=verify-ca sslrootcert=$5 sslnegotiation=direct" -c "SELECT
count(*) as lock_count,
string_agg(mode, ', ' ORDER BY mode) as lock_modes
FROM pg_locks" -t
# Table statistics
echo -e "\n=== TABLE STATISTICS ==="
echo "Most accessed tables:"
psql "host=$1 port=$2 user=$3 dbname=$4 sslmode=verify-ca sslrootcert=$5 sslnegotiation=direct" -c "SELECT
relname,
seq_scan,
idx_scan,
n_tup_ins,
n_tup_upd,
n_tup_del
FROM pg_stat_user_tables
ORDER BY seq_scan + idx_scan + n_tup_ins + n_tup_upd + n_tup_del DESC
LIMIT 10" -t
# Index usage
echo -e "\n=== INDEX USAGE ==="
echo "Index usage statistics:"
psql "host=$1 port=$2 user=$3 dbname=$4 sslmode=verify-ca sslrootcert=$5 sslnegotiation=direct" -c "SELECT
indexrelname as index_name,
idx_scan,
idx_tup_read,
idx_tup_fetch
FROM pg_stat_user_indexes
ORDER BY idx_scan DESC
LIMIT 5" -t
# Configuration and limits
echo -e "\n=== CONFIGURATION ==="
echo "Current database parameters:"
psql "host=$1 port=$2 user=$3 dbname=$4 sslmode=verify-ca sslrootcert=$5 sslnegotiation=direct" -c "SELECT
name,
setting,
unit
FROM pg_settings
WHERE category = 'Resource Usage'
ORDER BY name" -t
echo -e "\n=== TEST COMPLETE ==="
echo "All metrics collected successfully"
exit 0
"#
}