feat(postgres): Postgres Connection Test score now has a script that provides more insight. Not quite working properly but easy to improve at this point.
Some checks failed
Run Check Script / check (pull_request) Failing after 43s
Some checks failed
Run Check Script / check (pull_request) Failing after 43s
This commit is contained in:
@@ -4,9 +4,7 @@ use k8s_openapi::api::core::v1::Secret;
|
||||
use log::{debug, error, info, trace};
|
||||
use serde::Serialize;
|
||||
use std::collections::BTreeMap;
|
||||
use std::io::Write;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::Stdio;
|
||||
use tokio::process::Command;
|
||||
|
||||
use crate::data::Version;
|
||||
@@ -16,6 +14,47 @@ use crate::score::Score;
|
||||
use crate::topology::{K8sclient, Topology};
|
||||
use harmony_types::id::Id;
|
||||
|
||||
/// PostgreSQLConnectionScore tests PostgreSQL database connectivity and performance metrics
|
||||
/// for databases exposed via public endpoints. This score is specifically designed to verify
|
||||
/// that PostgreSQL instances installed using the PublicPostgreSQLScore can be accessed by external clients.
|
||||
///
|
||||
/// The score performs the following tests:
|
||||
/// 1. Verifies TLS/SSL connection using CA certificates from Kubernetes secrets
|
||||
/// 2. Tests basic connectivity to the database
|
||||
/// 3. (Optional, when db permissions are setup) Collects comprehensive performance metrics including :
|
||||
/// - Database size and schema usage statistics
|
||||
/// - Active connections and query activity
|
||||
/// - Performance metrics (transactions per second, cache hit ratio)
|
||||
/// - Index usage and table statistics
|
||||
/// - Configuration parameters
|
||||
///
|
||||
/// The implementation uses a Docker container running PostgreSQL client tools to execute
|
||||
/// the connection test, ensuring consistent behavior across different environments.
|
||||
///
|
||||
/// # Kubernetes Secrets Required
|
||||
///
|
||||
/// The score requires two Kubernetes secrets in the target namespace:
|
||||
/// - `{cluster_name}-app`: Contains connection parameters (host, port, username, password, dbname)
|
||||
/// - `{cluster_name}-ca`: Contains CA certificate (ca.crt) for TLS verification
|
||||
///
|
||||
/// # Usage
|
||||
///
|
||||
/// ```rust
|
||||
/// use harmony::modules::postgresql::PostgreSQLConnectionScore;
|
||||
///
|
||||
/// let score = PostgreSQLConnectionScore::new(
|
||||
/// "default",
|
||||
/// "my-postgres-cluster",
|
||||
/// None
|
||||
/// );
|
||||
/// ```
|
||||
///
|
||||
/// # Parameters
|
||||
///
|
||||
/// - `namespace`: Kubernetes namespace where the PostgreSQL secrets are located
|
||||
/// - `cluster_name`: Name of the PostgreSQL cluster (used to construct secret names)
|
||||
/// - `hostname_override`: Optional hostname override for connection testing
|
||||
/// - `port_override`: Optional port override for connection testing
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct PostgreSQLConnectionScore {
|
||||
pub name: String,
|
||||
@@ -143,31 +182,13 @@ impl PostgreSQLConnectionInterpret {
|
||||
.ok_or_else(|| InterpretError::new("Port not found in secret or override".to_string()))
|
||||
}
|
||||
|
||||
fn create_test_script(
|
||||
&self,
|
||||
temp_dir: &Path,
|
||||
ca_file: &Path,
|
||||
username: &str,
|
||||
password: &str,
|
||||
dbname: &str,
|
||||
host: &str,
|
||||
port: u16,
|
||||
) -> Result<PathBuf, InterpretError> {
|
||||
fn create_test_script(&self, temp_dir: &Path) -> Result<PathBuf, InterpretError> {
|
||||
let script_path = temp_dir.join("test_connection.sh");
|
||||
let ca_file_in_container = Path::new("/tmp").join(ca_file.file_name().unwrap());
|
||||
let script_content = format!(
|
||||
"#!/bin/sh\n\\
|
||||
psql \"host={} port={} user={} dbname={} sslmode=verify-ca sslrootcert={} sslnegotiation=direct\" -c \"SELECT 1\"",
|
||||
host,
|
||||
port,
|
||||
username,
|
||||
dbname,
|
||||
ca_file_in_container.display()
|
||||
);
|
||||
|
||||
debug!("Wrote script content : \n{script_content}");
|
||||
let script_content = postgres_scipt_content();
|
||||
std::fs::write(&script_path, script_content)
|
||||
.map_err(|e| InterpretError::new(format!("Failed to write test script: {e}")))?;
|
||||
debug!("Wrote script content : \n{script_content}");
|
||||
|
||||
#[cfg(unix)]
|
||||
{
|
||||
@@ -187,13 +208,15 @@ impl PostgreSQLConnectionInterpret {
|
||||
async fn run_docker_test(
|
||||
&self,
|
||||
temp_dir: &Path,
|
||||
script_path: &Path,
|
||||
cmd: &str,
|
||||
password: &str,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
info!("Running connection test in Docker container...");
|
||||
let container_cmd = format!("PGPASSWORD={} /tmp/test_connection.sh {}", password, cmd);
|
||||
debug!("Starting docker container with cmd : {container_cmd}");
|
||||
|
||||
let output = Command::new("docker")
|
||||
.arg("run")
|
||||
let mut cmd = Command::new("docker");
|
||||
cmd.arg("run")
|
||||
.arg("--rm")
|
||||
.arg("-i")
|
||||
.arg("-v")
|
||||
@@ -204,10 +227,12 @@ impl PostgreSQLConnectionInterpret {
|
||||
.arg("/bin/sh")
|
||||
.arg("postgres:latest")
|
||||
.arg("-c")
|
||||
.arg(format!("PGPASSWORD={} /tmp/test_connection.sh", password))
|
||||
.arg(container_cmd)
|
||||
.env("PGPASSWORD", password)
|
||||
.stdout(std::process::Stdio::inherit())
|
||||
.stderr(std::process::Stdio::inherit())
|
||||
.stderr(std::process::Stdio::inherit());
|
||||
debug!("Running Command {cmd:?}");
|
||||
let output = cmd
|
||||
.spawn()
|
||||
.map_err(|e| InterpretError::new(format!("Failed to spawn docker container: {e}")))?
|
||||
.wait_with_output()
|
||||
@@ -220,11 +245,11 @@ impl PostgreSQLConnectionInterpret {
|
||||
info!("Successfully connected to PostgreSQL!");
|
||||
Ok(Outcome::success("Connection successful".to_string()))
|
||||
} else {
|
||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
error!("Connection failed: {}", stderr);
|
||||
error!("Connection failed: stdout:\n{stdout}\nstderr:\n{stderr}");
|
||||
Err(InterpretError::new(format!(
|
||||
"Connection failed: {}",
|
||||
stderr
|
||||
"Connection failed: stdout:\n{stdout}\nstderr:\n{stderr}",
|
||||
)))
|
||||
}
|
||||
}
|
||||
@@ -277,23 +302,141 @@ impl<T: Topology + K8sclient + Send + Sync> Interpret<T> for PostgreSQLConnectio
|
||||
let host = self.get_host(&app_data)?;
|
||||
let port = self.get_port(&app_data)?;
|
||||
|
||||
|
||||
// Create test script
|
||||
let script_path = self.create_test_script(
|
||||
temp_dir_path,
|
||||
&ca_file,
|
||||
&username,
|
||||
&password,
|
||||
&dbname,
|
||||
&host,
|
||||
port,
|
||||
)?;
|
||||
let script_path = self.create_test_script(temp_dir_path)?;
|
||||
|
||||
let ca_file_in_container = Path::new("/tmp").join(ca_file.file_name().unwrap());
|
||||
let script_cmd = format!(
|
||||
"{host} {port} {username} {dbname} {}",
|
||||
ca_file_in_container.display()
|
||||
);
|
||||
debug!("Prepared test script in {}", temp_dir_path.display());
|
||||
tokio::time::sleep(std::time::Duration::from_secs(10)).await;
|
||||
|
||||
// Run connection test
|
||||
self.run_docker_test(temp_dir_path, &script_path, &password)
|
||||
self.run_docker_test(temp_dir_path, &script_cmd, &password)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
fn postgres_scipt_content() -> &'static str {
|
||||
r#"
|
||||
#!/bin/sh
|
||||
# PostgreSQL connection test and metrics collection script
|
||||
|
||||
# Basic connectivity test
|
||||
echo "=== CONNECTION TEST ==="
|
||||
psql "host=$1 port=$2 user=$3 dbname=$4 sslmode=verify-ca sslrootcert=$5 sslnegotiation=direct" -c "SELECT 1" > /dev/null 2>&1
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "ERROR: Connection failed"
|
||||
exit 1
|
||||
fi
|
||||
echo "Connection successful"
|
||||
|
||||
# Database size metrics
|
||||
echo -e "\n=== DATABASE SIZE METRICS ==="
|
||||
echo "Total database size (MB):"
|
||||
psql "host=$1 port=$2 user=$3 dbname=$4 sslmode=verify-ca sslrootcert=$5 sslnegotiation=direct" -c "SELECT pg_size_pretty(pg_database_size(current_database()))" -t -A
|
||||
|
||||
echo "Database size breakdown:"
|
||||
psql "host=$1 port=$2 user=$3 dbname=$4 sslmode=verify-ca sslrootcert=$5 sslnegotiation=direct" -c "SELECT
|
||||
schema_name,
|
||||
pg_size_pretty(sum(table_size)) as total_size
|
||||
FROM (
|
||||
SELECT
|
||||
n.nspname as schema_name,
|
||||
c.relname as table_name,
|
||||
pg_total_relation_size(c.oid) as table_size
|
||||
FROM pg_class c
|
||||
LEFT JOIN pg_namespace n ON n.oid = c.relnamespace
|
||||
WHERE n.nspname NOT IN ('pg_catalog', 'information_schema')
|
||||
AND c.relkind = 'r'
|
||||
) t
|
||||
GROUP BY schema_name
|
||||
ORDER BY sum(table_size) DESC" -t
|
||||
|
||||
# Connection and activity metrics
|
||||
echo -e "\n=== CONNECTION & ACTIVITY ==="
|
||||
echo "Active connections:"
|
||||
psql "host=$1 port=$2 user=$3 dbname=$4 sslmode=verify-ca sslrootcert=$5 sslnegotiation=direct" -c "SELECT count(*) FROM pg_stat_activity" -t -A
|
||||
|
||||
echo "Current queries (running longer than 1 second):"
|
||||
psql "host=$1 port=$2 user=$3 dbname=$4 sslmode=verify-ca sslrootcert=$5 sslnegotiation=direct" -c "SELECT
|
||||
pid,
|
||||
usename,
|
||||
query_start,
|
||||
now() - query_start as duration,
|
||||
state,
|
||||
left(query, 50) as query_preview
|
||||
FROM pg_stat_activity
|
||||
WHERE state = 'active' AND now() - query_start > interval '1 second'
|
||||
ORDER BY duration DESC" -t
|
||||
|
||||
# Performance metrics
|
||||
echo -e "\n=== PERFORMANCE METRICS ==="
|
||||
echo "Database load (transactions per second):"
|
||||
psql "host=$1 port=$2 user=$3 dbname=$4 sslmode=verify-ca sslrootcert=$5 sslnegotiation=direct" -c "SELECT
|
||||
tps,
|
||||
tps_commit,
|
||||
tps_rollback,
|
||||
blks_read,
|
||||
blks_hit,
|
||||
hit_ratio
|
||||
FROM (
|
||||
SELECT
|
||||
xact_commit as tps_commit,
|
||||
xact_rollback as tps_rollback,
|
||||
(xact_commit + xact_rollback) as tps,
|
||||
blks_read,
|
||||
blks_hit,
|
||||
CASE WHEN blks_read + blks_hit = 0 THEN 0 ELSE (blks_hit * 100.0 / (blks_read + blks_hit))::numeric(5,2) END as hit_ratio
|
||||
FROM pg_stat_database
|
||||
WHERE datname = current_database()
|
||||
) stats" -t
|
||||
|
||||
echo "Current locks:"
|
||||
psql "host=$1 port=$2 user=$3 dbname=$4 sslmode=verify-ca sslrootcert=$5 sslnegotiation=direct" -c "SELECT
|
||||
count(*) as lock_count,
|
||||
string_agg(mode, ', ' ORDER BY mode) as lock_modes
|
||||
FROM pg_locks" -t
|
||||
|
||||
# Table statistics
|
||||
echo -e "\n=== TABLE STATISTICS ==="
|
||||
echo "Most accessed tables:"
|
||||
psql "host=$1 port=$2 user=$3 dbname=$4 sslmode=verify-ca sslrootcert=$5 sslnegotiation=direct" -c "SELECT
|
||||
relname,
|
||||
seq_scan,
|
||||
idx_scan,
|
||||
n_tup_ins,
|
||||
n_tup_upd,
|
||||
n_tup_del
|
||||
FROM pg_stat_user_tables
|
||||
ORDER BY seq_scan + idx_scan + n_tup_ins + n_tup_upd + n_tup_del DESC
|
||||
LIMIT 10" -t
|
||||
|
||||
# Index usage
|
||||
echo -e "\n=== INDEX USAGE ==="
|
||||
echo "Index usage statistics:"
|
||||
psql "host=$1 port=$2 user=$3 dbname=$4 sslmode=verify-ca sslrootcert=$5 sslnegotiation=direct" -c "SELECT
|
||||
indexrelname as index_name,
|
||||
idx_scan,
|
||||
idx_tup_read,
|
||||
idx_tup_fetch
|
||||
FROM pg_stat_user_indexes
|
||||
ORDER BY idx_scan DESC
|
||||
LIMIT 5" -t
|
||||
|
||||
# Configuration and limits
|
||||
echo -e "\n=== CONFIGURATION ==="
|
||||
echo "Current database parameters:"
|
||||
psql "host=$1 port=$2 user=$3 dbname=$4 sslmode=verify-ca sslrootcert=$5 sslnegotiation=direct" -c "SELECT
|
||||
name,
|
||||
setting,
|
||||
unit
|
||||
FROM pg_settings
|
||||
WHERE category = 'Resource Usage'
|
||||
ORDER BY name" -t
|
||||
|
||||
echo -e "\n=== TEST COMPLETE ==="
|
||||
echo "All metrics collected successfully"
|
||||
exit 0
|
||||
"#
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user