feat(postgres): Postgres Connection Test score now has a script that provides more insight. Not quite working properly but easy to improve at this point.
Some checks failed
Run Check Script / check (pull_request) Failing after 43s
Some checks failed
Run Check Script / check (pull_request) Failing after 43s
This commit is contained in:
@@ -4,9 +4,7 @@ use k8s_openapi::api::core::v1::Secret;
|
|||||||
use log::{debug, error, info, trace};
|
use log::{debug, error, info, trace};
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
use std::collections::BTreeMap;
|
use std::collections::BTreeMap;
|
||||||
use std::io::Write;
|
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use std::process::Stdio;
|
|
||||||
use tokio::process::Command;
|
use tokio::process::Command;
|
||||||
|
|
||||||
use crate::data::Version;
|
use crate::data::Version;
|
||||||
@@ -16,6 +14,47 @@ use crate::score::Score;
|
|||||||
use crate::topology::{K8sclient, Topology};
|
use crate::topology::{K8sclient, Topology};
|
||||||
use harmony_types::id::Id;
|
use harmony_types::id::Id;
|
||||||
|
|
||||||
|
/// PostgreSQLConnectionScore tests PostgreSQL database connectivity and performance metrics
|
||||||
|
/// for databases exposed via public endpoints. This score is specifically designed to verify
|
||||||
|
/// that PostgreSQL instances installed using the PublicPostgreSQLScore can be accessed by external clients.
|
||||||
|
///
|
||||||
|
/// The score performs the following tests:
|
||||||
|
/// 1. Verifies TLS/SSL connection using CA certificates from Kubernetes secrets
|
||||||
|
/// 2. Tests basic connectivity to the database
|
||||||
|
/// 3. (Optional, when db permissions are setup) Collects comprehensive performance metrics including :
|
||||||
|
/// - Database size and schema usage statistics
|
||||||
|
/// - Active connections and query activity
|
||||||
|
/// - Performance metrics (transactions per second, cache hit ratio)
|
||||||
|
/// - Index usage and table statistics
|
||||||
|
/// - Configuration parameters
|
||||||
|
///
|
||||||
|
/// The implementation uses a Docker container running PostgreSQL client tools to execute
|
||||||
|
/// the connection test, ensuring consistent behavior across different environments.
|
||||||
|
///
|
||||||
|
/// # Kubernetes Secrets Required
|
||||||
|
///
|
||||||
|
/// The score requires two Kubernetes secrets in the target namespace:
|
||||||
|
/// - `{cluster_name}-app`: Contains connection parameters (host, port, username, password, dbname)
|
||||||
|
/// - `{cluster_name}-ca`: Contains CA certificate (ca.crt) for TLS verification
|
||||||
|
///
|
||||||
|
/// # Usage
|
||||||
|
///
|
||||||
|
/// ```rust
|
||||||
|
/// use harmony::modules::postgresql::PostgreSQLConnectionScore;
|
||||||
|
///
|
||||||
|
/// let score = PostgreSQLConnectionScore::new(
|
||||||
|
/// "default",
|
||||||
|
/// "my-postgres-cluster",
|
||||||
|
/// None
|
||||||
|
/// );
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// # Parameters
|
||||||
|
///
|
||||||
|
/// - `namespace`: Kubernetes namespace where the PostgreSQL secrets are located
|
||||||
|
/// - `cluster_name`: Name of the PostgreSQL cluster (used to construct secret names)
|
||||||
|
/// - `hostname_override`: Optional hostname override for connection testing
|
||||||
|
/// - `port_override`: Optional port override for connection testing
|
||||||
#[derive(Debug, Clone, Serialize)]
|
#[derive(Debug, Clone, Serialize)]
|
||||||
pub struct PostgreSQLConnectionScore {
|
pub struct PostgreSQLConnectionScore {
|
||||||
pub name: String,
|
pub name: String,
|
||||||
@@ -143,31 +182,13 @@ impl PostgreSQLConnectionInterpret {
|
|||||||
.ok_or_else(|| InterpretError::new("Port not found in secret or override".to_string()))
|
.ok_or_else(|| InterpretError::new("Port not found in secret or override".to_string()))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn create_test_script(
|
fn create_test_script(&self, temp_dir: &Path) -> Result<PathBuf, InterpretError> {
|
||||||
&self,
|
|
||||||
temp_dir: &Path,
|
|
||||||
ca_file: &Path,
|
|
||||||
username: &str,
|
|
||||||
password: &str,
|
|
||||||
dbname: &str,
|
|
||||||
host: &str,
|
|
||||||
port: u16,
|
|
||||||
) -> Result<PathBuf, InterpretError> {
|
|
||||||
let script_path = temp_dir.join("test_connection.sh");
|
let script_path = temp_dir.join("test_connection.sh");
|
||||||
let ca_file_in_container = Path::new("/tmp").join(ca_file.file_name().unwrap());
|
|
||||||
let script_content = format!(
|
|
||||||
"#!/bin/sh\n\\
|
|
||||||
psql \"host={} port={} user={} dbname={} sslmode=verify-ca sslrootcert={} sslnegotiation=direct\" -c \"SELECT 1\"",
|
|
||||||
host,
|
|
||||||
port,
|
|
||||||
username,
|
|
||||||
dbname,
|
|
||||||
ca_file_in_container.display()
|
|
||||||
);
|
|
||||||
|
|
||||||
debug!("Wrote script content : \n{script_content}");
|
let script_content = postgres_scipt_content();
|
||||||
std::fs::write(&script_path, script_content)
|
std::fs::write(&script_path, script_content)
|
||||||
.map_err(|e| InterpretError::new(format!("Failed to write test script: {e}")))?;
|
.map_err(|e| InterpretError::new(format!("Failed to write test script: {e}")))?;
|
||||||
|
debug!("Wrote script content : \n{script_content}");
|
||||||
|
|
||||||
#[cfg(unix)]
|
#[cfg(unix)]
|
||||||
{
|
{
|
||||||
@@ -187,13 +208,15 @@ impl PostgreSQLConnectionInterpret {
|
|||||||
async fn run_docker_test(
|
async fn run_docker_test(
|
||||||
&self,
|
&self,
|
||||||
temp_dir: &Path,
|
temp_dir: &Path,
|
||||||
script_path: &Path,
|
cmd: &str,
|
||||||
password: &str,
|
password: &str,
|
||||||
) -> Result<Outcome, InterpretError> {
|
) -> Result<Outcome, InterpretError> {
|
||||||
info!("Running connection test in Docker container...");
|
info!("Running connection test in Docker container...");
|
||||||
|
let container_cmd = format!("PGPASSWORD={} /tmp/test_connection.sh {}", password, cmd);
|
||||||
|
debug!("Starting docker container with cmd : {container_cmd}");
|
||||||
|
|
||||||
let output = Command::new("docker")
|
let mut cmd = Command::new("docker");
|
||||||
.arg("run")
|
cmd.arg("run")
|
||||||
.arg("--rm")
|
.arg("--rm")
|
||||||
.arg("-i")
|
.arg("-i")
|
||||||
.arg("-v")
|
.arg("-v")
|
||||||
@@ -204,10 +227,12 @@ impl PostgreSQLConnectionInterpret {
|
|||||||
.arg("/bin/sh")
|
.arg("/bin/sh")
|
||||||
.arg("postgres:latest")
|
.arg("postgres:latest")
|
||||||
.arg("-c")
|
.arg("-c")
|
||||||
.arg(format!("PGPASSWORD={} /tmp/test_connection.sh", password))
|
.arg(container_cmd)
|
||||||
.env("PGPASSWORD", password)
|
.env("PGPASSWORD", password)
|
||||||
.stdout(std::process::Stdio::inherit())
|
.stdout(std::process::Stdio::inherit())
|
||||||
.stderr(std::process::Stdio::inherit())
|
.stderr(std::process::Stdio::inherit());
|
||||||
|
debug!("Running Command {cmd:?}");
|
||||||
|
let output = cmd
|
||||||
.spawn()
|
.spawn()
|
||||||
.map_err(|e| InterpretError::new(format!("Failed to spawn docker container: {e}")))?
|
.map_err(|e| InterpretError::new(format!("Failed to spawn docker container: {e}")))?
|
||||||
.wait_with_output()
|
.wait_with_output()
|
||||||
@@ -220,11 +245,11 @@ impl PostgreSQLConnectionInterpret {
|
|||||||
info!("Successfully connected to PostgreSQL!");
|
info!("Successfully connected to PostgreSQL!");
|
||||||
Ok(Outcome::success("Connection successful".to_string()))
|
Ok(Outcome::success("Connection successful".to_string()))
|
||||||
} else {
|
} else {
|
||||||
|
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||||
error!("Connection failed: {}", stderr);
|
error!("Connection failed: stdout:\n{stdout}\nstderr:\n{stderr}");
|
||||||
Err(InterpretError::new(format!(
|
Err(InterpretError::new(format!(
|
||||||
"Connection failed: {}",
|
"Connection failed: stdout:\n{stdout}\nstderr:\n{stderr}",
|
||||||
stderr
|
|
||||||
)))
|
)))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -277,23 +302,141 @@ impl<T: Topology + K8sclient + Send + Sync> Interpret<T> for PostgreSQLConnectio
|
|||||||
let host = self.get_host(&app_data)?;
|
let host = self.get_host(&app_data)?;
|
||||||
let port = self.get_port(&app_data)?;
|
let port = self.get_port(&app_data)?;
|
||||||
|
|
||||||
|
|
||||||
// Create test script
|
// Create test script
|
||||||
let script_path = self.create_test_script(
|
let script_path = self.create_test_script(temp_dir_path)?;
|
||||||
temp_dir_path,
|
|
||||||
&ca_file,
|
|
||||||
&username,
|
|
||||||
&password,
|
|
||||||
&dbname,
|
|
||||||
&host,
|
|
||||||
port,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
|
let ca_file_in_container = Path::new("/tmp").join(ca_file.file_name().unwrap());
|
||||||
|
let script_cmd = format!(
|
||||||
|
"{host} {port} {username} {dbname} {}",
|
||||||
|
ca_file_in_container.display()
|
||||||
|
);
|
||||||
debug!("Prepared test script in {}", temp_dir_path.display());
|
debug!("Prepared test script in {}", temp_dir_path.display());
|
||||||
tokio::time::sleep(std::time::Duration::from_secs(10)).await;
|
|
||||||
|
|
||||||
// Run connection test
|
// Run connection test
|
||||||
self.run_docker_test(temp_dir_path, &script_path, &password)
|
self.run_docker_test(temp_dir_path, &script_cmd, &password)
|
||||||
.await
|
.await
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn postgres_scipt_content() -> &'static str {
|
||||||
|
r#"
|
||||||
|
#!/bin/sh
|
||||||
|
# PostgreSQL connection test and metrics collection script
|
||||||
|
|
||||||
|
# Basic connectivity test
|
||||||
|
echo "=== CONNECTION TEST ==="
|
||||||
|
psql "host=$1 port=$2 user=$3 dbname=$4 sslmode=verify-ca sslrootcert=$5 sslnegotiation=direct" -c "SELECT 1" > /dev/null 2>&1
|
||||||
|
if [ $? -ne 0 ]; then
|
||||||
|
echo "ERROR: Connection failed"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
echo "Connection successful"
|
||||||
|
|
||||||
|
# Database size metrics
|
||||||
|
echo -e "\n=== DATABASE SIZE METRICS ==="
|
||||||
|
echo "Total database size (MB):"
|
||||||
|
psql "host=$1 port=$2 user=$3 dbname=$4 sslmode=verify-ca sslrootcert=$5 sslnegotiation=direct" -c "SELECT pg_size_pretty(pg_database_size(current_database()))" -t -A
|
||||||
|
|
||||||
|
echo "Database size breakdown:"
|
||||||
|
psql "host=$1 port=$2 user=$3 dbname=$4 sslmode=verify-ca sslrootcert=$5 sslnegotiation=direct" -c "SELECT
|
||||||
|
schema_name,
|
||||||
|
pg_size_pretty(sum(table_size)) as total_size
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
n.nspname as schema_name,
|
||||||
|
c.relname as table_name,
|
||||||
|
pg_total_relation_size(c.oid) as table_size
|
||||||
|
FROM pg_class c
|
||||||
|
LEFT JOIN pg_namespace n ON n.oid = c.relnamespace
|
||||||
|
WHERE n.nspname NOT IN ('pg_catalog', 'information_schema')
|
||||||
|
AND c.relkind = 'r'
|
||||||
|
) t
|
||||||
|
GROUP BY schema_name
|
||||||
|
ORDER BY sum(table_size) DESC" -t
|
||||||
|
|
||||||
|
# Connection and activity metrics
|
||||||
|
echo -e "\n=== CONNECTION & ACTIVITY ==="
|
||||||
|
echo "Active connections:"
|
||||||
|
psql "host=$1 port=$2 user=$3 dbname=$4 sslmode=verify-ca sslrootcert=$5 sslnegotiation=direct" -c "SELECT count(*) FROM pg_stat_activity" -t -A
|
||||||
|
|
||||||
|
echo "Current queries (running longer than 1 second):"
|
||||||
|
psql "host=$1 port=$2 user=$3 dbname=$4 sslmode=verify-ca sslrootcert=$5 sslnegotiation=direct" -c "SELECT
|
||||||
|
pid,
|
||||||
|
usename,
|
||||||
|
query_start,
|
||||||
|
now() - query_start as duration,
|
||||||
|
state,
|
||||||
|
left(query, 50) as query_preview
|
||||||
|
FROM pg_stat_activity
|
||||||
|
WHERE state = 'active' AND now() - query_start > interval '1 second'
|
||||||
|
ORDER BY duration DESC" -t
|
||||||
|
|
||||||
|
# Performance metrics
|
||||||
|
echo -e "\n=== PERFORMANCE METRICS ==="
|
||||||
|
echo "Database load (transactions per second):"
|
||||||
|
psql "host=$1 port=$2 user=$3 dbname=$4 sslmode=verify-ca sslrootcert=$5 sslnegotiation=direct" -c "SELECT
|
||||||
|
tps,
|
||||||
|
tps_commit,
|
||||||
|
tps_rollback,
|
||||||
|
blks_read,
|
||||||
|
blks_hit,
|
||||||
|
hit_ratio
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
xact_commit as tps_commit,
|
||||||
|
xact_rollback as tps_rollback,
|
||||||
|
(xact_commit + xact_rollback) as tps,
|
||||||
|
blks_read,
|
||||||
|
blks_hit,
|
||||||
|
CASE WHEN blks_read + blks_hit = 0 THEN 0 ELSE (blks_hit * 100.0 / (blks_read + blks_hit))::numeric(5,2) END as hit_ratio
|
||||||
|
FROM pg_stat_database
|
||||||
|
WHERE datname = current_database()
|
||||||
|
) stats" -t
|
||||||
|
|
||||||
|
echo "Current locks:"
|
||||||
|
psql "host=$1 port=$2 user=$3 dbname=$4 sslmode=verify-ca sslrootcert=$5 sslnegotiation=direct" -c "SELECT
|
||||||
|
count(*) as lock_count,
|
||||||
|
string_agg(mode, ', ' ORDER BY mode) as lock_modes
|
||||||
|
FROM pg_locks" -t
|
||||||
|
|
||||||
|
# Table statistics
|
||||||
|
echo -e "\n=== TABLE STATISTICS ==="
|
||||||
|
echo "Most accessed tables:"
|
||||||
|
psql "host=$1 port=$2 user=$3 dbname=$4 sslmode=verify-ca sslrootcert=$5 sslnegotiation=direct" -c "SELECT
|
||||||
|
relname,
|
||||||
|
seq_scan,
|
||||||
|
idx_scan,
|
||||||
|
n_tup_ins,
|
||||||
|
n_tup_upd,
|
||||||
|
n_tup_del
|
||||||
|
FROM pg_stat_user_tables
|
||||||
|
ORDER BY seq_scan + idx_scan + n_tup_ins + n_tup_upd + n_tup_del DESC
|
||||||
|
LIMIT 10" -t
|
||||||
|
|
||||||
|
# Index usage
|
||||||
|
echo -e "\n=== INDEX USAGE ==="
|
||||||
|
echo "Index usage statistics:"
|
||||||
|
psql "host=$1 port=$2 user=$3 dbname=$4 sslmode=verify-ca sslrootcert=$5 sslnegotiation=direct" -c "SELECT
|
||||||
|
indexrelname as index_name,
|
||||||
|
idx_scan,
|
||||||
|
idx_tup_read,
|
||||||
|
idx_tup_fetch
|
||||||
|
FROM pg_stat_user_indexes
|
||||||
|
ORDER BY idx_scan DESC
|
||||||
|
LIMIT 5" -t
|
||||||
|
|
||||||
|
# Configuration and limits
|
||||||
|
echo -e "\n=== CONFIGURATION ==="
|
||||||
|
echo "Current database parameters:"
|
||||||
|
psql "host=$1 port=$2 user=$3 dbname=$4 sslmode=verify-ca sslrootcert=$5 sslnegotiation=direct" -c "SELECT
|
||||||
|
name,
|
||||||
|
setting,
|
||||||
|
unit
|
||||||
|
FROM pg_settings
|
||||||
|
WHERE category = 'Resource Usage'
|
||||||
|
ORDER BY name" -t
|
||||||
|
|
||||||
|
echo -e "\n=== TEST COMPLETE ==="
|
||||||
|
echo "All metrics collected successfully"
|
||||||
|
exit 0
|
||||||
|
"#
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user