feat(harmony_secret): SSO auth hardening — silent refresh, renewal, namespacing #302

Merged
johnride merged 1 commits from pr/harmony-secret-auth into master 2026-05-29 15:11:53 +00:00
6 changed files with 344 additions and 76 deletions

1
Cargo.lock generated
View File

@@ -4472,6 +4472,7 @@ dependencies = [
"tokio",
"url",
"vaultrs",
"webbrowser",
]
[[package]]

View File

@@ -30,6 +30,10 @@ schemars = "0.8"
vaultrs = "0.7.4"
reqwest = { workspace = true, features = ["json"] }
url.workspace = true
# Used by ZitadelOidcAuth to best-effort launch the device-flow
# URL in the operator's browser. Failure to open is non-fatal —
# the URL is already printed to the terminal.
webbrowser = "1"
[dev-dependencies]
pretty_assertions.workspace = true

View File

@@ -41,6 +41,4 @@ lazy_static! {
env::var("HARMONY_SSO_CLIENT_ID").ok();
pub static ref HARMONY_SSO_CLIENT_SECRET: Option<String> =
env::var("HARMONY_SSO_CLIENT_SECRET").ok();
pub static ref HARMONY_SECRETS_URL: Option<String> =
env::var("HARMONY_SECRETS_URL").ok();
}

View File

@@ -1,5 +1,5 @@
use async_trait::async_trait;
use log::{debug, info};
use log::{debug, info, warn};
use std::path::{Path, PathBuf};
use crate::{SecretStore, SecretStoreError};
@@ -8,43 +8,73 @@ use crate::{SecretStore, SecretStoreError};
pub struct LocalFileSecretStore;
impl LocalFileSecretStore {
/// Helper to consistently generate the secret file path.
fn get_file_path(base_dir: &Path, ns: &str, key: &str) -> PathBuf {
/// Current on-disk layout: `<base>/<ns>/<key>.json`.
///
/// Nesting by namespace gives each `harmony_secret` consumer its own
/// directory under `~/.local/share/harmony/secrets/`, so an example's
/// state can be deleted (e.g. `rm -rf .../secrets/harmony-sso-example/`)
/// without touching unrelated namespaces such as production deployments.
fn current_file_path(base_dir: &Path, ns: &str, key: &str) -> PathBuf {
base_dir.join(ns).join(format!("{key}.json"))
}
/// Legacy flat layout `<base>/<ns>_<key>.json`, kept only as a read
/// fallback so existing installs don't lose secrets after the
/// upgrade. Writes always use the current layout.
fn legacy_file_path(base_dir: &Path, ns: &str, key: &str) -> PathBuf {
base_dir.join(format!("{ns}_{key}.json"))
}
fn base_dir() -> PathBuf {
directories::BaseDirs::new()
.expect("Could not find a valid home directory")
.data_dir()
.join("harmony")
.join("secrets")
}
}
#[async_trait]
impl SecretStore for LocalFileSecretStore {
async fn get_raw(&self, ns: &str, key: &str) -> Result<Vec<u8>, SecretStoreError> {
let data_dir = directories::BaseDirs::new()
.expect("Could not find a valid home directory")
.data_dir()
.join("harmony")
.join("secrets");
let file_path = Self::get_file_path(&data_dir, ns, key);
let data_dir = Self::base_dir();
let file_path = Self::current_file_path(&data_dir, ns, key);
debug!(
"LOCAL_STORE: Getting key '{key}' from namespace '{ns}' at {}",
file_path.display()
);
tokio::fs::read(&file_path)
.await
.map_err(|_| SecretStoreError::NotFound {
namespace: ns.to_string(),
key: key.to_string(),
})
match tokio::fs::read(&file_path).await {
Ok(bytes) => Ok(bytes),
Err(_) => {
// Fall back to the legacy flat file written by older
// versions of harmony_secret. We do not auto-migrate
// on read (a subsequent `set_raw` will write the new
// location); operators can `mv` the file or wait for
// the next write to converge.
let legacy = Self::legacy_file_path(&data_dir, ns, key);
match tokio::fs::read(&legacy).await {
Ok(bytes) => {
warn!(
"LOCAL_STORE: Read from legacy flat path {}; the next set \
will persist to the namespaced layout at {}",
legacy.display(),
file_path.display()
);
Ok(bytes)
}
Err(_) => Err(SecretStoreError::NotFound {
namespace: ns.to_string(),
key: key.to_string(),
}),
}
}
}
}
async fn set_raw(&self, ns: &str, key: &str, val: &[u8]) -> Result<(), SecretStoreError> {
let data_dir = directories::BaseDirs::new()
.expect("Could not find a valid home directory")
.data_dir()
.join("harmony")
.join("secrets");
let file_path = Self::get_file_path(&data_dir, ns, key);
let data_dir = Self::base_dir();
let file_path = Self::current_file_path(&data_dir, ns, key);
info!(
"LOCAL_STORE: Setting key '{key}' in namespace '{ns}' at {}",
file_path.display()
@@ -67,6 +97,20 @@ mod tests {
use super::*;
use tempfile::tempdir;
#[tokio::test]
async fn current_path_nests_by_namespace() {
let dir = tempdir().unwrap();
let p = LocalFileSecretStore::current_file_path(dir.path(), "harmony-sso-example", "Foo");
assert_eq!(p, dir.path().join("harmony-sso-example").join("Foo.json"));
}
#[tokio::test]
async fn legacy_path_is_flat() {
let dir = tempdir().unwrap();
let p = LocalFileSecretStore::legacy_file_path(dir.path(), "harmony-sso-example", "Foo");
assert_eq!(p, dir.path().join("harmony-sso-example_Foo.json"));
}
#[tokio::test]
async fn test_set_and_get_raw_successfully() {
let dir = tempdir().unwrap();
@@ -74,17 +118,12 @@ mod tests {
let key = "test-key";
let value = b"{\"data\":\"test-value\"}";
// To test the store directly, we override the base directory logic.
// For this test, we'll manually construct the path within our temp dir.
let file_path = LocalFileSecretStore::get_file_path(dir.path(), ns, key);
// Manually write to the temp path to simulate the store's behavior
let file_path = LocalFileSecretStore::current_file_path(dir.path(), ns, key);
tokio::fs::create_dir_all(file_path.parent().unwrap())
.await
.unwrap();
tokio::fs::write(&file_path, value).await.unwrap();
// Now, test get_raw by reading from that same temp path (by mocking the path logic)
let retrieved_value = tokio::fs::read(&file_path).await.unwrap();
assert_eq!(retrieved_value, value);
}
@@ -95,8 +134,7 @@ mod tests {
let ns = "test-ns";
let key = "non-existent-key";
// We need to check if reading a non-existent file gives the correct error
let file_path = LocalFileSecretStore::get_file_path(dir.path(), ns, key);
let file_path = LocalFileSecretStore::current_file_path(dir.path(), ns, key);
let result = tokio::fs::read(&file_path).await;
assert!(result.is_err());

View File

@@ -77,10 +77,34 @@ impl OpenbaoSecretStore {
// 2. Try to load cached token
let cache_path = Self::get_token_cache_path(&base_url);
if let Ok(cached_token) = Self::load_cached_token(&cache_path) {
if let Ok(mut cached_token) = Self::load_cached_token(&cache_path) {
debug!("OPENBAO_STORE: Found cached token, validating...");
if Self::validate_token(&base_url, skip_tls, &cached_token.client_token).await {
info!("OPENBAO_STORE: Cached token is valid");
// Best-effort renewal: extend the server-side lease so
// sessions roll forward without an OIDC re-auth. The
// token has already been validated above, so a renew
// failure (server-side policy denying renewal, network
// blip, etc.) doesn't block the caller — we just keep
// the validated cached token as-is and let the next
// invocation try again.
match Self::renew_self(&base_url, skip_tls, &cached_token.client_token).await {
Ok(new_lease) => {
info!("OPENBAO_STORE: cached token renewed, lease_duration={new_lease}s");
cached_token.lease_duration = Some(new_lease);
if let Err(e) = Self::cache_token(&cache_path, &cached_token) {
warn!("OPENBAO_STORE: failed to persist renewed token: {e}");
}
}
Err(e) => {
warn!(
"OPENBAO_STORE: renew-self failed ({e}); keeping the validated \
cached token as-is"
);
}
}
return Self::with_token(
&base_url,
skip_tls,
@@ -267,7 +291,16 @@ impl OpenbaoSecretStore {
Ok(())
}
/// Validate if a token is still valid using vaultrs
/// Validate that the cached token is still usable.
///
/// Uses `auth/token/lookup-self`, NOT `auth/token/lookup`. The
/// latter looks up an arbitrary token and requires a privileged
/// policy (sudo on `auth/token/lookup`) — our SSO-derived tokens
/// carry only `default` + `harmony-dev`, so `lookup` always 403s
/// and made every cached token look invalid, forcing a needless
/// OIDC re-exchange on every run (and skipping `renew_self`).
/// `lookup-self` is granted to every token by the `default`
/// policy, so it correctly reports whether the token is live.
async fn validate_token(base_url: &str, skip_tls: bool, token: &str) -> bool {
let mut settings = VaultClientSettingsBuilder::default();
settings.address(base_url).token(token);
@@ -280,11 +313,66 @@ impl OpenbaoSecretStore {
Ok(s) => s,
Err(_) => return false,
};
return vaultrs::token::lookup(&client, token).await.is_ok();
return vaultrs::token::lookup_self(&client).await.is_ok();
}
false
}
/// Ask OpenBao to extend the token's lease via
/// `/v1/auth/token/renew-self`, returning the new
/// `lease_duration` in seconds. ADR 020-1 motivates this:
/// without periodic self-renewal the client token expires at
/// its initial TTL (4h by default) and the caller falls back
/// to a fresh OIDC device flow on the next run — i.e. the
/// operator goes back to the browser. With renewal the token
/// rolls forward until the role's `max_ttl` (24h), at which
/// point the OIDC refresh-token chain takes over.
///
/// Hand-rolled via `reqwest` rather than `vaultrs` because
/// vaultrs 0.7.4 doesn't expose a typed renew-self helper
/// scoped to our auth method; the raw call is the same shape
/// as `ZitadelOidcAuth::exchange_jwt_for_openbao_token`.
async fn renew_self(base_url: &str, skip_tls: bool, token: &str) -> Result<u64, String> {
let mut builder = reqwest::Client::builder();
if skip_tls {
builder = builder.danger_accept_invalid_certs(true);
}
let client = builder
.build()
.map_err(|e| format!("Failed to build HTTP client: {e}"))?;
let url = format!(
"{}/v1/auth/token/renew-self",
base_url.trim_end_matches('/')
);
let response = client
.post(&url)
.header("X-Vault-Token", token)
.send()
.await
.map_err(|e| format!("renew-self request to {url} failed: {e}"))?;
let status = response.status();
let body = response
.text()
.await
.map_err(|e| format!("Failed to read renew-self body ({status}): {e}"))?;
if !status.is_success() {
return Err(format!("renew-self returned {status}: {body}"));
}
let parsed: serde_json::Value = serde_json::from_str(&body)
.map_err(|e| format!("Failed to parse renew-self response: {e} — body={body}"))?;
let lease_duration = parsed
.get("auth")
.and_then(|a| a.get("lease_duration"))
.and_then(|v| v.as_u64())
.ok_or_else(|| format!("renew-self response missing auth.lease_duration: {body}"))?;
Ok(lease_duration)
}
/// Authenticate using username/password (userpass auth method)
async fn authenticate_userpass(
base_url: &str,

View File

@@ -1,4 +1,4 @@
use log::{debug, info};
use log::{debug, info, warn};
use serde::{Deserialize, Serialize};
use std::fs;
use std::path::PathBuf;
@@ -26,18 +26,6 @@ impl OidcSession {
false
}
}
pub fn is_openbao_token_expired(&self, _ttl: u64) -> bool {
if let Some(expires_at) = self.expires_at {
let now = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.map(|d| d.as_secs() as i64)
.unwrap_or(0);
expires_at <= now
} else {
false
}
}
}
#[derive(Debug, Deserialize)]
@@ -69,14 +57,15 @@ struct TokenErrorResponse {
error_description: Option<String>,
}
fn get_session_cache_path() -> PathBuf {
let hash = {
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
let mut hasher = DefaultHasher::new();
"zitadel-oidc".hash(&mut hasher);
format!("{:016x}", hasher.finish())
};
/// Build the per-instance cache path for an OIDC session.
///
/// Previously this hashed the literal string `"zitadel-oidc"` — which
/// meant every Zitadel instance on the machine wrote to the same file
/// and the second login would silently overwrite the first. Hashing
/// `sso_url + client_id` instead gives each (Zitadel host, OAuth app)
/// pair its own cache file so multiple installs coexist.
fn get_session_cache_path(sso_url: &str, client_id: &str) -> PathBuf {
let hash = hash_session_key(sso_url, client_id);
directories::BaseDirs::new()
.map(|dirs| {
dirs.data_dir()
@@ -87,8 +76,19 @@ fn get_session_cache_path() -> PathBuf {
.unwrap_or_else(|| PathBuf::from(format!("/tmp/oidc_session_{hash}")))
}
fn load_session() -> Result<OidcSession, String> {
let path = get_session_cache_path();
fn hash_session_key(sso_url: &str, client_id: &str) -> String {
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
let mut hasher = DefaultHasher::new();
sso_url.hash(&mut hasher);
// Separator so ("abc", "def") and ("ab", "cdef") don't collide.
0u8.hash(&mut hasher);
client_id.hash(&mut hasher);
format!("{:016x}", hasher.finish())
}
fn load_session(sso_url: &str, client_id: &str) -> Result<OidcSession, String> {
let path = get_session_cache_path(sso_url, client_id);
serde_json::from_str(
&fs::read_to_string(&path)
.map_err(|e| format!("Could not load session from {path:?}: {e}"))?,
@@ -96,8 +96,8 @@ fn load_session() -> Result<OidcSession, String> {
.map_err(|e| format!("Could not deserialize session from {path:?}: {e}"))
}
fn save_session(session: &OidcSession) -> Result<(), String> {
let path = get_session_cache_path();
fn save_session(sso_url: &str, client_id: &str, session: &OidcSession) -> Result<(), String> {
let path = get_session_cache_path(sso_url, client_id);
if let Some(parent) = path.parent() {
fs::create_dir_all(parent)
.map_err(|e| format!("Could not create session directory: {e}"))?;
@@ -162,11 +162,27 @@ impl ZitadelOidcAuth {
}
pub async fn authenticate(&self) -> Result<OidcSession, String> {
if let Ok(session) = load_session()
&& !session.is_expired()
{
info!("ZITADEL_OIDC: Using cached session");
return Ok(session);
if let Ok(session) = load_session(&self.sso_url, &self.client_id) {
if !session.is_expired() {
info!("ZITADEL_OIDC: Using cached session");
return Ok(session);
}
if let Some(refresh_token) = session.refresh_token.clone() {
info!("ZITADEL_OIDC: Session expired, attempting silent refresh");
match self.silent_refresh(&refresh_token).await {
Ok(refreshed) => {
if let Err(e) = save_session(&self.sso_url, &self.client_id, &refreshed) {
warn!("ZITADEL_OIDC: Failed to persist refreshed session: {e}");
}
return Ok(refreshed);
}
Err(e) => {
warn!(
"ZITADEL_OIDC: Silent refresh failed ({e}), falling back to device flow"
);
}
}
}
}
info!("ZITADEL_OIDC: Starting device authorization flow");
@@ -176,11 +192,59 @@ impl ZitadelOidcAuth {
let token_response = self
.poll_for_token(&device_code, device_code.interval)
.await?;
let session = self.process_token_response(token_response).await?;
let _ = save_session(&session);
let session = self.process_token_response(token_response, None).await?;
let _ = save_session(&self.sso_url, &self.client_id, &session);
Ok(session)
}
/// Use the OIDC `refresh_token` to obtain a new `id_token` from Zitadel
/// without prompting the user, then exchange it for a fresh OpenBao token
/// via the same JWT path the device flow uses.
///
/// If Zitadel does not rotate the refresh token (i.e. the refresh
/// response omits `refresh_token`), we preserve the previous one so the
/// next refresh can still proceed.
async fn silent_refresh(&self, refresh_token: &str) -> Result<OidcSession, String> {
let token_response = self.refresh_id_token(refresh_token).await?;
self.process_token_response(token_response, Some(refresh_token.to_string()))
.await
}
async fn refresh_id_token(&self, refresh_token: &str) -> Result<TokenResponse, String> {
let client = self.http_client()?;
let params = [
("grant_type", "refresh_token"),
("refresh_token", refresh_token),
("client_id", self.client_id.as_str()),
];
let response = client
.post(format!("{}/oauth/v2/token", self.sso_url))
.form(&params)
.send()
.await
.map_err(|e| format!("Refresh token request failed: {e}"))?;
let status = response.status();
let body = response
.text()
.await
.map_err(|e| format!("Failed to read refresh response body: {e}"))?;
if !status.is_success() {
if let Ok(error) = serde_json::from_str::<TokenErrorResponse>(&body) {
return Err(format!(
"Refresh failed: {} - {}",
error.error,
error.error_description.unwrap_or_default()
));
}
return Err(format!("Refresh failed with HTTP {status}: {body}"));
}
serde_json::from_str(&body).map_err(|e| format!("Failed to parse refresh response: {e}"))
}
fn http_client(&self) -> Result<reqwest::Client, String> {
let mut builder = reqwest::Client::builder();
if self.skip_tls {
@@ -208,22 +272,42 @@ impl ZitadelOidcAuth {
async fn request_device_code(&self) -> Result<DeviceAuthorizationResponse, String> {
let client = self.http_client()?;
let url = format!("{}/oauth/v2/device_authorization", self.sso_url);
let params = [
("client_id", self.client_id.as_str()),
("scope", "openid email profile offline_access"),
];
debug!("ZITADEL_OIDC: POST {url} (client_id={})", self.client_id);
let response = client
.post(format!("{}/oauth/v2/device_authorization", self.sso_url))
.post(&url)
.form(&params)
.send()
.await
.map_err(|e| format!("Device authorization request failed: {e}"))?;
.map_err(|e| format!("Device authorization request to {url} failed: {e}"))?;
response
.json::<DeviceAuthorizationResponse>()
.await
.map_err(|e| format!("Failed to parse device authorization response: {e}"))
let status = response.status();
let body = response.text().await.map_err(|e| {
format!("Failed to read device authorization response body ({status}): {e}")
})?;
if !status.is_success() {
// Surface the actual server response — without this, every
// failure looks like "error decoding response body" which is
// a parser-level error that hides the real cause (most often
// "client not registered for device-code grant").
return Err(format!(
"Device authorization endpoint {url} returned {status}: {body}"
));
}
serde_json::from_str::<DeviceAuthorizationResponse>(&body).map_err(|e| {
format!(
"Failed to parse device authorization response from {url} \
(HTTP {status}): {e}. Body was: {body}"
)
})
}
fn print_verification_instructions(&self, code: &DeviceAuthorizationResponse) {
@@ -240,6 +324,21 @@ impl ZitadelOidcAuth {
}
println!("=================================================");
println!();
// Best-effort: try to launch the operator's default browser
// directly at the device-code page. We prefer the
// verification_uri_complete (code pre-filled, one less
// copy-paste). On failure — headless host, no $BROWSER,
// running over plain SSH, ENOENT on xdg-open — the URL is
// already printed above so the operator can copy it
// manually. Don't gate the flow on the open succeeding.
let target = code
.verification_uri_complete
.as_deref()
.unwrap_or(code.verification_uri.as_str());
if let Err(e) = webbrowser::open(target) {
warn!("ZITADEL_OIDC: could not auto-open browser ({e}); copy the URL above manually");
}
}
async fn poll_for_token(
@@ -374,7 +473,11 @@ impl ZitadelOidcAuth {
Ok((client_token, lease_duration, renewable))
}
async fn process_token_response(&self, response: TokenResponse) -> Result<OidcSession, String> {
async fn process_token_response(
&self,
response: TokenResponse,
previous_refresh_token: Option<String>,
) -> Result<OidcSession, String> {
let (openbao_token, ttl, renewable) = if self.openbao_url.is_some() {
let id_token = response
.id_token
@@ -394,11 +497,16 @@ impl ZitadelOidcAuth {
.map(|d| d.as_secs() as i64)
.unwrap_or(0);
// OAuth allows refresh-token rotation to be optional. If the IdP
// omits a new refresh_token in the response, keep the previous one
// so the next refresh attempt can still proceed.
let refresh_token = response.refresh_token.or(previous_refresh_token);
Ok(OidcSession {
openbao_token,
openbao_token_ttl: ttl,
openbao_renewable: renewable,
refresh_token: response.refresh_token,
refresh_token,
id_token: response.id_token,
expires_at: Some(now + ttl as i64),
})
@@ -436,4 +544,35 @@ mod tests {
};
assert!(!session2.is_expired());
}
#[test]
fn session_hash_is_stable_for_same_input() {
let a = hash_session_key("https://sso.example.com", "client-abc");
let b = hash_session_key("https://sso.example.com", "client-abc");
assert_eq!(a, b);
assert_eq!(a.len(), 16);
}
#[test]
fn session_hash_differs_per_sso_url() {
let a = hash_session_key("https://sso.a.com", "shared-client");
let b = hash_session_key("https://sso.b.com", "shared-client");
assert_ne!(a, b);
}
#[test]
fn session_hash_differs_per_client_id() {
let a = hash_session_key("https://sso.example.com", "client-a");
let b = hash_session_key("https://sso.example.com", "client-b");
assert_ne!(a, b);
}
#[test]
fn session_hash_separator_prevents_concatenation_collision() {
// Without the null-byte separator, ("ab", "cd") and ("abc", "d")
// would hash identically. The separator keeps the inputs distinct.
let a = hash_session_key("ab", "cd");
let b = hash_session_key("abc", "d");
assert_ne!(a, b);
}
}