feat/ask-for-main-disk #222

Merged
johnride merged 6 commits from feat/ask-for-main-disk into master 2026-01-27 17:13:57 +00:00
25 changed files with 155 additions and 2027 deletions

View File

@@ -0,0 +1,26 @@
{
"db_name": "SQLite",
"query": "SELECT host_id, installation_device FROM host_role_mapping WHERE role = ?",
"describe": {
"columns": [
{
"name": "host_id",
"ordinal": 0,
"type_info": "Text"
},
{
"name": "installation_device",
"ordinal": 1,
"type_info": "Text"
}
],
"parameters": {
"Right": 1
},
"nullable": [
false,
true
]
},
"hash": "24f719d57144ecf4daa55f0aa5836c165872d70164401c0388e8d625f1b72d7b"
}

View File

@@ -1,20 +0,0 @@
{
"db_name": "SQLite",
"query": "SELECT host_id FROM host_role_mapping WHERE role = ?",
"describe": {
"columns": [
{
"name": "host_id",
"ordinal": 0,
"type_info": "Text"
}
],
"parameters": {
"Right": 1
},
"nullable": [
false
]
},
"hash": "2ea29df2326f7c84bd4100ad510a3fd4878dc2e217dc83f9bf45a402dfd62a91"
}

View File

@@ -1,12 +1,12 @@
{ {
"db_name": "SQLite", "db_name": "SQLite",
"query": "\n INSERT INTO host_role_mapping (host_id, role)\n VALUES (?, ?)\n ", "query": "\n INSERT INTO host_role_mapping (host_id, role, installation_device)\n VALUES (?, ?, ?)\n ",
"describe": { "describe": {
"columns": [], "columns": [],
"parameters": { "parameters": {
"Right": 2 "Right": 3
}, },
"nullable": [] "nullable": []
}, },
"hash": "df7a7c9cfdd0972e2e0ce7ea444ba8bc9d708a4fb89d5593a0be2bbebde62aff" "hash": "6fcc29cfdbdf3b2cee94a4844e227f09b245dd8f079832a9a7b774151cb03af6"
} }

38
Cargo.lock generated
View File

@@ -1787,25 +1787,6 @@ dependencies = [
"url", "url",
] ]
[[package]]
name = "example-ha-cluster"
version = "0.1.0"
dependencies = [
"brocade",
"cidr",
"env_logger",
"harmony",
"harmony_macros",
"harmony_secret",
"harmony_tui",
"harmony_types",
"log",
"schemars 0.8.22",
"serde",
"tokio",
"url",
]
[[package]] [[package]]
name = "example-kube-rs" name = "example-kube-rs"
version = "0.1.0" version = "0.1.0"
@@ -1878,25 +1859,6 @@ dependencies = [
"url", "url",
] ]
[[package]]
name = "example-nanodc"
version = "0.1.0"
dependencies = [
"brocade",
"cidr",
"env_logger",
"harmony",
"harmony_macros",
"harmony_secret",
"harmony_tui",
"harmony_types",
"log",
"schemars 0.8.22",
"serde",
"tokio",
"url",
]
[[package]] [[package]]
name = "example-nats" name = "example-nats"
version = "0.1.0" version = "0.1.0"

View File

@@ -1,22 +0,0 @@
[package]
name = "example-ha-cluster"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
publish = false
[dependencies]
harmony = { path = "../../harmony" }
harmony_tui = { path = "../../harmony_tui" }
harmony_types = { path = "../../harmony_types" }
cidr = { workspace = true }
tokio = { workspace = true }
harmony_macros = { path = "../../harmony_macros" }
log = { workspace = true }
env_logger = { workspace = true }
url = { workspace = true }
harmony_secret = { path = "../../harmony_secret" }
brocade = { path = "../../brocade" }
serde = { workspace = true }
schemars = "0.8"

View File

@@ -1,15 +0,0 @@
## OPNSense demo
Download the virtualbox snapshot from {{TODO URL}}
Start the virtualbox image
This virtualbox image is configured to use a bridge on the host's physical interface, make sure the bridge is up and the virtual machine can reach internet.
Credentials are opnsense default (root/opnsense)
Run the project with the correct ip address on the command line :
```bash
cargo run -p example-opnsense -- 192.168.5.229
```

View File

@@ -1,144 +0,0 @@
use std::{
net::{IpAddr, Ipv4Addr},
sync::{Arc, OnceLock},
};
use brocade::BrocadeOptions;
use cidr::Ipv4Cidr;
use harmony::{
hardware::{HostCategory, Location, PhysicalHost, SwitchGroup},
infra::{brocade::BrocadeSwitchClient, opnsense::OPNSenseManagementInterface},
inventory::Inventory,
modules::{
dummy::{ErrorScore, PanicScore, SuccessScore},
http::StaticFilesHttpScore,
okd::{dhcp::OKDDhcpScore, dns::OKDDnsScore, load_balancer::OKDLoadBalancerScore},
opnsense::OPNsenseShellCommandScore,
tftp::TftpScore,
},
topology::{LogicalHost, UnmanagedRouter},
};
use harmony_macros::{ip, mac_address};
use harmony_secret::{Secret, SecretManager};
use harmony_types::net::Url;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
#[tokio::main]
async fn main() {
let firewall = harmony::topology::LogicalHost {
ip: ip!("192.168.5.229"),
name: String::from("opnsense-1"),
};
let switch_auth = SecretManager::get_or_prompt::<BrocadeSwitchAuth>()
.await
.expect("Failed to get credentials");
let switches: Vec<IpAddr> = vec![ip!("192.168.5.101")]; // TODO: Adjust me
let brocade_options = BrocadeOptions {
dry_run: *harmony::config::DRY_RUN,
..Default::default()
};
let switch_client = BrocadeSwitchClient::init(
&switches,
&switch_auth.username,
&switch_auth.password,
brocade_options,
)
.await
.expect("Failed to connect to switch");
let switch_client = Arc::new(switch_client);
let opnsense = Arc::new(
harmony::infra::opnsense::OPNSenseFirewall::new(firewall, None, "root", "opnsense").await,
);
let lan_subnet = Ipv4Addr::new(10, 100, 8, 0);
let gateway_ipv4 = Ipv4Addr::new(10, 100, 8, 1);
let gateway_ip = IpAddr::V4(gateway_ipv4);
let topology = harmony::topology::HAClusterTopology {
kubeconfig: None,
domain_name: "demo.harmony.mcd".to_string(),
router: Arc::new(UnmanagedRouter::new(
gateway_ip,
Ipv4Cidr::new(lan_subnet, 24).unwrap(),
)),
load_balancer: opnsense.clone(),
firewall: opnsense.clone(),
tftp_server: opnsense.clone(),
http_server: opnsense.clone(),
dhcp_server: opnsense.clone(),
dns_server: opnsense.clone(),
control_plane: vec![LogicalHost {
ip: ip!("10.100.8.20"),
name: "cp0".to_string(),
}],
bootstrap_host: LogicalHost {
ip: ip!("10.100.8.20"),
name: "cp0".to_string(),
},
workers: vec![],
switch_client: switch_client.clone(),
node_exporter: opnsense.clone(),
network_manager: OnceLock::new(),
};
let inventory = Inventory {
location: Location::new(
"232 des Éperviers, Wendake, Qc, G0A 4V0".to_string(),
"wk".to_string(),
),
switch: SwitchGroup::from([]),
firewall_mgmt: Box::new(OPNSenseManagementInterface::new()),
storage_host: vec![],
worker_host: vec![],
control_plane_host: vec![
PhysicalHost::empty(HostCategory::Server)
.mac_address(mac_address!("08:00:27:62:EC:C3")),
],
};
// TODO regroup smaller scores in a larger one such as this
// let okd_boostrap_preparation();
let dhcp_score = OKDDhcpScore::new(&topology, &inventory);
let dns_score = OKDDnsScore::new(&topology);
let load_balancer_score = OKDLoadBalancerScore::new(&topology);
let tftp_score = TftpScore::new(Url::LocalFolder("./data/watchguard/tftpboot".to_string()));
let http_score = StaticFilesHttpScore {
folder_to_serve: Some(Url::LocalFolder(
"./data/watchguard/pxe-http-files".to_string(),
)),
files: vec![],
remote_path: None,
};
harmony_tui::run(
inventory,
topology,
vec![
Box::new(dns_score),
Box::new(dhcp_score),
Box::new(load_balancer_score),
Box::new(tftp_score),
Box::new(http_score),
Box::new(OPNsenseShellCommandScore {
opnsense: opnsense.get_opnsense_config(),
command: "touch /tmp/helloharmonytouching".to_string(),
}),
Box::new(SuccessScore {}),
Box::new(ErrorScore {}),
Box::new(PanicScore {}),
],
)
.await
.unwrap();
}
#[derive(Secret, Serialize, JsonSchema, Deserialize, Debug)]
pub struct BrocadeSwitchAuth {
pub username: String,
pub password: String,
}

View File

@@ -1,22 +0,0 @@
[package]
name = "example-nanodc"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
publish = false
[dependencies]
harmony = { path = "../../harmony" }
harmony_tui = { path = "../../harmony_tui" }
harmony_types = { path = "../../harmony_types" }
cidr = { workspace = true }
tokio = { workspace = true }
harmony_macros = { path = "../../harmony_macros" }
harmony_secret = { path = "../../harmony_secret" }
log = { workspace = true }
env_logger = { workspace = true }
url = { workspace = true }
serde = { workspace = true }
brocade = { path = "../../brocade" }
schemars = "0.8"

View File

@@ -1,4 +0,0 @@
#!/bin/bash
helm install --create-namespace --namespace rook-ceph rook-ceph-cluster \
--set operatorNamespace=rook-ceph rook-release/rook-ceph-cluster -f values.yaml

View File

@@ -1,721 +0,0 @@
# Default values for a single rook-ceph cluster
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
# -- Namespace of the main rook operator
operatorNamespace: rook-ceph
# -- The metadata.name of the CephCluster CR
# @default -- The same as the namespace
clusterName:
# -- Optional override of the target kubernetes version
kubeVersion:
# -- Cluster ceph.conf override
configOverride:
# configOverride: |
# [global]
# mon_allow_pool_delete = true
# osd_pool_default_size = 3
# osd_pool_default_min_size = 2
# Installs a debugging toolbox deployment
toolbox:
# -- Enable Ceph debugging pod deployment. See [toolbox](../Troubleshooting/ceph-toolbox.md)
enabled: true
# -- Toolbox image, defaults to the image used by the Ceph cluster
image: #quay.io/ceph/ceph:v19.2.2
# -- Toolbox tolerations
tolerations: []
# -- Toolbox affinity
affinity: {}
# -- Toolbox container security context
containerSecurityContext:
runAsNonRoot: true
runAsUser: 2016
runAsGroup: 2016
capabilities:
drop: ["ALL"]
# -- Toolbox resources
resources:
limits:
memory: "1Gi"
requests:
cpu: "100m"
memory: "128Mi"
# -- Set the priority class for the toolbox if desired
priorityClassName:
monitoring:
# -- Enable Prometheus integration, will also create necessary RBAC rules to allow Operator to create ServiceMonitors.
# Monitoring requires Prometheus to be pre-installed
enabled: false
# -- Whether to disable the metrics reported by Ceph. If false, the prometheus mgr module and Ceph exporter are enabled
metricsDisabled: false
# -- Whether to create the Prometheus rules for Ceph alerts
createPrometheusRules: false
# -- The namespace in which to create the prometheus rules, if different from the rook cluster namespace.
# If you have multiple rook-ceph clusters in the same k8s cluster, choose the same namespace (ideally, namespace with prometheus
# deployed) to set rulesNamespaceOverride for all the clusters. Otherwise, you will get duplicate alerts with multiple alert definitions.
rulesNamespaceOverride:
# Monitoring settings for external clusters:
# externalMgrEndpoints: <list of endpoints>
# externalMgrPrometheusPort: <port>
# Scrape interval for prometheus
# interval: 10s
# allow adding custom labels and annotations to the prometheus rule
prometheusRule:
# -- Labels applied to PrometheusRule
labels: {}
# -- Annotations applied to PrometheusRule
annotations: {}
# -- Create & use PSP resources. Set this to the same value as the rook-ceph chart.
pspEnable: false
# imagePullSecrets option allow to pull docker images from private docker registry. Option will be passed to all service accounts.
# imagePullSecrets:
# - name: my-registry-secret
# All values below are taken from the CephCluster CRD
# -- Cluster configuration.
# @default -- See [below](#ceph-cluster-spec)
cephClusterSpec:
# This cluster spec example is for a converged cluster where all the Ceph daemons are running locally,
# as in the host-based example (cluster.yaml). For a different configuration such as a
# PVC-based cluster (cluster-on-pvc.yaml), external cluster (cluster-external.yaml),
# or stretch cluster (cluster-stretched.yaml), replace this entire `cephClusterSpec`
# with the specs from those examples.
# For more details, check https://rook.io/docs/rook/v1.10/CRDs/Cluster/ceph-cluster-crd/
cephVersion:
# The container image used to launch the Ceph daemon pods (mon, mgr, osd, mds, rgw).
# v18 is Reef, v19 is Squid
# RECOMMENDATION: In production, use a specific version tag instead of the general v18 flag, which pulls the latest release and could result in different
# versions running within the cluster. See tags available at https://hub.docker.com/r/ceph/ceph/tags/.
# If you want to be more precise, you can always use a timestamp tag such as quay.io/ceph/ceph:v19.2.2-20250409
# This tag might not contain a new Ceph version, just security fixes from the underlying operating system, which will reduce vulnerabilities
image: quay.io/ceph/ceph:v19.2.2
# Whether to allow unsupported versions of Ceph. Currently Reef and Squid are supported.
# Future versions such as Tentacle (v20) would require this to be set to `true`.
# Do not set to true in production.
allowUnsupported: false
# The path on the host where configuration files will be persisted. Must be specified. If there are multiple clusters, the directory must be unique for each cluster.
# Important: if you reinstall the cluster, make sure you delete this directory from each host or else the mons will fail to start on the new cluster.
# In Minikube, the '/data' directory is configured to persist across reboots. Use "/data/rook" in Minikube environment.
dataDirHostPath: /var/lib/rook
# Whether or not upgrade should continue even if a check fails
# This means Ceph's status could be degraded and we don't recommend upgrading but you might decide otherwise
# Use at your OWN risk
# To understand Rook's upgrade process of Ceph, read https://rook.io/docs/rook/v1.10/Upgrade/ceph-upgrade/
skipUpgradeChecks: false
# Whether or not continue if PGs are not clean during an upgrade
continueUpgradeAfterChecksEvenIfNotHealthy: false
# WaitTimeoutForHealthyOSDInMinutes defines the time (in minutes) the operator would wait before an OSD can be stopped for upgrade or restart.
# If the timeout exceeds and OSD is not ok to stop, then the operator would skip upgrade for the current OSD and proceed with the next one
# if `continueUpgradeAfterChecksEvenIfNotHealthy` is `false`. If `continueUpgradeAfterChecksEvenIfNotHealthy` is `true`, then operator would
# continue with the upgrade of an OSD even if its not ok to stop after the timeout. This timeout won't be applied if `skipUpgradeChecks` is `true`.
# The default wait timeout is 10 minutes.
waitTimeoutForHealthyOSDInMinutes: 10
# Whether or not requires PGs are clean before an OSD upgrade. If set to `true` OSD upgrade process won't start until PGs are healthy.
# This configuration will be ignored if `skipUpgradeChecks` is `true`.
# Default is false.
upgradeOSDRequiresHealthyPGs: false
mon:
# Set the number of mons to be started. Generally recommended to be 3.
# For highest availability, an odd number of mons should be specified.
count: 3
# The mons should be on unique nodes. For production, at least 3 nodes are recommended for this reason.
# Mons should only be allowed on the same node for test environments where data loss is acceptable.
allowMultiplePerNode: false
mgr:
# When higher availability of the mgr is needed, increase the count to 2.
# In that case, one mgr will be active and one in standby. When Ceph updates which
# mgr is active, Rook will update the mgr services to match the active mgr.
count: 2
allowMultiplePerNode: false
modules:
# List of modules to optionally enable or disable.
# Note the "dashboard" and "monitoring" modules are already configured by other settings in the cluster CR.
# - name: rook
# enabled: true
# enable the ceph dashboard for viewing cluster status
dashboard:
enabled: true
# serve the dashboard under a subpath (useful when you are accessing the dashboard via a reverse proxy)
# urlPrefix: /ceph-dashboard
# serve the dashboard at the given port.
# port: 8443
# Serve the dashboard using SSL (if using ingress to expose the dashboard and `ssl: true` you need to set
# the corresponding "backend protocol" annotation(s) for your ingress controller of choice)
ssl: true
# Network configuration, see: https://github.com/rook/rook/blob/master/Documentation/CRDs/Cluster/ceph-cluster-crd.md#network-configuration-settings
network:
connections:
# Whether to encrypt the data in transit across the wire to prevent eavesdropping the data on the network.
# The default is false. When encryption is enabled, all communication between clients and Ceph daemons, or between Ceph daemons will be encrypted.
# When encryption is not enabled, clients still establish a strong initial authentication and data integrity is still validated with a crc check.
# IMPORTANT: Encryption requires the 5.11 kernel for the latest nbd and cephfs drivers. Alternatively for testing only,
# you can set the "mounter: rbd-nbd" in the rbd storage class, or "mounter: fuse" in the cephfs storage class.
# The nbd and fuse drivers are *not* recommended in production since restarting the csi driver pod will disconnect the volumes.
encryption:
enabled: false
# Whether to compress the data in transit across the wire. The default is false.
# The kernel requirements above for encryption also apply to compression.
compression:
enabled: false
# Whether to require communication over msgr2. If true, the msgr v1 port (6789) will be disabled
# and clients will be required to connect to the Ceph cluster with the v2 port (3300).
# Requires a kernel that supports msgr v2 (kernel 5.11 or CentOS 8.4 or newer).
requireMsgr2: false
# # enable host networking
# provider: host
# # EXPERIMENTAL: enable the Multus network provider
# provider: multus
# selectors:
# # The selector keys are required to be `public` and `cluster`.
# # Based on the configuration, the operator will do the following:
# # 1. if only the `public` selector key is specified both public_network and cluster_network Ceph settings will listen on that interface
# # 2. if both `public` and `cluster` selector keys are specified the first one will point to 'public_network' flag and the second one to 'cluster_network'
# #
# # In order to work, each selector value must match a NetworkAttachmentDefinition object in Multus
# #
# # public: public-conf --> NetworkAttachmentDefinition object name in Multus
# # cluster: cluster-conf --> NetworkAttachmentDefinition object name in Multus
# # Provide internet protocol version. IPv6, IPv4 or empty string are valid options. Empty string would mean IPv4
# ipFamily: "IPv6"
# # Ceph daemons to listen on both IPv4 and Ipv6 networks
# dualStack: false
# enable the crash collector for ceph daemon crash collection
crashCollector:
disable: false
# Uncomment daysToRetain to prune ceph crash entries older than the
# specified number of days.
# daysToRetain: 30
# enable log collector, daemons will log on files and rotate
logCollector:
enabled: true
periodicity: daily # one of: hourly, daily, weekly, monthly
maxLogSize: 500M # SUFFIX may be 'M' or 'G'. Must be at least 1M.
# automate [data cleanup process](https://github.com/rook/rook/blob/master/Documentation/Storage-Configuration/ceph-teardown.md#delete-the-data-on-hosts) in cluster destruction.
cleanupPolicy:
# Since cluster cleanup is destructive to data, confirmation is required.
# To destroy all Rook data on hosts during uninstall, confirmation must be set to "yes-really-destroy-data".
# This value should only be set when the cluster is about to be deleted. After the confirmation is set,
# Rook will immediately stop configuring the cluster and only wait for the delete command.
# If the empty string is set, Rook will not destroy any data on hosts during uninstall.
confirmation: ""
# sanitizeDisks represents settings for sanitizing OSD disks on cluster deletion
sanitizeDisks:
# method indicates if the entire disk should be sanitized or simply ceph's metadata
# in both case, re-install is possible
# possible choices are 'complete' or 'quick' (default)
method: quick
# dataSource indicate where to get random bytes from to write on the disk
# possible choices are 'zero' (default) or 'random'
# using random sources will consume entropy from the system and will take much more time then the zero source
dataSource: zero
# iteration overwrite N times instead of the default (1)
# takes an integer value
iteration: 1
# allowUninstallWithVolumes defines how the uninstall should be performed
# If set to true, cephCluster deletion does not wait for the PVs to be deleted.
allowUninstallWithVolumes: false
# To control where various services will be scheduled by kubernetes, use the placement configuration sections below.
# The example under 'all' would have all services scheduled on kubernetes nodes labeled with 'role=storage-node' and
# tolerate taints with a key of 'storage-node'.
# placement:
# all:
# nodeAffinity:
# requiredDuringSchedulingIgnoredDuringExecution:
# nodeSelectorTerms:
# - matchExpressions:
# - key: role
# operator: In
# values:
# - storage-node
# podAffinity:
# podAntiAffinity:
# topologySpreadConstraints:
# tolerations:
# - key: storage-node
# operator: Exists
# # The above placement information can also be specified for mon, osd, and mgr components
# mon:
# # Monitor deployments may contain an anti-affinity rule for avoiding monitor
# # collocation on the same node. This is a required rule when host network is used
# # or when AllowMultiplePerNode is false. Otherwise this anti-affinity rule is a
# # preferred rule with weight: 50.
# osd:
# mgr:
# cleanup:
# annotations:
# all:
# mon:
# osd:
# cleanup:
# prepareosd:
# # If no mgr annotations are set, prometheus scrape annotations will be set by default.
# mgr:
# dashboard:
# labels:
# all:
# mon:
# osd:
# cleanup:
# mgr:
# prepareosd:
# # monitoring is a list of key-value pairs. It is injected into all the monitoring resources created by operator.
# # These labels can be passed as LabelSelector to Prometheus
# monitoring:
# dashboard:
resources:
mgr:
limits:
memory: "1Gi"
requests:
cpu: "500m"
memory: "512Mi"
mon:
limits:
memory: "2Gi"
requests:
cpu: "1000m"
memory: "1Gi"
osd:
limits:
memory: "4Gi"
requests:
cpu: "1000m"
memory: "4Gi"
prepareosd:
# limits: It is not recommended to set limits on the OSD prepare job
# since it's a one-time burst for memory that must be allowed to
# complete without an OOM kill. Note however that if a k8s
# limitRange guardrail is defined external to Rook, the lack of
# a limit here may result in a sync failure, in which case a
# limit should be added. 1200Mi may suffice for up to 15Ti
# OSDs ; for larger devices 2Gi may be required.
# cf. https://github.com/rook/rook/pull/11103
requests:
cpu: "500m"
memory: "50Mi"
mgr-sidecar:
limits:
memory: "100Mi"
requests:
cpu: "100m"
memory: "40Mi"
crashcollector:
limits:
memory: "60Mi"
requests:
cpu: "100m"
memory: "60Mi"
logcollector:
limits:
memory: "1Gi"
requests:
cpu: "100m"
memory: "100Mi"
cleanup:
limits:
memory: "1Gi"
requests:
cpu: "500m"
memory: "100Mi"
exporter:
limits:
memory: "128Mi"
requests:
cpu: "50m"
memory: "50Mi"
# The option to automatically remove OSDs that are out and are safe to destroy.
removeOSDsIfOutAndSafeToRemove: false
# priority classes to apply to ceph resources
priorityClassNames:
mon: system-node-critical
osd: system-node-critical
mgr: system-cluster-critical
storage: # cluster level storage configuration and selection
useAllNodes: true
useAllDevices: true
# deviceFilter:
# config:
# crushRoot: "custom-root" # specify a non-default root label for the CRUSH map
# metadataDevice: "md0" # specify a non-rotational storage so ceph-volume will use it as block db device of bluestore.
# databaseSizeMB: "1024" # uncomment if the disks are smaller than 100 GB
# osdsPerDevice: "1" # this value can be overridden at the node or device level
# encryptedDevice: "true" # the default value for this option is "false"
# # Individual nodes and their config can be specified as well, but 'useAllNodes' above must be set to false. Then, only the named
# # nodes below will be used as storage resources. Each node's 'name' field should match their 'kubernetes.io/hostname' label.
# nodes:
# - name: "172.17.4.201"
# devices: # specific devices to use for storage can be specified for each node
# - name: "sdb"
# - name: "nvme01" # multiple osds can be created on high performance devices
# config:
# osdsPerDevice: "5"
# - name: "/dev/disk/by-id/ata-ST4000DM004-XXXX" # devices can be specified using full udev paths
# config: # configuration can be specified at the node level which overrides the cluster level config
# - name: "172.17.4.301"
# deviceFilter: "^sd."
# The section for configuring management of daemon disruptions during upgrade or fencing.
disruptionManagement:
# If true, the operator will create and manage PodDisruptionBudgets for OSD, Mon, RGW, and MDS daemons. OSD PDBs are managed dynamically
# via the strategy outlined in the [design](https://github.com/rook/rook/blob/master/design/ceph/ceph-managed-disruptionbudgets.md). The operator will
# block eviction of OSDs by default and unblock them safely when drains are detected.
managePodBudgets: true
# A duration in minutes that determines how long an entire failureDomain like `region/zone/host` will be held in `noout` (in addition to the
# default DOWN/OUT interval) when it is draining. This is only relevant when `managePodBudgets` is `true`. The default value is `30` minutes.
osdMaintenanceTimeout: 30
# Configure the healthcheck and liveness probes for ceph pods.
# Valid values for daemons are 'mon', 'osd', 'status'
healthCheck:
daemonHealth:
mon:
disabled: false
interval: 45s
osd:
disabled: false
interval: 60s
status:
disabled: false
interval: 60s
# Change pod liveness probe, it works for all mon, mgr, and osd pods.
livenessProbe:
mon:
disabled: false
mgr:
disabled: false
osd:
disabled: false
ingress:
# -- Enable an ingress for the ceph-dashboard
dashboard:
# {}
# labels:
# external-dns/private: "true"
annotations:
"route.openshift.io/termination": "passthrough"
# external-dns.alpha.kubernetes.io/hostname: dashboard.example.com
# nginx.ingress.kubernetes.io/rewrite-target: /ceph-dashboard/$2
# If the dashboard has ssl: true the following will make sure the NGINX Ingress controller can expose the dashboard correctly
# nginx.ingress.kubernetes.io/backend-protocol: "HTTPS"
# nginx.ingress.kubernetes.io/server-snippet: |
# proxy_ssl_verify off;
host:
name: ceph.apps.ncd0.harmony.mcd
path: null # TODO the chart does not allow removing the path, and it causes openshift to fail creating a route, because path is not supported with termination mode passthrough
pathType: ImplementationSpecific
tls:
- {}
# secretName: testsecret-tls
# Note: Only one of ingress class annotation or the `ingressClassName:` can be used at a time
# to set the ingress class
# ingressClassName: openshift-default
# labels:
# external-dns/private: "true"
# annotations:
# external-dns.alpha.kubernetes.io/hostname: dashboard.example.com
# nginx.ingress.kubernetes.io/rewrite-target: /ceph-dashboard/$2
# If the dashboard has ssl: true the following will make sure the NGINX Ingress controller can expose the dashboard correctly
# nginx.ingress.kubernetes.io/backend-protocol: "HTTPS"
# nginx.ingress.kubernetes.io/server-snippet: |
# proxy_ssl_verify off;
# host:
# name: dashboard.example.com
# path: "/ceph-dashboard(/|$)(.*)"
# pathType: Prefix
# tls:
# - hosts:
# - dashboard.example.com
# secretName: testsecret-tls
## Note: Only one of ingress class annotation or the `ingressClassName:` can be used at a time
## to set the ingress class
# ingressClassName: nginx
# -- A list of CephBlockPool configurations to deploy
# @default -- See [below](#ceph-block-pools)
cephBlockPools:
- name: ceph-blockpool
# see https://github.com/rook/rook/blob/master/Documentation/CRDs/Block-Storage/ceph-block-pool-crd.md#spec for available configuration
spec:
failureDomain: host
replicated:
size: 3
# Enables collecting RBD per-image IO statistics by enabling dynamic OSD performance counters. Defaults to false.
# For reference: https://docs.ceph.com/docs/latest/mgr/prometheus/#rbd-io-statistics
# enableRBDStats: true
storageClass:
enabled: true
name: ceph-block
annotations: {}
labels: {}
isDefault: true
reclaimPolicy: Delete
allowVolumeExpansion: true
volumeBindingMode: "Immediate"
mountOptions: []
# see https://kubernetes.io/docs/concepts/storage/storage-classes/#allowed-topologies
allowedTopologies: []
# - matchLabelExpressions:
# - key: rook-ceph-role
# values:
# - storage-node
# see https://github.com/rook/rook/blob/master/Documentation/Storage-Configuration/Block-Storage-RBD/block-storage.md#provision-storage for available configuration
parameters:
# (optional) mapOptions is a comma-separated list of map options.
# For krbd options refer
# https://docs.ceph.com/docs/latest/man/8/rbd/#kernel-rbd-krbd-options
# For nbd options refer
# https://docs.ceph.com/docs/latest/man/8/rbd-nbd/#options
# mapOptions: lock_on_read,queue_depth=1024
# (optional) unmapOptions is a comma-separated list of unmap options.
# For krbd options refer
# https://docs.ceph.com/docs/latest/man/8/rbd/#kernel-rbd-krbd-options
# For nbd options refer
# https://docs.ceph.com/docs/latest/man/8/rbd-nbd/#options
# unmapOptions: force
# RBD image format. Defaults to "2".
imageFormat: "2"
# RBD image features, equivalent to OR'd bitfield value: 63
# Available for imageFormat: "2". Older releases of CSI RBD
# support only the `layering` feature. The Linux kernel (KRBD) supports the
# full feature complement as of 5.4
imageFeatures: layering
# These secrets contain Ceph admin credentials.
csi.storage.k8s.io/provisioner-secret-name: rook-csi-rbd-provisioner
csi.storage.k8s.io/provisioner-secret-namespace: "{{ .Release.Namespace }}"
csi.storage.k8s.io/controller-expand-secret-name: rook-csi-rbd-provisioner
csi.storage.k8s.io/controller-expand-secret-namespace: "{{ .Release.Namespace }}"
csi.storage.k8s.io/node-stage-secret-name: rook-csi-rbd-node
csi.storage.k8s.io/node-stage-secret-namespace: "{{ .Release.Namespace }}"
# Specify the filesystem type of the volume. If not specified, csi-provisioner
# will set default as `ext4`. Note that `xfs` is not recommended due to potential deadlock
# in hyperconverged settings where the volume is mounted on the same node as the osds.
csi.storage.k8s.io/fstype: ext4
# -- A list of CephFileSystem configurations to deploy
# @default -- See [below](#ceph-file-systems)
cephFileSystems:
- name: ceph-filesystem
# see https://github.com/rook/rook/blob/master/Documentation/CRDs/Shared-Filesystem/ceph-filesystem-crd.md#filesystem-settings for available configuration
spec:
metadataPool:
replicated:
size: 3
dataPools:
- failureDomain: host
replicated:
size: 3
# Optional and highly recommended, 'data0' by default, see https://github.com/rook/rook/blob/master/Documentation/CRDs/Shared-Filesystem/ceph-filesystem-crd.md#pools
name: data0
metadataServer:
activeCount: 1
activeStandby: true
resources:
limits:
memory: "4Gi"
requests:
cpu: "1000m"
memory: "4Gi"
priorityClassName: system-cluster-critical
storageClass:
enabled: true
isDefault: false
name: ceph-filesystem
# (Optional) specify a data pool to use, must be the name of one of the data pools above, 'data0' by default
pool: data0
reclaimPolicy: Delete
allowVolumeExpansion: true
volumeBindingMode: "Immediate"
annotations: {}
labels: {}
mountOptions: []
# see https://github.com/rook/rook/blob/master/Documentation/Storage-Configuration/Shared-Filesystem-CephFS/filesystem-storage.md#provision-storage for available configuration
parameters:
# The secrets contain Ceph admin credentials.
csi.storage.k8s.io/provisioner-secret-name: rook-csi-cephfs-provisioner
csi.storage.k8s.io/provisioner-secret-namespace: "{{ .Release.Namespace }}"
csi.storage.k8s.io/controller-expand-secret-name: rook-csi-cephfs-provisioner
csi.storage.k8s.io/controller-expand-secret-namespace: "{{ .Release.Namespace }}"
csi.storage.k8s.io/node-stage-secret-name: rook-csi-cephfs-node
csi.storage.k8s.io/node-stage-secret-namespace: "{{ .Release.Namespace }}"
# Specify the filesystem type of the volume. If not specified, csi-provisioner
# will set default as `ext4`. Note that `xfs` is not recommended due to potential deadlock
# in hyperconverged settings where the volume is mounted on the same node as the osds.
csi.storage.k8s.io/fstype: ext4
# -- Settings for the filesystem snapshot class
# @default -- See [CephFS Snapshots](../Storage-Configuration/Ceph-CSI/ceph-csi-snapshot.md#cephfs-snapshots)
cephFileSystemVolumeSnapshotClass:
enabled: false
name: ceph-filesystem
isDefault: true
deletionPolicy: Delete
annotations: {}
labels: {}
# see https://rook.io/docs/rook/v1.10/Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#cephfs-snapshots for available configuration
parameters: {}
# -- Settings for the block pool snapshot class
# @default -- See [RBD Snapshots](../Storage-Configuration/Ceph-CSI/ceph-csi-snapshot.md#rbd-snapshots)
cephBlockPoolsVolumeSnapshotClass:
enabled: false
name: ceph-block
isDefault: false
deletionPolicy: Delete
annotations: {}
labels: {}
# see https://rook.io/docs/rook/v1.10/Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#rbd-snapshots for available configuration
parameters: {}
# -- A list of CephObjectStore configurations to deploy
# @default -- See [below](#ceph-object-stores)
cephObjectStores:
- name: ceph-objectstore
# see https://github.com/rook/rook/blob/master/Documentation/CRDs/Object-Storage/ceph-object-store-crd.md#object-store-settings for available configuration
spec:
metadataPool:
failureDomain: host
replicated:
size: 3
dataPool:
failureDomain: host
erasureCoded:
dataChunks: 2
codingChunks: 1
parameters:
bulk: "true"
preservePoolsOnDelete: true
gateway:
port: 80
resources:
limits:
memory: "2Gi"
requests:
cpu: "1000m"
memory: "1Gi"
# securePort: 443
# sslCertificateRef:
instances: 1
priorityClassName: system-cluster-critical
# opsLogSidecar:
# resources:
# limits:
# memory: "100Mi"
# requests:
# cpu: "100m"
# memory: "40Mi"
storageClass:
enabled: true
name: ceph-bucket
reclaimPolicy: Delete
volumeBindingMode: "Immediate"
annotations: {}
labels: {}
# see https://github.com/rook/rook/blob/master/Documentation/Storage-Configuration/Object-Storage-RGW/ceph-object-bucket-claim.md#storageclass for available configuration
parameters:
# note: objectStoreNamespace and objectStoreName are configured by the chart
region: us-east-1
ingress:
# Enable an ingress for the ceph-objectstore
enabled: true
# The ingress port by default will be the object store's "securePort" (if set), or the gateway "port".
# To override those defaults, set this ingress port to the desired port.
# port: 80
# annotations: {}
host:
name: objectstore.apps.ncd0.harmony.mcd
path: /
pathType: Prefix
# tls:
# - hosts:
# - objectstore.example.com
# secretName: ceph-objectstore-tls
# ingressClassName: nginx
## cephECBlockPools are disabled by default, please remove the comments and set desired values to enable it
## For erasure coded a replicated metadata pool is required.
## https://rook.io/docs/rook/latest/CRDs/Shared-Filesystem/ceph-filesystem-crd/#erasure-coded
#cephECBlockPools:
# - name: ec-pool
# spec:
# metadataPool:
# replicated:
# size: 2
# dataPool:
# failureDomain: osd
# erasureCoded:
# dataChunks: 2
# codingChunks: 1
# deviceClass: hdd
#
# parameters:
# # clusterID is the namespace where the rook cluster is running
# # If you change this namespace, also change the namespace below where the secret namespaces are defined
# clusterID: rook-ceph # namespace:cluster
# # (optional) mapOptions is a comma-separated list of map options.
# # For krbd options refer
# # https://docs.ceph.com/docs/latest/man/8/rbd/#kernel-rbd-krbd-options
# # For nbd options refer
# # https://docs.ceph.com/docs/latest/man/8/rbd-nbd/#options
# # mapOptions: lock_on_read,queue_depth=1024
#
# # (optional) unmapOptions is a comma-separated list of unmap options.
# # For krbd options refer
# # https://docs.ceph.com/docs/latest/man/8/rbd/#kernel-rbd-krbd-options
# # For nbd options refer
# # https://docs.ceph.com/docs/latest/man/8/rbd-nbd/#options
# # unmapOptions: force
#
# # RBD image format. Defaults to "2".
# imageFormat: "2"
#
# # RBD image features, equivalent to OR'd bitfield value: 63
# # Available for imageFormat: "2". Older releases of CSI RBD
# # support only the `layering` feature. The Linux kernel (KRBD) supports the
# # full feature complement as of 5.4
# # imageFeatures: layering,fast-diff,object-map,deep-flatten,exclusive-lock
# imageFeatures: layering
#
# storageClass:
# provisioner: rook-ceph.rbd.csi.ceph.com # csi-provisioner-name
# enabled: true
# name: rook-ceph-block
# isDefault: false
# annotations: { }
# labels: { }
# allowVolumeExpansion: true
# reclaimPolicy: Delete
# -- CSI driver name prefix for cephfs, rbd and nfs.
# @default -- `namespace name where rook-ceph operator is deployed`
csiDriverNamePrefix:

View File

@@ -1,3 +0,0 @@
#!/bin/bash
helm repo add rook-release https://charts.rook.io/release
helm install --create-namespace --namespace rook-ceph rook-ceph rook-release/rook-ceph -f values.yaml

View File

@@ -1,674 +0,0 @@
# Default values for rook-ceph-operator
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
image:
# -- Image
repository: docker.io/rook/ceph
# -- Image tag
# @default -- `master`
tag: v1.17.1
# -- Image pull policy
pullPolicy: IfNotPresent
crds:
# -- Whether the helm chart should create and update the CRDs. If false, the CRDs must be
# managed independently with deploy/examples/crds.yaml.
# **WARNING** Only set during first deployment. If later disabled the cluster may be DESTROYED.
# If the CRDs are deleted in this case, see
# [the disaster recovery guide](https://rook.io/docs/rook/latest/Troubleshooting/disaster-recovery/#restoring-crds-after-deletion)
# to restore them.
enabled: true
# -- Pod resource requests & limits
resources:
limits:
memory: 512Mi
requests:
cpu: 200m
memory: 128Mi
# -- Kubernetes [`nodeSelector`](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector) to add to the Deployment.
nodeSelector: {}
# Constraint rook-ceph-operator Deployment to nodes with label `disktype: ssd`.
# For more info, see https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector
# disktype: ssd
# -- List of Kubernetes [`tolerations`](https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/) to add to the Deployment.
tolerations: []
# -- Delay to use for the `node.kubernetes.io/unreachable` pod failure toleration to override
# the Kubernetes default of 5 minutes
unreachableNodeTolerationSeconds: 5
# -- Whether the operator should watch cluster CRD in its own namespace or not
currentNamespaceOnly: false
# -- Custom pod labels for the operator
operatorPodLabels: {}
# -- Pod annotations
annotations: {}
# -- Global log level for the operator.
# Options: `ERROR`, `WARNING`, `INFO`, `DEBUG`
logLevel: INFO
# -- If true, create & use RBAC resources
rbacEnable: true
rbacAggregate:
# -- If true, create a ClusterRole aggregated to [user facing roles](https://kubernetes.io/docs/reference/access-authn-authz/rbac/#user-facing-roles) for objectbucketclaims
enableOBCs: false
# -- If true, create & use PSP resources
pspEnable: false
# -- Set the priority class for the rook operator deployment if desired
priorityClassName:
# -- Set the container security context for the operator
containerSecurityContext:
runAsNonRoot: true
runAsUser: 2016
runAsGroup: 2016
capabilities:
drop: ["ALL"]
# -- If true, loop devices are allowed to be used for osds in test clusters
allowLoopDevices: false
# Settings for whether to disable the drivers or other daemons if they are not
# needed
csi:
# -- Enable Ceph CSI RBD driver
enableRbdDriver: true
# -- Enable Ceph CSI CephFS driver
enableCephfsDriver: true
# -- Disable the CSI driver.
disableCsiDriver: "false"
# -- Enable host networking for CSI CephFS and RBD nodeplugins. This may be necessary
# in some network configurations where the SDN does not provide access to an external cluster or
# there is significant drop in read/write performance
enableCSIHostNetwork: true
# -- Enable Snapshotter in CephFS provisioner pod
enableCephfsSnapshotter: true
# -- Enable Snapshotter in NFS provisioner pod
enableNFSSnapshotter: true
# -- Enable Snapshotter in RBD provisioner pod
enableRBDSnapshotter: true
# -- Enable Host mount for `/etc/selinux` directory for Ceph CSI nodeplugins
enablePluginSelinuxHostMount: false
# -- Enable Ceph CSI PVC encryption support
enableCSIEncryption: false
# -- Enable volume group snapshot feature. This feature is
# enabled by default as long as the necessary CRDs are available in the cluster.
enableVolumeGroupSnapshot: true
# -- PriorityClassName to be set on csi driver plugin pods
pluginPriorityClassName: system-node-critical
# -- PriorityClassName to be set on csi driver provisioner pods
provisionerPriorityClassName: system-cluster-critical
# -- Policy for modifying a volume's ownership or permissions when the RBD PVC is being mounted.
# supported values are documented at https://kubernetes-csi.github.io/docs/support-fsgroup.html
rbdFSGroupPolicy: "File"
# -- Policy for modifying a volume's ownership or permissions when the CephFS PVC is being mounted.
# supported values are documented at https://kubernetes-csi.github.io/docs/support-fsgroup.html
cephFSFSGroupPolicy: "File"
# -- Policy for modifying a volume's ownership or permissions when the NFS PVC is being mounted.
# supported values are documented at https://kubernetes-csi.github.io/docs/support-fsgroup.html
nfsFSGroupPolicy: "File"
# -- OMAP generator generates the omap mapping between the PV name and the RBD image
# which helps CSI to identify the rbd images for CSI operations.
# `CSI_ENABLE_OMAP_GENERATOR` needs to be enabled when we are using rbd mirroring feature.
# By default OMAP generator is disabled and when enabled, it will be deployed as a
# sidecar with CSI provisioner pod, to enable set it to true.
enableOMAPGenerator: false
# -- Set CephFS Kernel mount options to use https://docs.ceph.com/en/latest/man/8/mount.ceph/#options.
# Set to "ms_mode=secure" when connections.encrypted is enabled in CephCluster CR
cephFSKernelMountOptions:
# -- Enable adding volume metadata on the CephFS subvolumes and RBD images.
# Not all users might be interested in getting volume/snapshot details as metadata on CephFS subvolume and RBD images.
# Hence enable metadata is false by default
enableMetadata: false
# -- Set replicas for csi provisioner deployment
provisionerReplicas: 2
# -- Cluster name identifier to set as metadata on the CephFS subvolume and RBD images. This will be useful
# in cases like for example, when two container orchestrator clusters (Kubernetes/OCP) are using a single ceph cluster
clusterName:
# -- Set logging level for cephCSI containers maintained by the cephCSI.
# Supported values from 0 to 5. 0 for general useful logs, 5 for trace level verbosity.
logLevel: 0
# -- Set logging level for Kubernetes-csi sidecar containers.
# Supported values from 0 to 5. 0 for general useful logs (the default), 5 for trace level verbosity.
# @default -- `0`
sidecarLogLevel:
# -- CSI driver name prefix for cephfs, rbd and nfs.
# @default -- `namespace name where rook-ceph operator is deployed`
csiDriverNamePrefix:
# -- CSI RBD plugin daemonset update strategy, supported values are OnDelete and RollingUpdate
# @default -- `RollingUpdate`
rbdPluginUpdateStrategy:
# -- A maxUnavailable parameter of CSI RBD plugin daemonset update strategy.
# @default -- `1`
rbdPluginUpdateStrategyMaxUnavailable:
# -- CSI CephFS plugin daemonset update strategy, supported values are OnDelete and RollingUpdate
# @default -- `RollingUpdate`
cephFSPluginUpdateStrategy:
# -- A maxUnavailable parameter of CSI cephFS plugin daemonset update strategy.
# @default -- `1`
cephFSPluginUpdateStrategyMaxUnavailable:
# -- CSI NFS plugin daemonset update strategy, supported values are OnDelete and RollingUpdate
# @default -- `RollingUpdate`
nfsPluginUpdateStrategy:
# -- Set GRPC timeout for csi containers (in seconds). It should be >= 120. If this value is not set or is invalid, it defaults to 150
grpcTimeoutInSeconds: 150
# -- Burst to use while communicating with the kubernetes apiserver.
kubeApiBurst:
# -- QPS to use while communicating with the kubernetes apiserver.
kubeApiQPS:
# -- The volume of the CephCSI RBD plugin DaemonSet
csiRBDPluginVolume:
# - name: lib-modules
# hostPath:
# path: /run/booted-system/kernel-modules/lib/modules/
# - name: host-nix
# hostPath:
# path: /nix
# -- The volume mounts of the CephCSI RBD plugin DaemonSet
csiRBDPluginVolumeMount:
# - name: host-nix
# mountPath: /nix
# readOnly: true
# -- The volume of the CephCSI CephFS plugin DaemonSet
csiCephFSPluginVolume:
# - name: lib-modules
# hostPath:
# path: /run/booted-system/kernel-modules/lib/modules/
# - name: host-nix
# hostPath:
# path: /nix
# -- The volume mounts of the CephCSI CephFS plugin DaemonSet
csiCephFSPluginVolumeMount:
# - name: host-nix
# mountPath: /nix
# readOnly: true
# -- CEPH CSI RBD provisioner resource requirement list
# csi-omap-generator resources will be applied only if `enableOMAPGenerator` is set to `true`
# @default -- see values.yaml
csiRBDProvisionerResource: |
- name : csi-provisioner
resource:
requests:
memory: 128Mi
cpu: 100m
limits:
memory: 256Mi
- name : csi-resizer
resource:
requests:
memory: 128Mi
cpu: 100m
limits:
memory: 256Mi
- name : csi-attacher
resource:
requests:
memory: 128Mi
cpu: 100m
limits:
memory: 256Mi
- name : csi-snapshotter
resource:
requests:
memory: 128Mi
cpu: 100m
limits:
memory: 256Mi
- name : csi-rbdplugin
resource:
requests:
memory: 512Mi
limits:
memory: 1Gi
- name : csi-omap-generator
resource:
requests:
memory: 512Mi
cpu: 250m
limits:
memory: 1Gi
- name : liveness-prometheus
resource:
requests:
memory: 128Mi
cpu: 50m
limits:
memory: 256Mi
# -- CEPH CSI RBD plugin resource requirement list
# @default -- see values.yaml
csiRBDPluginResource: |
- name : driver-registrar
resource:
requests:
memory: 128Mi
cpu: 50m
limits:
memory: 256Mi
- name : csi-rbdplugin
resource:
requests:
memory: 512Mi
cpu: 250m
limits:
memory: 1Gi
- name : liveness-prometheus
resource:
requests:
memory: 128Mi
cpu: 50m
limits:
memory: 256Mi
# -- CEPH CSI CephFS provisioner resource requirement list
# @default -- see values.yaml
csiCephFSProvisionerResource: |
- name : csi-provisioner
resource:
requests:
memory: 128Mi
cpu: 100m
limits:
memory: 256Mi
- name : csi-resizer
resource:
requests:
memory: 128Mi
cpu: 100m
limits:
memory: 256Mi
- name : csi-attacher
resource:
requests:
memory: 128Mi
cpu: 100m
limits:
memory: 256Mi
- name : csi-snapshotter
resource:
requests:
memory: 128Mi
cpu: 100m
limits:
memory: 256Mi
- name : csi-cephfsplugin
resource:
requests:
memory: 512Mi
cpu: 250m
limits:
memory: 1Gi
- name : liveness-prometheus
resource:
requests:
memory: 128Mi
cpu: 50m
limits:
memory: 256Mi
# -- CEPH CSI CephFS plugin resource requirement list
# @default -- see values.yaml
csiCephFSPluginResource: |
- name : driver-registrar
resource:
requests:
memory: 128Mi
cpu: 50m
limits:
memory: 256Mi
- name : csi-cephfsplugin
resource:
requests:
memory: 512Mi
cpu: 250m
limits:
memory: 1Gi
- name : liveness-prometheus
resource:
requests:
memory: 128Mi
cpu: 50m
limits:
memory: 256Mi
# -- CEPH CSI NFS provisioner resource requirement list
# @default -- see values.yaml
csiNFSProvisionerResource: |
- name : csi-provisioner
resource:
requests:
memory: 128Mi
cpu: 100m
limits:
memory: 256Mi
- name : csi-nfsplugin
resource:
requests:
memory: 512Mi
cpu: 250m
limits:
memory: 1Gi
- name : csi-attacher
resource:
requests:
memory: 512Mi
cpu: 250m
limits:
memory: 1Gi
# -- CEPH CSI NFS plugin resource requirement list
# @default -- see values.yaml
csiNFSPluginResource: |
- name : driver-registrar
resource:
requests:
memory: 128Mi
cpu: 50m
limits:
memory: 256Mi
- name : csi-nfsplugin
resource:
requests:
memory: 512Mi
cpu: 250m
limits:
memory: 1Gi
# Set provisionerTolerations and provisionerNodeAffinity for provisioner pod.
# The CSI provisioner would be best to start on the same nodes as other ceph daemons.
# -- Array of tolerations in YAML format which will be added to CSI provisioner deployment
provisionerTolerations:
# - key: key
# operator: Exists
# effect: NoSchedule
# -- The node labels for affinity of the CSI provisioner deployment [^1]
provisionerNodeAffinity: #key1=value1,value2; key2=value3
# Set pluginTolerations and pluginNodeAffinity for plugin daemonset pods.
# The CSI plugins need to be started on all the nodes where the clients need to mount the storage.
# -- Array of tolerations in YAML format which will be added to CephCSI plugin DaemonSet
pluginTolerations:
# - key: key
# operator: Exists
# effect: NoSchedule
# -- The node labels for affinity of the CephCSI RBD plugin DaemonSet [^1]
pluginNodeAffinity: # key1=value1,value2; key2=value3
# -- Enable Ceph CSI Liveness sidecar deployment
enableLiveness: false
# -- CSI CephFS driver metrics port
# @default -- `9081`
cephfsLivenessMetricsPort:
# -- CSI Addons server port
# @default -- `9070`
csiAddonsPort:
# -- CSI Addons server port for the RBD provisioner
# @default -- `9070`
csiAddonsRBDProvisionerPort:
# -- CSI Addons server port for the Ceph FS provisioner
# @default -- `9070`
csiAddonsCephFSProvisionerPort:
# -- Enable Ceph Kernel clients on kernel < 4.17. If your kernel does not support quotas for CephFS
# you may want to disable this setting. However, this will cause an issue during upgrades
# with the FUSE client. See the [upgrade guide](https://rook.io/docs/rook/v1.2/ceph-upgrade.html)
forceCephFSKernelClient: true
# -- Ceph CSI RBD driver metrics port
# @default -- `8080`
rbdLivenessMetricsPort:
serviceMonitor:
# -- Enable ServiceMonitor for Ceph CSI drivers
enabled: false
# -- Service monitor scrape interval
interval: 10s
# -- ServiceMonitor additional labels
labels: {}
# -- Use a different namespace for the ServiceMonitor
namespace:
# -- Kubelet root directory path (if the Kubelet uses a different path for the `--root-dir` flag)
# @default -- `/var/lib/kubelet`
kubeletDirPath:
# -- Duration in seconds that non-leader candidates will wait to force acquire leadership.
# @default -- `137s`
csiLeaderElectionLeaseDuration:
# -- Deadline in seconds that the acting leader will retry refreshing leadership before giving up.
# @default -- `107s`
csiLeaderElectionRenewDeadline:
# -- Retry period in seconds the LeaderElector clients should wait between tries of actions.
# @default -- `26s`
csiLeaderElectionRetryPeriod:
cephcsi:
# -- Ceph CSI image repository
repository: quay.io/cephcsi/cephcsi
# -- Ceph CSI image tag
tag: v3.14.0
registrar:
# -- Kubernetes CSI registrar image repository
repository: registry.k8s.io/sig-storage/csi-node-driver-registrar
# -- Registrar image tag
tag: v2.13.0
provisioner:
# -- Kubernetes CSI provisioner image repository
repository: registry.k8s.io/sig-storage/csi-provisioner
# -- Provisioner image tag
tag: v5.1.0
snapshotter:
# -- Kubernetes CSI snapshotter image repository
repository: registry.k8s.io/sig-storage/csi-snapshotter
# -- Snapshotter image tag
tag: v8.2.0
attacher:
# -- Kubernetes CSI Attacher image repository
repository: registry.k8s.io/sig-storage/csi-attacher
# -- Attacher image tag
tag: v4.8.0
resizer:
# -- Kubernetes CSI resizer image repository
repository: registry.k8s.io/sig-storage/csi-resizer
# -- Resizer image tag
tag: v1.13.1
# -- Image pull policy
imagePullPolicy: IfNotPresent
# -- Labels to add to the CSI CephFS Deployments and DaemonSets Pods
cephfsPodLabels: #"key1=value1,key2=value2"
# -- Labels to add to the CSI NFS Deployments and DaemonSets Pods
nfsPodLabels: #"key1=value1,key2=value2"
# -- Labels to add to the CSI RBD Deployments and DaemonSets Pods
rbdPodLabels: #"key1=value1,key2=value2"
csiAddons:
# -- Enable CSIAddons
enabled: false
# -- CSIAddons sidecar image repository
repository: quay.io/csiaddons/k8s-sidecar
# -- CSIAddons sidecar image tag
tag: v0.12.0
nfs:
# -- Enable the nfs csi driver
enabled: false
topology:
# -- Enable topology based provisioning
enabled: false
# NOTE: the value here serves as an example and needs to be
# updated with node labels that define domains of interest
# -- domainLabels define which node labels to use as domains
# for CSI nodeplugins to advertise their domains
domainLabels:
# - kubernetes.io/hostname
# - topology.kubernetes.io/zone
# - topology.rook.io/rack
# -- Whether to skip any attach operation altogether for CephFS PVCs. See more details
# [here](https://kubernetes-csi.github.io/docs/skip-attach.html#skip-attach-with-csi-driver-object).
# If cephFSAttachRequired is set to false it skips the volume attachments and makes the creation
# of pods using the CephFS PVC fast. **WARNING** It's highly discouraged to use this for
# CephFS RWO volumes. Refer to this [issue](https://github.com/kubernetes/kubernetes/issues/103305) for more details.
cephFSAttachRequired: true
# -- Whether to skip any attach operation altogether for RBD PVCs. See more details
# [here](https://kubernetes-csi.github.io/docs/skip-attach.html#skip-attach-with-csi-driver-object).
# If set to false it skips the volume attachments and makes the creation of pods using the RBD PVC fast.
# **WARNING** It's highly discouraged to use this for RWO volumes as it can cause data corruption.
# csi-addons operations like Reclaimspace and PVC Keyrotation will also not be supported if set
# to false since we'll have no VolumeAttachments to determine which node the PVC is mounted on.
# Refer to this [issue](https://github.com/kubernetes/kubernetes/issues/103305) for more details.
rbdAttachRequired: true
# -- Whether to skip any attach operation altogether for NFS PVCs. See more details
# [here](https://kubernetes-csi.github.io/docs/skip-attach.html#skip-attach-with-csi-driver-object).
# If cephFSAttachRequired is set to false it skips the volume attachments and makes the creation
# of pods using the NFS PVC fast. **WARNING** It's highly discouraged to use this for
# NFS RWO volumes. Refer to this [issue](https://github.com/kubernetes/kubernetes/issues/103305) for more details.
nfsAttachRequired: true
# -- Enable discovery daemon
enableDiscoveryDaemon: false
# -- Set the discovery daemon device discovery interval (default to 60m)
discoveryDaemonInterval: 60m
# -- The timeout for ceph commands in seconds
cephCommandsTimeoutSeconds: "15"
# -- If true, run rook operator on the host network
useOperatorHostNetwork:
# -- If true, scale down the rook operator.
# This is useful for administrative actions where the rook operator must be scaled down, while using gitops style tooling
# to deploy your helm charts.
scaleDownOperator: false
## Rook Discover configuration
## toleration: NoSchedule, PreferNoSchedule or NoExecute
## tolerationKey: Set this to the specific key of the taint to tolerate
## tolerations: Array of tolerations in YAML format which will be added to agent deployment
## nodeAffinity: Set to labels of the node to match
discover:
# -- Toleration for the discover pods.
# Options: `NoSchedule`, `PreferNoSchedule` or `NoExecute`
toleration:
# -- The specific key of the taint to tolerate
tolerationKey:
# -- Array of tolerations in YAML format which will be added to discover deployment
tolerations:
# - key: key
# operator: Exists
# effect: NoSchedule
# -- The node labels for affinity of `discover-agent` [^1]
nodeAffinity:
# key1=value1,value2; key2=value3
#
# or
#
# requiredDuringSchedulingIgnoredDuringExecution:
# nodeSelectorTerms:
# - matchExpressions:
# - key: storage-node
# operator: Exists
# -- Labels to add to the discover pods
podLabels: # "key1=value1,key2=value2"
# -- Add resources to discover daemon pods
resources:
# - limits:
# memory: 512Mi
# - requests:
# cpu: 100m
# memory: 128Mi
# -- Custom label to identify node hostname. If not set `kubernetes.io/hostname` will be used
customHostnameLabel:
# -- Runs Ceph Pods as privileged to be able to write to `hostPaths` in OpenShift with SELinux restrictions.
hostpathRequiresPrivileged: false
# -- Whether to create all Rook pods to run on the host network, for example in environments where a CNI is not enabled
enforceHostNetwork: false
# -- Disable automatic orchestration when new devices are discovered.
disableDeviceHotplug: false
# -- The revision history limit for all pods created by Rook. If blank, the K8s default is 10.
revisionHistoryLimit:
# -- Blacklist certain disks according to the regex provided.
discoverDaemonUdev:
# -- imagePullSecrets option allow to pull docker images from private docker registry. Option will be passed to all service accounts.
imagePullSecrets:
# - name: my-registry-secret
# -- Whether the OBC provisioner should watch on the operator namespace or not, if not the namespace of the cluster will be used
enableOBCWatchOperatorNamespace: true
# -- Specify the prefix for the OBC provisioner in place of the cluster namespace
# @default -- `ceph cluster namespace`
obcProvisionerNamePrefix:
# -- Many OBC additional config fields may be risky for administrators to allow users control over.
# The safe and default-allowed fields are 'maxObjects' and 'maxSize'.
# Other fields should be considered risky. To allow all additional configs, use this value:
# "maxObjects,maxSize,bucketMaxObjects,bucketMaxSize,bucketPolicy,bucketLifecycle,bucketOwner"
# @default -- "maxObjects,maxSize"
obcAllowAdditionalConfigFields: "maxObjects,maxSize"
monitoring:
# -- Enable monitoring. Requires Prometheus to be pre-installed.
# Enabling will also create RBAC rules to allow Operator to create ServiceMonitors
enabled: false

View File

@@ -1,200 +0,0 @@
use std::{
net::{IpAddr, Ipv4Addr},
sync::{Arc, OnceLock},
};
use brocade::BrocadeOptions;
use cidr::Ipv4Cidr;
use harmony::{
config::secret::SshKeyPair,
data::{FileContent, FilePath},
hardware::{HostCategory, Location, PhysicalHost, SwitchGroup},
infra::{brocade::BrocadeSwitchClient, opnsense::OPNSenseManagementInterface},
inventory::Inventory,
modules::{
http::StaticFilesHttpScore,
okd::{
bootstrap_dhcp::OKDBootstrapDhcpScore,
bootstrap_load_balancer::OKDBootstrapLoadBalancerScore, dhcp::OKDDhcpScore,
dns::OKDDnsScore, ipxe::OKDIpxeScore,
},
tftp::TftpScore,
},
topology::{LogicalHost, UnmanagedRouter},
};
use harmony_macros::{ip, mac_address};
use harmony_secret::{Secret, SecretManager};
use harmony_types::net::Url;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
#[tokio::main]
async fn main() {
let firewall = harmony::topology::LogicalHost {
ip: ip!("192.168.33.1"),
name: String::from("fw0"),
};
let switch_auth = SecretManager::get_or_prompt::<BrocadeSwitchAuth>()
.await
.expect("Failed to get credentials");
let switches: Vec<IpAddr> = vec![ip!("192.168.33.101")];
let brocade_options = BrocadeOptions {
dry_run: *harmony::config::DRY_RUN,
..Default::default()
};
let switch_client = BrocadeSwitchClient::init(
&switches,
&switch_auth.username,
&switch_auth.password,
brocade_options,
)
.await
.expect("Failed to connect to switch");
let switch_client = Arc::new(switch_client);
let opnsense = Arc::new(
harmony::infra::opnsense::OPNSenseFirewall::new(firewall, None, "root", "opnsense").await,
);
let lan_subnet = Ipv4Addr::new(192, 168, 33, 0);
let gateway_ipv4 = Ipv4Addr::new(192, 168, 33, 1);
let gateway_ip = IpAddr::V4(gateway_ipv4);
let topology = harmony::topology::HAClusterTopology {
kubeconfig: None,
domain_name: "ncd0.harmony.mcd".to_string(), // TODO this must be set manually correctly
// when setting up the opnsense firewall
router: Arc::new(UnmanagedRouter::new(
gateway_ip,
Ipv4Cidr::new(lan_subnet, 24).unwrap(),
)),
load_balancer: opnsense.clone(),
firewall: opnsense.clone(),
tftp_server: opnsense.clone(),
http_server: opnsense.clone(),
dhcp_server: opnsense.clone(),
dns_server: opnsense.clone(),
control_plane: vec![
LogicalHost {
ip: ip!("192.168.33.20"),
name: "cp0".to_string(),
},
LogicalHost {
ip: ip!("192.168.33.21"),
name: "cp1".to_string(),
},
LogicalHost {
ip: ip!("192.168.33.22"),
name: "cp2".to_string(),
},
],
bootstrap_host: LogicalHost {
ip: ip!("192.168.33.66"),
name: "bootstrap".to_string(),
},
workers: vec![
LogicalHost {
ip: ip!("192.168.33.30"),
name: "wk0".to_string(),
},
LogicalHost {
ip: ip!("192.168.33.31"),
name: "wk1".to_string(),
},
LogicalHost {
ip: ip!("192.168.33.32"),
name: "wk2".to_string(),
},
],
node_exporter: opnsense.clone(),
switch_client: switch_client.clone(),
network_manager: OnceLock::new(),
};
let inventory = Inventory {
location: Location::new("I am mobile".to_string(), "earth".to_string()),
switch: SwitchGroup::from([]),
firewall_mgmt: Box::new(OPNSenseManagementInterface::new()),
storage_host: vec![],
worker_host: vec![
PhysicalHost::empty(HostCategory::Server)
.mac_address(mac_address!("C4:62:37:02:61:0F")),
PhysicalHost::empty(HostCategory::Server)
.mac_address(mac_address!("C4:62:37:02:61:26")),
// thisone
// Then create the ipxe file
// set the dns static leases
// bootstrap nodes
// start ceph cluster
// try installation of lampscore
// bingo?
PhysicalHost::empty(HostCategory::Server)
.mac_address(mac_address!("C4:62:37:02:61:70")),
],
control_plane_host: vec![
PhysicalHost::empty(HostCategory::Server)
.mac_address(mac_address!("C4:62:37:02:60:FA")),
PhysicalHost::empty(HostCategory::Server)
.mac_address(mac_address!("C4:62:37:02:61:1A")),
PhysicalHost::empty(HostCategory::Server)
.mac_address(mac_address!("C4:62:37:01:BC:68")),
],
};
// TODO regroup smaller scores in a larger one such as this
// let okd_boostrap_preparation();
let bootstrap_dhcp_score = OKDBootstrapDhcpScore::new(&topology, &inventory);
let bootstrap_load_balancer_score = OKDBootstrapLoadBalancerScore::new(&topology);
let dhcp_score = OKDDhcpScore::new(&topology, &inventory);
let dns_score = OKDDnsScore::new(&topology);
let load_balancer_score =
harmony::modules::okd::load_balancer::OKDLoadBalancerScore::new(&topology);
let ssh_key = SecretManager::get_or_prompt::<SshKeyPair>().await.unwrap();
let tftp_score = TftpScore::new(Url::LocalFolder("./data/watchguard/tftpboot".to_string()));
let http_score = StaticFilesHttpScore {
folder_to_serve: Some(Url::LocalFolder(
"./data/watchguard/pxe-http-files".to_string(),
)),
files: vec![],
remote_path: None,
};
let kickstart_filename = "inventory.kickstart".to_string();
let harmony_inventory_agent = "harmony_inventory_agent".to_string();
let ipxe_score = OKDIpxeScore {
kickstart_filename,
harmony_inventory_agent,
cluster_pubkey: FileContent {
path: FilePath::Relative("cluster_ssh_key.pub".to_string()),
content: ssh_key.public,
},
};
harmony_tui::run(
inventory,
topology,
vec![
Box::new(dns_score),
Box::new(bootstrap_dhcp_score),
Box::new(bootstrap_load_balancer_score),
Box::new(load_balancer_score),
Box::new(tftp_score),
Box::new(http_score),
Box::new(ipxe_score),
Box::new(dhcp_score),
],
)
.await
.unwrap();
}
#[derive(Secret, Serialize, Deserialize, JsonSchema, Debug)]
pub struct BrocadeSwitchAuth {
pub username: String,
pub password: String,
}

View File

@@ -1,6 +1,8 @@
use async_trait::async_trait; use async_trait::async_trait;
use crate::{hardware::PhysicalHost, interpret::InterpretError, inventory::HostRole}; use crate::{
hardware::PhysicalHost, interpret::InterpretError, inventory::HostRole, topology::HostConfig,
};
/// Errors that can occur within the repository layer. /// Errors that can occur within the repository layer.
#[derive(thiserror::Error, Debug)] #[derive(thiserror::Error, Debug)]
@@ -29,10 +31,14 @@ pub trait InventoryRepository: Send + Sync + 'static {
async fn save(&self, host: &PhysicalHost) -> Result<(), RepoError>; async fn save(&self, host: &PhysicalHost) -> Result<(), RepoError>;
async fn get_latest_by_id(&self, host_id: &str) -> Result<Option<PhysicalHost>, RepoError>; async fn get_latest_by_id(&self, host_id: &str) -> Result<Option<PhysicalHost>, RepoError>;
async fn get_all_hosts(&self) -> Result<Vec<PhysicalHost>, RepoError>; async fn get_all_hosts(&self) -> Result<Vec<PhysicalHost>, RepoError>;
async fn get_host_for_role(&self, role: &HostRole) -> Result<Vec<PhysicalHost>, RepoError>; async fn get_hosts_for_role(
&self,
role: &HostRole,
) -> Result<Vec<(PhysicalHost, HostConfig)>, RepoError>;
async fn save_role_mapping( async fn save_role_mapping(
&self, &self,
role: &HostRole, role: &HostRole,
host: &PhysicalHost, host: &PhysicalHost,
installation_device: &String,
) -> Result<(), RepoError>; ) -> Result<(), RepoError>;
} }

View File

@@ -7,12 +7,17 @@ use super::LogicalHost;
/// Represents the binding between a LogicalHost and a PhysicalHost. /// Represents the binding between a LogicalHost and a PhysicalHost.
/// ///
///
/// This is the only construct that directly maps a logical host to a physical host. /// This is the only construct that directly maps a logical host to a physical host.
/// It serves as a bridge between the logical cluster structure and the physical infrastructure. /// It serves as a bridge between the logical cluster structure and the physical infrastructure.
#[derive(Debug, new, Clone, Serialize)] #[derive(Debug, new, Clone, Serialize)]
pub struct HostBinding { pub struct HostBinding {
/// Reference to the LogicalHost
pub logical_host: LogicalHost, pub logical_host: LogicalHost,
/// Reference to the PhysicalHost
pub physical_host: PhysicalHost, pub physical_host: PhysicalHost,
pub host_config: HostConfig,
}
#[derive(Debug, new, Clone, Serialize)]
pub struct HostConfig {
pub installation_device: Option<String>,
} }

View File

@@ -1,11 +1,12 @@
use crate::{ use crate::{
hardware::PhysicalHost, hardware::PhysicalHost,
inventory::{HostRole, InventoryRepository, RepoError}, inventory::{HostRole, InventoryRepository, RepoError},
topology::HostConfig,
}; };
use async_trait::async_trait; use async_trait::async_trait;
use harmony_types::id::Id; use harmony_types::id::Id;
use log::info; use log::info;
use sqlx::{Pool, Sqlite, SqlitePool}; use sqlx::{Pool, Sqlite, SqlitePool, migrate::MigrateDatabase};
/// A thread-safe, connection-pooled repository using SQLite. /// A thread-safe, connection-pooled repository using SQLite.
#[derive(Debug)] #[derive(Debug)]
@@ -15,11 +16,28 @@ pub struct SqliteInventoryRepository {
impl SqliteInventoryRepository { impl SqliteInventoryRepository {
pub async fn new(database_url: &str) -> Result<Self, RepoError> { pub async fn new(database_url: &str) -> Result<Self, RepoError> {
// Ensure the database file exists for SQLite
if database_url.starts_with("sqlite:") {
let path = database_url.trim_start_matches("sqlite:");
if !path.contains(":memory:") && !std::path::Path::new(path).exists() {
sqlx::any::install_default_drivers();
sqlx::Sqlite::create_database(database_url)
.await
.map_err(|e| RepoError::ConnectionFailed(e.to_string()))?;
}
}
let pool = SqlitePool::connect(database_url) let pool = SqlitePool::connect(database_url)
.await .await
.map_err(|e| RepoError::ConnectionFailed(e.to_string()))?; .map_err(|e| RepoError::ConnectionFailed(e.to_string()))?;
info!("SQLite inventory repository initialized at '{database_url}'"); // Run migrations
sqlx::migrate!("./../migrations")
.run(&pool)
.await
.map_err(|e| RepoError::ConnectionFailed(format!("Migration failed: {}", e)))?;
info!("SQLite inventory repository initialized and migrated at '{database_url}'");
Ok(Self { pool }) Ok(Self { pool })
} }
} }
@@ -90,16 +108,18 @@ impl InventoryRepository for SqliteInventoryRepository {
&self, &self,
role: &HostRole, role: &HostRole,
host: &PhysicalHost, host: &PhysicalHost,
installation_device: &String,
) -> Result<(), RepoError> { ) -> Result<(), RepoError> {
let host_id = host.id.to_string(); let host_id = host.id.to_string();
sqlx::query!( sqlx::query!(
r#" r#"
INSERT INTO host_role_mapping (host_id, role) INSERT INTO host_role_mapping (host_id, role, installation_device)
VALUES (?, ?) VALUES (?, ?, ?)
"#, "#,
host_id, host_id,
role role,
installation_device
) )
.execute(&self.pool) .execute(&self.pool)
.await?; .await?;
@@ -109,16 +129,20 @@ impl InventoryRepository for SqliteInventoryRepository {
Ok(()) Ok(())
} }
async fn get_host_for_role(&self, role: &HostRole) -> Result<Vec<PhysicalHost>, RepoError> { async fn get_hosts_for_role(
&self,
role: &HostRole,
) -> Result<Vec<(PhysicalHost, HostConfig)>, RepoError> {
struct HostIdRow { struct HostIdRow {
host_id: String, host_id: String,
installation_device: Option<String>,
} }
let role_str = format!("{:?}", role); let role_str = format!("{:?}", role);
let host_id_rows = sqlx::query_as!( let host_id_rows = sqlx::query_as!(
HostIdRow, HostIdRow,
"SELECT host_id FROM host_role_mapping WHERE role = ?", "SELECT host_id, installation_device FROM host_role_mapping WHERE role = ?",
role_str role_str
) )
.fetch_all(&self.pool) .fetch_all(&self.pool)
@@ -126,15 +150,19 @@ impl InventoryRepository for SqliteInventoryRepository {
let mut hosts = Vec::with_capacity(host_id_rows.len()); let mut hosts = Vec::with_capacity(host_id_rows.len());
for row in host_id_rows { for row in host_id_rows {
match self.get_latest_by_id(&row.host_id).await? { let physical_host = match self.get_latest_by_id(&row.host_id).await? {
Some(host) => hosts.push(host), Some(host) => host,
None => { None => {
log::warn!( return Err(RepoError::QueryFailed(format!(
"Found a role mapping for host_id '{}', but the host does not exist in the physical_hosts table. This may indicate a data integrity issue.", "Found a role mapping for host_id '{}', but the host does not exist in the physical_hosts table. This may indicate a data integrity issue.",
row.host_id row.host_id
); )));
}
} }
};
let host_config = HostConfig {
installation_device: row.installation_device,
};
hosts.push((physical_host, host_config));
} }
Ok(hosts) Ok(hosts)

View File

@@ -82,10 +82,28 @@ impl<T: Topology> Interpret<T> for DiscoverHostForRoleInterpret {
self.score.role, self.score.role,
choice.summary() choice.summary()
); );
let disk_names: Vec<String> =
choice.storage.iter().map(|s| s.name.clone()).collect();
let disk_choice = inquire::Select::new(
&format!("Select the disk to use on host {}:", choice.summary()),
disk_names,
)
.prompt();
match disk_choice {
Ok(disk_name) => {
info!("Selected disk {} for node {}", disk_name, choice.summary());
host_repo host_repo
.save_role_mapping(&self.score.role, &choice) .save_role_mapping(&self.score.role, &choice, &disk_name)
.await?; .await?;
chosen_hosts.push(choice); chosen_hosts.push(choice);
}
Err(e) => {
error!("Failed to select disk: {}", e);
return Err(InterpretError::new(format!("Could not select disk: {e}")));
}
}
assigned_hosts += 1; assigned_hosts += 1;
info!( info!(

View File

@@ -40,13 +40,14 @@ impl<T: Topology> Interpret<T> for InspectInventoryInterpret {
let repo = InventoryRepositoryFactory::build().await?; let repo = InventoryRepositoryFactory::build().await?;
for role in HostRole::iter() { for role in HostRole::iter() {
info!("Inspecting hosts for role {role:?}"); info!("Inspecting hosts for role {role:?}");
let hosts = repo.get_host_for_role(&role).await?; let hosts = repo.get_hosts_for_role(&role).await?;
info!("Hosts with role {role:?} : {}", hosts.len()); info!("Hosts with role {role:?} : {}", hosts.len());
hosts.iter().enumerate().for_each(|(idx, h)| { hosts.iter().enumerate().for_each(|(idx, (h, hc))| {
info!( info!(
"Found host index {idx} with role {role:?} => \n{}\n{}", "Found host index {idx} with role {role:?} => \n{}\n{}\n{:?}",
h.summary(), h.summary(),
h.parts_list() h.parts_list(),
hc,
) )
}); });
} }

View File

@@ -100,8 +100,12 @@ When you can dig them, confirm to continue.
let repo = InventoryRepositoryFactory::build().await?; let repo = InventoryRepositoryFactory::build().await?;
while bootstrap_host.is_none() { while bootstrap_host.is_none() {
let hosts = repo.get_host_for_role(&HostRole::Bootstrap).await?; let hosts = repo.get_hosts_for_role(&HostRole::Bootstrap).await?;
bootstrap_host = hosts.into_iter().next().to_owned(); bootstrap_host = hosts
.into_iter()
.next()
.map(|(physical_host, _host_config)| physical_host)
.to_owned();
DiscoverHostForRoleScore { DiscoverHostForRoleScore {
role: HostRole::Bootstrap, role: HostRole::Bootstrap,
number_desired_hosts: 1, number_desired_hosts: 1,

View File

@@ -14,7 +14,7 @@ use crate::{
}, },
}, },
score::Score, score::Score,
topology::{HAClusterTopology, HostBinding}, topology::{HAClusterTopology, HostBinding, HostConfig},
}; };
use async_trait::async_trait; use async_trait::async_trait;
use derive_new::new; use derive_new::new;
@@ -61,10 +61,10 @@ impl OKDSetup02BootstrapInterpret {
} }
} }
async fn get_bootstrap_node(&self) -> Result<PhysicalHost, InterpretError> { async fn get_bootstrap_node(&self) -> Result<(PhysicalHost, HostConfig), InterpretError> {
let repo = InventoryRepositoryFactory::build().await?; let repo = InventoryRepositoryFactory::build().await?;
match repo match repo
.get_host_for_role(&HostRole::Bootstrap) .get_hosts_for_role(&HostRole::Bootstrap)
.await? .await?
.into_iter() .into_iter()
.next() .next()
@@ -248,9 +248,12 @@ impl OKDSetup02BootstrapInterpret {
inventory: &Inventory, inventory: &Inventory,
topology: &HAClusterTopology, topology: &HAClusterTopology,
) -> Result<(), InterpretError> { ) -> Result<(), InterpretError> {
let (physical_host, host_config) = self.get_bootstrap_node().await?;
let binding = HostBinding { let binding = HostBinding {
logical_host: topology.bootstrap_host.clone(), logical_host: topology.bootstrap_host.clone(),
physical_host: self.get_bootstrap_node().await?, physical_host,
host_config,
}; };
info!("Configuring host binding for bootstrap node {binding:?}"); info!("Configuring host binding for bootstrap node {binding:?}");
@@ -268,16 +271,20 @@ impl OKDSetup02BootstrapInterpret {
inventory: &Inventory, inventory: &Inventory,
topology: &HAClusterTopology, topology: &HAClusterTopology,
) -> Result<(), InterpretError> { ) -> Result<(), InterpretError> {
let (bootstrap_node, host_config) = self.get_bootstrap_node().await?;
let content = BootstrapIpxeTpl { let content = BootstrapIpxeTpl {
http_ip: &topology.http_server.get_ip().to_string(), http_ip: &topology.http_server.get_ip().to_string(),
scos_path: "scos", // TODO use some constant scos_path: "scos", // TODO use some constant
ignition_http_path: "okd_ignition_files", // TODO use proper variable ignition_http_path: "okd_ignition_files", // TODO use proper variable
installation_device: "/dev/sda", installation_device: &host_config.installation_device.expect(&format!(
"Could not find an installation device for host {}",
bootstrap_node.summary()
)),
ignition_file_name: "bootstrap.ign", ignition_file_name: "bootstrap.ign",
} }
.to_string(); .to_string();
let bootstrap_node = self.get_bootstrap_node().await?;
let mac_address = bootstrap_node.get_mac_address(); let mac_address = bootstrap_node.get_mac_address();
info!("[Bootstrap] Rendering per-MAC PXE for bootstrap node"); info!("[Bootstrap] Rendering per-MAC PXE for bootstrap node");

View File

@@ -2,10 +2,9 @@ use serde::Serialize;
use crate::{ use crate::{
interpret::Interpret, interpret::Interpret,
inventory::Inventory,
modules::dhcp::DhcpScore, modules::dhcp::DhcpScore,
score::Score, score::Score,
topology::{DhcpServer, HAClusterTopology, HostBinding, Topology}, topology::{DhcpServer, Topology},
}; };
#[derive(Debug, Clone, Serialize)] #[derive(Debug, Clone, Serialize)]
@@ -13,50 +12,6 @@ pub struct OKDBootstrapDhcpScore {
dhcp_score: DhcpScore, dhcp_score: DhcpScore,
} }
impl OKDBootstrapDhcpScore {
pub fn new(topology: &HAClusterTopology, inventory: &Inventory) -> Self {
let mut host_binding: Vec<_> = topology
.control_plane
.iter()
.enumerate()
.map(|(index, topology_entry)| HostBinding {
logical_host: topology_entry.clone(),
physical_host: inventory
.control_plane_host
.get(index)
.expect("Iventory should contain at least as many physical hosts as topology")
.clone(),
})
.collect();
host_binding.push(HostBinding {
logical_host: topology.bootstrap_host.clone(),
physical_host: inventory
.worker_host
.first()
.expect("Should have at least one worker to be used as bootstrap node")
.clone(),
});
// TODO refactor this so it is not copy pasted from dhcp.rs
todo!("Add dhcp range")
// Self {
// dhcp_score: DhcpScore::new(
// host_binding,
// // TODO : we should add a tftp server to the topology instead of relying on the
// // router address, this is leaking implementation details
// Some(topology.router.get_gateway()),
// None, // To allow UEFI boot we cannot provide a legacy file
// Some("undionly.kpxe".to_string()),
// Some("ipxe.efi".to_string()),
// Some(format!(
// "http://{}:8080/boot.ipxe",
// topology.router.get_gateway()
// )),
// (self.),
// ),
// }
}
}
impl<T: Topology + DhcpServer> Score<T> for OKDBootstrapDhcpScore { impl<T: Topology + DhcpServer> Score<T> for OKDBootstrapDhcpScore {
fn create_interpret(&self) -> Box<dyn Interpret<T>> { fn create_interpret(&self) -> Box<dyn Interpret<T>> {
self.dhcp_score.create_interpret() self.dhcp_score.create_interpret()

View File

@@ -20,7 +20,7 @@ use crate::{
}, },
}, },
score::Score, score::Score,
topology::{HAClusterTopology, HostBinding, LogicalHost}, topology::{HAClusterTopology, HostBinding, HostConfig, LogicalHost},
}; };
#[derive(Debug, Clone, Serialize, new)] #[derive(Debug, Clone, Serialize, new)]
@@ -68,10 +68,10 @@ impl OKDNodeInterpret {
&self, &self,
inventory: &Inventory, inventory: &Inventory,
topology: &HAClusterTopology, topology: &HAClusterTopology,
) -> Result<Vec<PhysicalHost>, InterpretError> { ) -> Result<Vec<(PhysicalHost, HostConfig)>, InterpretError> {
let repo = InventoryRepositoryFactory::build().await?; let repo = InventoryRepositoryFactory::build().await?;
let mut hosts = repo.get_host_for_role(&self.host_role).await?; let mut hosts = repo.get_hosts_for_role(&self.host_role).await?;
let okd_host_properties = self.okd_role_properties(&self.host_role); let okd_host_properties = self.okd_role_properties(&self.host_role);
@@ -92,7 +92,7 @@ impl OKDNodeInterpret {
.interpret(inventory, topology) .interpret(inventory, topology)
.await?; .await?;
hosts = repo.get_host_for_role(&self.host_role).await?; hosts = repo.get_hosts_for_role(&self.host_role).await?;
if hosts.len() < required_hosts.try_into().unwrap_or(0) { if hosts.len() < required_hosts.try_into().unwrap_or(0) {
Err(InterpretError::new(format!( Err(InterpretError::new(format!(
@@ -115,7 +115,7 @@ impl OKDNodeInterpret {
&self, &self,
inventory: &Inventory, inventory: &Inventory,
topology: &HAClusterTopology, topology: &HAClusterTopology,
nodes: &Vec<PhysicalHost>, nodes: &Vec<(PhysicalHost, HostConfig)>,
) -> Result<(), InterpretError> { ) -> Result<(), InterpretError> {
info!( info!(
"[{}] Configuring host bindings for {} plane nodes.", "[{}] Configuring host bindings for {} plane nodes.",
@@ -142,7 +142,7 @@ impl OKDNodeInterpret {
// Ensure the topology definition matches the number of physical nodes found. // Ensure the topology definition matches the number of physical nodes found.
fn validate_host_node_match( fn validate_host_node_match(
&self, &self,
nodes: &Vec<PhysicalHost>, nodes: &Vec<(PhysicalHost, HostConfig)>,
hosts: &Vec<LogicalHost>, hosts: &Vec<LogicalHost>,
) -> Result<(), InterpretError> { ) -> Result<(), InterpretError> {
if hosts.len() != nodes.len() { if hosts.len() != nodes.len() {
@@ -158,13 +158,13 @@ impl OKDNodeInterpret {
// Create a binding for each physical host to its corresponding logical host. // Create a binding for each physical host to its corresponding logical host.
fn host_bindings( fn host_bindings(
&self, &self,
nodes: &Vec<PhysicalHost>, nodes: &Vec<(PhysicalHost, HostConfig)>,
hosts: &Vec<LogicalHost>, hosts: &Vec<LogicalHost>,
) -> Vec<HostBinding> { ) -> Vec<HostBinding> {
hosts hosts
.iter() .iter()
.zip(nodes.iter()) .zip(nodes.iter())
.map(|(logical_host, physical_host)| { .map(|(logical_host, (physical_host, host_config))| {
info!( info!(
"Creating binding: Logical Host '{}' -> Physical Host ID '{}'", "Creating binding: Logical Host '{}' -> Physical Host ID '{}'",
logical_host.name, physical_host.id logical_host.name, physical_host.id
@@ -172,6 +172,7 @@ impl OKDNodeInterpret {
HostBinding { HostBinding {
logical_host: logical_host.clone(), logical_host: logical_host.clone(),
physical_host: physical_host.clone(), physical_host: physical_host.clone(),
host_config: host_config.clone(),
} }
}) })
.collect() .collect()
@@ -182,7 +183,7 @@ impl OKDNodeInterpret {
&self, &self,
inventory: &Inventory, inventory: &Inventory,
topology: &HAClusterTopology, topology: &HAClusterTopology,
nodes: &Vec<PhysicalHost>, nodes: &Vec<(PhysicalHost, HostConfig)>,
) -> Result<(), InterpretError> { ) -> Result<(), InterpretError> {
info!( info!(
"[{}] Rendering per-MAC iPXE configurations.", "[{}] Rendering per-MAC iPXE configurations.",
@@ -205,7 +206,7 @@ impl OKDNodeInterpret {
debug!("[{}] iPXE content template:\n{content}", self.host_role); debug!("[{}] iPXE content template:\n{content}", self.host_role);
// Create and apply an iPXE boot file for each node. // Create and apply an iPXE boot file for each node.
for node in nodes { for (node, _) in nodes {
let mac_address = node.get_mac_address(); let mac_address = node.get_mac_address();
if mac_address.is_empty() { if mac_address.is_empty() {
return Err(InterpretError::new(format!( return Err(InterpretError::new(format!(
@@ -230,8 +231,11 @@ impl OKDNodeInterpret {
} }
/// Prompts the user to reboot the target control plane nodes. /// Prompts the user to reboot the target control plane nodes.
async fn reboot_targets(&self, nodes: &Vec<PhysicalHost>) -> Result<(), InterpretError> { async fn reboot_targets(
let node_ids: Vec<String> = nodes.iter().map(|n| n.id.to_string()).collect(); &self,
nodes: &Vec<(PhysicalHost, HostConfig)>,
) -> Result<(), InterpretError> {
let node_ids: Vec<String> = nodes.iter().map(|(n, _)| n.id.to_string()).collect();
info!( info!(
"[{}] Requesting reboot for control plane nodes: {node_ids:?}", "[{}] Requesting reboot for control plane nodes: {node_ids:?}",
self.host_role self.host_role

View File

@@ -6,7 +6,7 @@ use crate::{
inventory::{HostRole, Inventory}, inventory::{HostRole, Inventory},
modules::okd::host_network::HostNetworkConfigurationScore, modules::okd::host_network::HostNetworkConfigurationScore,
score::Score, score::Score,
topology::HAClusterTopology, topology::{HAClusterTopology, HostConfig},
}; };
use async_trait::async_trait; use async_trait::async_trait;
use derive_new::new; use derive_new::new;
@@ -54,10 +54,10 @@ impl OKDSetupPersistNetworkBondInterpet {
&self, &self,
_inventory: &Inventory, _inventory: &Inventory,
_topology: &HAClusterTopology, _topology: &HAClusterTopology,
) -> Result<Vec<PhysicalHost>, InterpretError> { ) -> Result<Vec<(PhysicalHost, HostConfig)>, InterpretError> {
const REQUIRED_HOSTS: usize = 3; const REQUIRED_HOSTS: usize = 3;
let repo = InventoryRepositoryFactory::build().await?; let repo = InventoryRepositoryFactory::build().await?;
let control_plane_hosts = repo.get_host_for_role(&HostRole::ControlPlane).await?; let control_plane_hosts = repo.get_hosts_for_role(&HostRole::ControlPlane).await?;
if control_plane_hosts.len() < REQUIRED_HOSTS { if control_plane_hosts.len() < REQUIRED_HOSTS {
Err(InterpretError::new(format!( Err(InterpretError::new(format!(
@@ -78,12 +78,12 @@ impl OKDSetupPersistNetworkBondInterpet {
&self, &self,
inventory: &Inventory, inventory: &Inventory,
topology: &HAClusterTopology, topology: &HAClusterTopology,
hosts: &Vec<PhysicalHost>, hosts: &Vec<(PhysicalHost, HostConfig)>,
) -> Result<(), InterpretError> { ) -> Result<(), InterpretError> {
info!("Ensuring persistent bonding"); info!("Ensuring persistent bonding");
let score = HostNetworkConfigurationScore { let score = HostNetworkConfigurationScore {
hosts: hosts.clone(), hosts: hosts.iter().map(|(p, _)| p.clone()).collect(),
}; };
score.interpret(inventory, topology).await?; score.interpret(inventory, topology).await?;

View File

@@ -1,14 +1,10 @@
use std::net::Ipv4Addr;
use harmony_types::net::IpAddress;
use serde::Serialize; use serde::Serialize;
use crate::{ use crate::{
interpret::Interpret, interpret::Interpret,
inventory::Inventory,
modules::dhcp::DhcpScore, modules::dhcp::DhcpScore,
score::Score, score::Score,
topology::{DhcpServer, HAClusterTopology, HostBinding, Topology}, topology::{DhcpServer, Topology},
}; };
#[derive(Debug, Clone, Serialize)] #[derive(Debug, Clone, Serialize)]
@@ -16,67 +12,6 @@ pub struct OKDDhcpScore {
dhcp_score: DhcpScore, dhcp_score: DhcpScore,
} }
impl OKDDhcpScore {
pub fn new(topology: &HAClusterTopology, inventory: &Inventory) -> Self {
let mut host_binding: Vec<HostBinding> = topology
.control_plane
.iter()
.enumerate()
.map(|(index, topology_entry)| HostBinding {
logical_host: topology_entry.clone(),
physical_host: inventory
.control_plane_host
.get(index)
.expect("Iventory should contain at least as many physical hosts as topology")
.clone(),
})
.collect();
topology
.workers
.iter()
.enumerate()
.for_each(|(index, topology_entry)| {
host_binding.push(HostBinding {
logical_host: topology_entry.clone(),
physical_host: inventory
.worker_host
.get(index)
.expect("There should be enough worker hosts to fill topology")
.clone(),
})
});
let dhcp_server_ip = match topology.dhcp_server.get_ip() {
std::net::IpAddr::V4(ipv4_addr) => ipv4_addr,
std::net::IpAddr::V6(_ipv6_addr) => todo!("Support ipv6 someday"),
};
// TODO this could overflow, we should use proper subnet maths here instead of an ip
// address and guessing the subnet size from there
let start = Ipv4Addr::from(u32::from(dhcp_server_ip) + 100);
let end = Ipv4Addr::from(u32::from(dhcp_server_ip) + 150);
Self {
// TODO : we should add a tftp server to the topology instead of relying on the
// router address, this is leaking implementation details
dhcp_score: DhcpScore {
host_binding,
next_server: Some(topology.router.get_gateway()),
boot_filename: None,
filename: Some("undionly.kpxe".to_string()),
filename64: Some("ipxe.efi".to_string()),
filenameipxe: Some(format!(
"http://{}:8080/boot.ipxe",
topology.router.get_gateway()
)),
dhcp_range: (IpAddress::from(start), IpAddress::from(end)),
domain: Some(topology.domain_name.clone()),
},
}
}
}
impl<T: Topology + DhcpServer> Score<T> for OKDDhcpScore { impl<T: Topology + DhcpServer> Score<T> for OKDDhcpScore {
fn create_interpret(&self) -> Box<dyn Interpret<T>> { fn create_interpret(&self) -> Box<dyn Interpret<T>> {
self.dhcp_score.create_interpret() self.dhcp_score.create_interpret()

View File

@@ -0,0 +1,2 @@
-- Add installation_device column to host_role_mapping
ALTER TABLE host_role_mapping ADD COLUMN installation_device TEXT;