Helm chart for pcm-sensor-server #727


Draft: wants to merge 8 commits into base: master
Changes from 1 commit
e2e tests: cont
ppalucki committed Jun 18, 2024
commit 91b445ecc8c4540a848b59aba4d0bed1e824c1b5
112 changes: 70 additions & 42 deletions deployment/pcm/Makefile
@@ -1,3 +1,10 @@

chart-lint-report.txt: values.yaml templates
docker run -ti --rm -w /pcm -v `realpath $(PWD)/../..`:/pcm quay.io/helmpack/chart-testing ct lint --charts deployment/pcm --validate-maintainers=false | tee chart-lint-report.txt

#
# kind cluster targets
#
# https://stackoverflow.com/questions/649246/is-it-possible-to-create-a-multi-line-string-variable-in-a-makefile
define KIND_EXTRA_MOUNTS
nodes:
@@ -7,9 +14,6 @@ nodes:
containerPath: /sys/fs/resctrl
endef

chart-lint-report.txt: values.yaml templates
docker run -ti --rm -w /pcm -v `realpath $(PWD)/../..`:/pcm quay.io/helmpack/chart-testing ct lint --charts deployment/pcm --validate-maintainers=false | tee chart-lint-report.txt

export KIND_EXTRA_MOUNTS
_kind_with_registry.sh:
curl -sl https://kind.sigs.k8s.io/examples/kind-with-registry.sh -o _kind_with_registry.sh.tmp
@@ -23,79 +27,103 @@ _kind_deploy_cluster: _kind_with_registry.sh
touch _kind_deploy_cluster


clean:
kind delete cluster
docker rm -f kind-registry
rm -fv _kind_with_registry.sh
rm -fv _kind_extra_mounts.txt
rm -fv _kind_with_registry.sh.tmp
rm -fv _kind_deploy_cluster
rm -fv _kind_deploy_prometheus
rm -fv _kind_deploy_pcm


#
# e2e-small: minimal E2e pcm pod only test
# 1) e2e-default: minimal e2e PCM pod-only test
#
_kind_deploy_pcm:
kind_deploy_pcm:
helm install pcm .
touch _kind_deploy_pcm
kubectl wait daemonset pcm --for=jsonpath='{.status.numberReady}'=1

kind_pcm_test:
helm test pcm

e2e-small: _kind_deploy_cluster _kind_deploy_pcm kind_pcm_test
e2e-default: _kind_deploy_cluster kind_deploy_pcm kind_pcm_test

#
# e2e-small-metal-nfd: minimal E2e pcm pod only test but with direct approach
# 2) e2e-default-local-image: minimal e2e PCM test with a locally built image
#
_kind_deploy_pcm_metal:
helm upgrade --install pcm . -f values-metal.yaml
touch _kind_deploy_pcm_metal
build_local_image:
(cd ../.. ; docker build . -t localhost:5001/pcm-local)
docker push localhost:5001/pcm-local

_kind_deploy_nfd:
helm upgrade --install pcm . -f values-metal.yaml
touch _kind_deploy_pcm_metal
kind_deploy_pcm_local_image:
helm upgrade --install --reset-values --wait pcm . -f values-local-image.yaml
kubectl wait daemonset pcm --for=jsonpath='{.spec.template.spec.containers[0].image}'=localhost:5001/pcm-local:latest
kubectl wait daemonset pcm --for=jsonpath='{.status.numberReady}'=1

kind_pcm_test:
helm test pcm

e2e-small-metal: _kind_deploy_cluster _kind_deploy_pcm_metal kind_pcm_test
e2e-default-local-image: _kind_deploy_cluster build_local_image kind_deploy_pcm_local_image kind_pcm_test

#
# e2e-prometheus: E2E test for podMonitor (pod monitor test)
# 3) e2e-prometheus: E2E test for podMonitor (pod monitor test)
#
_kind_deploy_prometheus:
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
helm upgrade --install prometheus prometheus-community/kube-prometheus-stack --set prometheus.prometheusSpec.podMonitorSelectorNilUsesHelmValues=false
kubectl wait sts prometheus-prometheus-kube-prometheus-prometheus --for=jsonpath='{.status.replicas}'=1
helm upgrade --install --reset-values prometheus prometheus-community/kube-prometheus-stack --set prometheus.prometheusSpec.podMonitorSelectorNilUsesHelmValues=false --wait
kubectl wait Prometheus prometheus-kube-prometheus-prometheus --for=jsonpath='{.status.availableReplicas}'=1
#kubectl wait sts prometheus-prometheus-kube-prometheus-prometheus --for=jsonpath='{.status.replicas}'=1
touch _kind_deploy_prometheus

_kind_deploy_pcm-with-prometheus:
helm upgrade --install pcm . --set podMonitor=true
kind_deploy_pcm_with_prometheus:
helm upgrade --install --reset-values pcm . --set podMonitor=true
kubectl wait daemonset pcm --for=jsonpath='{.status.numberReady}'=1

kind_pcm_test_prometheus:
kubectl proxy & sleep 10 && curl -sL http://127.0.0.1:8001/api/v1/namespaces/default/services/prometheus-kube-prometheus-prometheus:http-web/proxy/api/v1/query?query=Measurement_Interval_in_us | grep Measurement_Interval_in_us && kill %1

kind_pcm_prometheus_test:
kubectl proxy & sleep 5 && curl -sL http://127.0.0.1:8001/api/v1/namespaces/default/services/prometheus-kube-prometheus-prometheus:http-web/proxy/api/v1/query?query=Measurement_Interval_in_us | grep Measurement_Interval_in_us && kill %1
e2e-prometheus: _kind_deploy_cluster _kind_deploy_prometheus kind_deploy_pcm_with_prometheus kind_pcm_test kind_pcm_test_prometheus

#
# 4) e2e-metal-nfd: e2e test verifying that, with node-feature-discovery installed and the NFD values enabled, PCM is installed only on non-hypervisor systems with an Intel CPU and RDT available
#
_kind_deploy_nfd:
#kubectl apply -k https://github.com/kubernetes-sigs/node-feature-discovery/deployment/overlays/default?ref=v0.16.0-devel
helm repo add nfd https://kubernetes-sigs.github.io/node-feature-discovery/charts
helm upgrade --install --wait nfd nfd/node-feature-discovery --namespace node-feature-discovery --create-namespace
# please be patient NFD requires around 2 minutes to annotate the node ...
kubectl wait node --timeout=2m kind-control-plane --for=jsonpath='{.metadata.labels.feature\.node\.kubernetes\.io\/cpu-model\.vendor_id}'=Intel

kind_deploy_pcm_with_metal_nfd:
helm upgrade --install --reset-values pcm . -f values-metal-nfd.yaml
kubectl wait daemonset --timeout=2m pcm --for=jsonpath='{.status.numberReady}'=1

kind_pcm_test_nfd:
kubectl wait daemonset pcm --timeout=2m --for=jsonpath='{.spec.template.spec.nodeSelector.feature\.node\.kubernetes\.io\/cpu-model\.vendor_id}'=Intel
helm test pcm

e2e-prometheus: _kind_deploy_cluster _kind_deploy_prometheus _kind_deploy_pcm-with-prometheus kind_pcm_test kind_pcm_prometheus_test
e2e-metal-nfd: _kind_deploy_cluster _kind_deploy_nfd kind_deploy_pcm_with_metal_nfd kind_pcm_test

#
# e2e-vpa: VPA E2E tests
# 5) e2e-vpa: VPA E2E tests
#
_kind_autoscaler:
git clone --depth 1 --single-branch https://github.com/kubernetes/autoscaler _kind_autoscaler

_kind_deploy_metrics_server:
helm repo add metrics-server https://kubernetes-sigs.github.io/metrics-server/
helm repo update
helm upgrade --install --set args={--kubelet-insecure-tls} metrics-server metrics-server/metrics-server --namespace kube-system
helm upgrade --install --reset-values --set args={--kubelet-insecure-tls} metrics-server metrics-server/metrics-server --namespace kube-system
kubectl wait daemonset pcm --for=jsonpath='{.status.numberReady}'=1
touch _kind_deploy_metrics_server

_kind_deploy_vpa: _kind_autoscaler
./_kind_autoscaler/vertical-pod-autoscaler/hack/vpa-up.sh
touch _kind_deploy_vpa

_kind_deploy_pcm_with_vpa:
helm upgrade --install pcm . --set verticalPodAutoscaler.enabled=true
kind_deploy_pcm_with_vpa:
helm upgrade --install --reset-values pcm . --set verticalPodAutoscaler.enabled=true
kubectl wait daemonset pcm --for=jsonpath='{.status.numberReady}'=1

e2e-vpa: _kind_deploy_cluster _kind_deploy_vpa kind_deploy_pcm_with_vpa kind_pcm_test

#
# Cleanup
#

e2e-vpa: _kind_deploy_cluster _kind_deploy_vpa _kind_deploy_pcm_with_vpa kind_pcm_test
clean:
kind delete cluster
docker rm -f kind-registry
rm -fv _kind_with_registry.sh
rm -fv _kind_extra_mounts.txt
rm -fv _kind_with_registry.sh.tmp
rm -fv _kind_deploy_cluster
rm -fv _kind_deploy_prometheus
33 changes: 29 additions & 4 deletions deployment/pcm/README.md
@@ -5,7 +5,7 @@ Helm chart instructions
### Features:

- Configurable as a non-privileged (value: `privileged`, default: false) or privileged container,
- Support for bare-metal and VM host configurations (files: [values-metal.yaml](values-metal.yaml), [values-vm.yaml](values-vm.yaml)),
- Support for bare-metal and VM host configurations (files: [values-metal-nfd.yaml](values-metal-nfd.yaml), [values-vm.yaml](values-vm.yaml)),
- Ability to deploy multiple releases side by side, each configured differently, to handle different kinds of machines (bare-metal, VM) at the [same time](#heterogeneous-mixed-vmmetal-instances-cluster),
- Linux Watchdog handling (controlled with `PCM_KEEP_NMI_WATCHDOG`, `PCM_NO_AWS_WORKAROUND`, `nmiWatchdogMount` values).
- Deploy to its own namespace with "helm install ... **-n pcm --create-namespace**" (a combined example follows this list).
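
For example, these options can be combined in a single install; a minimal sketch (the release name, namespace, and choice of values file are illustrative only):

```
helm install pcm . -n pcm --create-namespace -f values-vm.yaml
```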
@@ -77,6 +77,22 @@ More information here: https://kubernetes.io/docs/tutorials/security/ns-level-ps
- hostPort 9738 is exposed on the host (TODO: security review; consider TLS together with Prometheus scraping).
- Prometheus podMonitor is disabled (enable it with --set podMonitor=true; see the example below).
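
For instance, the PodMonitor can be switched on at install or upgrade time; a minimal sketch mirroring the `kind_deploy_pcm_with_prometheus` target in the Makefile above:

```
helm upgrade --install pcm . --set podMonitor=true
```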

### TLS

TODO:
- requires pcm-sensor-server to be built with SSL support
- ERROR !!!!

```
mkdir build
cd build
cmake .. -DCMAKE_CXX_FLAGS='-DUSE_SSL -lssl'
zypper install openssl-devel
make pcm-sensor-server -j
openssl req -x509 -newkey rsa:4096 -keyout key.pem -out cert.pem -sha256 -days 3650 -nodes -subj "/C=XX/ST=StateName/L=CityName/O=CompanyName/OU=CompanySectionName/CN=CommonNameOrHostname"
./bin/pcm-sensor-server -s -p 8443 --certificateFile cert.pem --privateKeyFile key.pem
```
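
With the server started via `-s` as above, the endpoint can be sanity-checked locally; a minimal sketch assuming the self-signed certificate from the previous step and that metrics are exposed on the default `/metrics` path:

```
curl -k https://localhost:8443/metrics | head
```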

### Validation on local kind cluster

#### Requirements
@@ -282,7 +298,7 @@ helm install pcm . -f values-direct-privileged.yaml
#### Homogeneous bare metal instances cluster (full set of metrics)

```
helm install pcm . -f values-metal.yaml
helm install pcm . -f values-metal-nfd.yaml
```

#### Homogeneous VM instances cluster (limited set of core metrics)
@@ -293,10 +309,10 @@ helm install pcm . -f values-vm.yaml

#### Heterogeneous (mixed VM/metal instances) cluster

values-metal.yaml requires node-feature-discovery to be preinstalled
values-metal-nfd.yaml requires node-feature-discovery to be preinstalled
```
helm install pcm-vm . -f values-vm.yaml
helm install pcm-metal . -f values-metal.yaml
helm install pcm-metal . -f values-metal-nfd.yaml
```

#### Direct method as non-privileged container (not recommended)
@@ -402,3 +418,12 @@ kubectl run -ti --rm --image busybox pcm-test-connection-manual -- wget -S -T 15
| | energy | | | cpucounters.cpp initEnergyMonitoring() | |


### E2E tests

The following end-to-end tests, based on a kind environment, are provided as make targets (a sample invocation follows the list):

- `e2e-default` - tests PCM with the default configuration (indirect) and checks connectivity by calling `helm test`,
- `e2e-default-local-image` - same as above but builds and deploys PCM with a locally built image,
- `e2e-prometheus` - tests the PCM chart with a PodMonitor deployed alongside the Prometheus stack and queries Prometheus for the collected data,
- `e2e-vpa` - deploys PCM with a VerticalPodAutoscaler (requires metrics-server to be deployed alongside),
- `e2e-metal-nfd` - tests the PCM chart on bare metal, scheduled using features exposed by node-feature-discovery (uses values-metal-nfd.yaml).
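
Assuming Docker, kind, helm, and kubectl are available locally, a typical run from this directory might look like the sketch below (targets as defined in the Makefile above):

```
cd deployment/pcm
make e2e-default       # create the kind cluster, install the chart, run helm test
make e2e-prometheus    # add kube-prometheus-stack and query Prometheus for PCM metrics
make clean             # delete the kind cluster and helper stamp files
```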
@@ -5,4 +5,3 @@ PCM_NO_AWS_WORKAROUND: 1
PCM_KEEP_NMI_WATCHDOG: 0
nfd: true
nfdBaremetalAffinity: true
nfdRDTAffinity: true
3 changes: 2 additions & 1 deletion deployment/pcm/values.yaml
@@ -124,7 +124,8 @@ nriBalloonsPolicyIntegration: false
nfd: false
# if enabled daemonset nodeAffinity will require node without feature.node.kubernetes.io/cpu-cpuid.HYPERVISOR flag (requires nfd=true)
nfdBaremetalAffinity: false
# if enabled, followin RDT labels will be required for scheduling (requires nfd=true)
# if enabled, following RDT labels will be required for scheduling (requires nfd=true)
# TODO: those labels are no longer available with default node-feature-discovery deployment
# feature.node.kubernetes.io/cpu-rdt.RDTCMT=true
# feature.node.kubernetes.io/cpu-rdt.RDTL3CA=true
# feature.node.kubernetes.io/cpu-rdt.RDTMBA=true