kubectl 备忘单

发表于 2021-04-30 |

Kubectl 上下文和配置

设置 kubectl 与哪个 Kubernetes 集群进行通信并修改配置信息。

kubectl config view # 显示合并的 kubeconfig 配置。
# 同时使用多个 kubeconfig 文件并查看合并的配置
KUBECONFIG=~/.kube/config:~/.kube/kubconfig2 kubectl config view
# 获取 e2e 用户的密码
kubectl config view -o jsonpath='{.users[?(@.name == "e2e")].user.password}'
kubectl config view -o jsonpath='{.users[].name}'    # 显示第一个用户
kubectl config view -o jsonpath='{.users[*].name}'   # 获取用户列表
kubectl config get-contexts                          # 显示上下文列表
kubectl config current-context                       # 展示当前所处的上下文
kubectl config use-context my-cluster-name           # 设置默认的上下文为 my-cluster-name
# 添加新的用户配置到 kubeconf 中，使用 basic auth 进行身份认证
kubectl config set-credentials kubeuser/foo.kubernetes.com --username=kubeuser --password=kubepassword
# 在指定上下文中持久性地保存名字空间，供所有后续 kubectl 命令使用
kubectl config set-context --current --namespace=ggckad-s2
# 使用特定的用户名和名字空间设置上下文
kubectl config set-context gce --user=cluster-admin --namespace=foo \
  && kubectl config use-context gce
kubectl config unset users.foo                       # 删除用户 foo

Kubectl apply

apply 通过定义 Kubernetes 资源的文件来管理应用。它通过运行 kubectl apply 在集群中创建和更新资源。这是在生产中管理 Kubernetes 应用的推荐方法

创建对象

Kubernetes 配置可以用 YAML 或 JSON 定义。可以使用的文件扩展名有 .yaml、.yml 和 .json。

kubectl apply -f ./my-manifest.yaml           # 创建资源
kubectl apply -f ./my1.yaml -f ./my2.yaml     # 使用多个文件创建
kubectl apply -f ./dir                        # 基于目录下的所有清单文件创建资源
kubectl apply -f https://git.io/vPieo         # 从 URL 中创建资源
kubectl create deployment nginx --image=nginx # 启动单实例 nginx
# 创建一个打印 “Hello World” 的 Job
kubectl create job hello --image=busybox -- echo "Hello World" 
# 创建一个打印 “Hello World” 间隔1分钟的 CronJob
kubectl create cronjob hello --image=busybox   --schedule="*/1 * * * *" -- echo "Hello World"    
kubectl explain pods                          # 获取 pod 清单的文档说明
# 从标准输入创建多个 YAML 对象
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
  name: busybox-sleep
spec:
  containers:
  - name: busybox
    image: busybox
    args:
    - sleep
    - "1000000"
---
apiVersion: v1
kind: Pod
metadata:
  name: busybox-sleep-less
spec:
  containers:
  - name: busybox
    image: busybox
    args:
    - sleep
    - "1000"
EOF
# 创建有多个 key 的 Secret
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Secret
metadata:
  name: mysecret
type: Opaque
data:
  password: $(echo -n "s33msi4" | base64 -w0)
  username: $(echo -n "jane" | base64 -w0)
EOF

查看和查找资源

# get 命令的基本输出
kubectl get services                          # 列出当前命名空间下的所有 services
kubectl get pods --all-namespaces             # 列出所有命名空间下的全部的 Pods
kubectl get pods -o wide                      # 列出当前命名空间下的全部 Pods，并显示更详细的信息
kubectl get deployment my-dep                 # 列出某个特定的 Deployment
kubectl get pods                              # 列出当前命名空间下的全部 Pods
kubectl get pod my-pod -o yaml                # 获取一个 pod 的 YAML
# describe 命令的详细输出
kubectl describe nodes my-node
kubectl describe pods my-pod
# 列出当前名字空间下所有 Services，按名称排序
kubectl get services --sort-by=.metadata.name
# 列出 Pods，按重启次数排序
kubectl get pods --sort-by='.status.containerStatuses[0].restartCount'
# 列举所有 PV 持久卷，按容量排序
kubectl get pv --sort-by=.spec.capacity.storage
# 获取包含 app=cassandra 标签的所有 Pods 的 version 标签
kubectl get pods --selector=app=cassandra -o \
  jsonpath='{.items[*].metadata.labels.version}'
# 检索带有 “.” 键值，例： 'ca.crt'
kubectl get configmap myconfig \
  -o jsonpath='{.data.ca\.crt}'
# 获取所有工作节点（使用选择器以排除标签名称为 'node-role.kubernetes.io/master' 的结果）
kubectl get node --selector='!node-role.kubernetes.io/master'
# 获取当前命名空间中正在运行的 Pods
kubectl get pods --field-selector=status.phase=Running
# 获取全部节点的 ExternalIP 地址
kubectl get nodes -o jsonpath='{.items[*].status.addresses[?(@.type=="ExternalIP")].address}'
# 列出属于某个特定 RC 的 Pods 的名称
# 在转换对于 jsonpath 过于复杂的场合，"jq" 命令很有用；可以在 https://stedolan.github.io/jq/ 找到它。
sel=${$(kubectl get rc my-rc --output=json | jq -j '.spec.selector | to_entries | .[] | "\(.key)=\(.value),"')%?}
echo $(kubectl get pods --selector=$sel --output=jsonpath={.items..metadata.name})
# 显示所有 Pods 的标签（或任何其他支持标签的 Kubernetes 对象）
kubectl get pods --show-labels
# 检查哪些节点处于就绪状态
JSONPATH='{range .items[*]}{@.metadata.name}:{range @.status.conditions[*]}{@.type}={@.status};{end}{end}' \
 && kubectl get nodes -o jsonpath="$JSONPATH" | grep "Ready=True"
# 列出被一个 Pod 使用的全部 Secret
kubectl get pods -o json | jq '.items[].spec.containers[].env[]?.valueFrom.secretKeyRef.name' | grep -v null | sort | uniq
# 列举所有 Pods 中初始化容器的容器 ID（containerID）
# Helpful when cleaning up stopped containers, while avoiding removal of initContainers.
kubectl get pods --all-namespaces -o jsonpath='{range .items[*].status.initContainerStatuses[*]}{.containerID}{"\n"}{end}' | cut -d/ -f3
# 列出事件（Events），按时间戳排序
kubectl get events --sort-by=.metadata.creationTimestamp
# 比较当前的集群状态和假定某清单被应用之后的集群状态
kubectl diff -f ./my-manifest.yaml
# 生成一个句点分隔的树，其中包含为节点返回的所有键
# 在复杂的嵌套JSON结构中定位键时非常有用
kubectl get nodes -o json | jq -c 'path(..)|[.[]|tostring]|join(".")'
# 生成一个句点分隔的树，其中包含为pod等返回的所有键
kubectl get pods -o json | jq -c 'path(..)|[.[]|tostring]|join(".")'

更新资源

kubectl set image deployment/frontend www=image:v2               # 滚动更新 "frontend" Deployment 的 "www" 容器镜像
kubectl rollout history deployment/frontend                      # 检查 Deployment 的历史记录，包括版本
kubectl rollout undo deployment/frontend                         # 回滚到上次部署版本
kubectl rollout undo deployment/frontend --to-revision=2         # 回滚到特定部署版本
kubectl rollout status -w deployment/frontend                    # 监视 "frontend" Deployment 的滚动升级状态直到完成
kubectl rollout restart deployment/frontend                      # 轮替重启 "frontend" Deployment
cat pod.json | kubectl replace -f -                              # 通过传入到标准输入的 JSON 来替换 Pod
# 强制替换，删除后重建资源。会导致服务不可用。
kubectl replace --force -f ./pod.json
# 为多副本的 nginx 创建服务，使用 80 端口提供服务，连接到容器的 8000 端口。
kubectl expose rc nginx --port=80 --target-port=8000
# 将某单容器 Pod 的镜像版本（标签）更新到 v4
kubectl get pod mypod -o yaml | sed 's/\(image: myimage\):.*$/\1:v4/' | kubectl replace -f -
kubectl label pods my-pod new-label=awesome                      # 添加标签
kubectl annotate pods my-pod icon-url=http://goo.gl/XXBTWq       # 添加注解
kubectl autoscale deployment foo --min=2 --max=10                # 对 "foo" Deployment 自动伸缩容

部分更新资源

# 部分更新某节点
kubectl patch node k8s-node-1 -p '{"spec":{"unschedulable":true}}'
# 更新容器的镜像；spec.containers[*].name 是必须的。因为它是一个合并性质的主键。
kubectl patch pod valid-pod -p '{"spec":{"containers":[{"name":"kubernetes-serve-hostname","image":"new image"}]}}'
# 使用带位置数组的 JSON patch 更新容器的镜像
kubectl patch pod valid-pod --type='json' -p='[{"op": "replace", "path": "/spec/containers/0/image", "value":"new image"}]'
# 使用带位置数组的 JSON patch 禁用某 Deployment 的 livenessProbe
kubectl patch deployment valid-deployment  --type json   -p='[{"op": "remove", "path": "/spec/template/spec/containers/0/livenessProbe"}]'
# 在带位置数组中添加元素
kubectl patch sa default --type='json' -p='[{"op": "add", "path": "/secrets/1", "value": {"name": "whatever" } }]'

编辑资源

1	kubectl edit svc/docker-registry # 编辑名为 docker-registry 的服务

删除资源

kubectl delete -f ./pod.json                                              # 删除在 pod.json 中指定的类型和名称的 Pod
kubectl delete pod,service baz foo                                        # 删除名称为 "baz" 和 "foo" 的 Pod 和服务
kubectl delete pods,services -l name=myLabel                              # 删除包含 name=myLabel 标签的 pods 和服务
kubectl -n my-ns delete pod,svc --all                                     # 删除在 my-ns 名字空间中全部的 Pods 和服务
# 删除所有与 pattern1 或 pattern2 awk 模式匹配的 Pods
kubectl get pods  -n mynamespace --no-headers=true | awk '/pattern1|pattern2/{print $1}' | xargs  kubectl delete -n mynamespace pod

与运行中的 Pods 进行交互

kubectl logs my-pod                                 # 获取 pod 日志（标准输出）
kubectl logs -l name=myLabel                        # 获取含 name=myLabel 标签的 Pods 的日志（标准输出）
kubectl logs my-pod --previous                      # 获取上个容器实例的 pod 日志（标准输出）
kubectl logs my-pod -c my-container                 # 获取 Pod 容器的日志（标准输出, 多容器场景）
kubectl logs -l name=myLabel -c my-container        # 获取含 name=myLabel 标签的 Pod 容器日志（标准输出, 多容器场景）
kubectl logs my-pod -c my-container --previous      # 获取 Pod 中某容器的上个实例的日志（标准输出, 多容器场景）
kubectl logs -f my-pod                              # 流式输出 Pod 的日志（标准输出）
kubectl logs -f my-pod -c my-container              # 流式输出 Pod 容器的日志（标准输出, 多容器场景）
kubectl logs -f -l name=myLabel --all-containers    # 流式输出含 name=myLabel 标签的 Pod 的所有日志（标准输出）
kubectl run -i --tty busybox --image=busybox -- sh  # 以交互式 Shell 运行 Pod
kubectl run nginx --image=nginx -n mynamespace      # 在指定名字空间中运行 nginx Pod
kubectl run nginx --image=nginx                     # 运行 ngins Pod 并将其规约写入到名为 pod.yaml 的文件
  --dry-run=client -o yaml > pod.yaml
kubectl attach my-pod -i                            # 挂接到一个运行的容器中
kubectl port-forward my-pod 5000:6000               # 在本地计算机上侦听端口 5000 并转发到 my-pod 上的端口 6000
kubectl exec my-pod -- ls /                         # 在已有的 Pod 中运行命令（单容器场景）
kubectl exec --stdin --tty my-pod -- /bin/sh        # 使用交互 shell 访问正在运行的 Pod (一个容器场景)
kubectl exec my-pod -c my-container -- ls /         # 在已有的 Pod 中运行命令（多容器场景）
kubectl top pod POD_NAME --containers               # 显示给定 Pod 和其中容器的监控数据

与节点和集群进行交互

kubectl cordon my-node                                                # 标记 my-node 节点为不可调度
kubectl drain my-node                                                 # 对 my-node 节点进行清空操作，为节点维护做准备
kubectl uncordon my-node                                              # 标记 my-node 节点为可以调度
kubectl top node my-node                                              # 显示给定节点的度量值
kubectl cluster-info                                                  # 显示主控节点和服务的地址
kubectl cluster-info dump                                             # 将当前集群状态转储到标准输出
kubectl cluster-info dump --output-directory=/path/to/cluster-state   # 将当前集群状态输出到 /path/to/cluster-state
# 如果已存在具有指定键和效果的污点，则替换其值为指定值。
kubectl taint nodes foo dedicated=special-user:NoSchedule

格式化输出

要以特定格式将详细信息输出到终端窗口，将 -o（或者 --output）参数添加到支持的 kubectl 命令中。

输出格式	描述
`-o=custom-columns=<spec>`	使用逗号分隔的自定义列来打印表格
`-o=custom-columns-file=<filename>`	使用 `<filename>` 文件中的自定义列模板打印表格
`-o=json`	输出 JSON 格式的 API 对象
`-o=jsonpath=<template>`	打印 jsonpath 表达式中定义的字段
`-o=jsonpath-file=<filename>`	打印在 `<filename>` 文件中定义的 jsonpath 表达式所指定的字段。
`-o=name`	仅打印资源名称而不打印其他内容
`-o=wide`	以纯文本格式输出额外信息，对于 Pod 来说，输出中包含了节点名称
`-o=yaml`	输出 YAML 格式的 API 对象

Kubectl 日志输出详细程度和调试

Kubectl 日志输出详细程度是通过 -v 或者 --v 来控制的，参数后跟一个数字表示日志的级别。 Kubernetes 通用的日志习惯和相关的日志级别在这里有相应的描述。

详细程度	描述
`--v=0`	用于那些应该始终对运维人员可见的信息，因为这些信息一般很有用。
`--v=1`	如果您不想要看到冗余信息，此值是一个合理的默认日志级别。
`--v=2`	输出有关服务的稳定状态的信息以及重要的日志消息，这些信息可能与系统中的重大变化有关。这是建议大多数系统设置的默认日志级别。
`--v=3`	包含有关系统状态变化的扩展信息。
`--v=4`	包含调试级别的冗余信息。
`--v=5`	跟踪级别的详细程度。
`--v=6`	显示所请求的资源。
`--v=7`	显示 HTTP 请求头。
`--v=8`	显示 HTTP 请求内容。
`--v=9`	显示 HTTP 请求内容而且不截断内容。

Prometheus Operator 自定义报警

发表于 2021-04-29 |

prometheus
钉钉预警

部署Prometheus 软件

首先，检查一下我们所运行的Helm版本

1 2	$ helm version version.BuildInfo{Version:"v3.1.2", GitCommit:"d878d4d45863e42fd5cff6743294a11d28a9abce", GitTreeState:"clean", GoVersion:"go1.13.8"}

当我们使用Helm 3时，我们需要添加一个stable 镜像仓库，因为默认状态下不会设置该仓库。

[root@ops1 test]# helm repo add stable http://mirror.azure.cn/kubernetes/charts/
[root@ops1 test]# helm repo list
NAME            URL
local           http://127.0.0.1:8879/charts                       
stable          http://mirror.azure.cn/kubernetes/charts/
incubator       http://mirror.azure.cn/kubernetes/charts-incubator/
[root@ops1 test]# helm repo update

Helm配置完成后，我们可以开始安装prometheus-operator

$ kubectl create namespace monitoring
namespace/monitoring created
[root@ops1 test]# helm search prometheus
stable/prometheus-operator              8.12.0          0.37.0          Provides easy monitoring definitions for Kubernetes servi...
[root@ops1 test]# helm fetch stable/prometheus-operator --version 8.12.0
[root@ops1 test]# tar -zxf prometheus-operator-8.12.0.tgz 
$ cd prometheus-operator

编辑values.yaml

cat values.yaml

# Default values for prometheus-operator.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
## Provide a name in place of prometheus-operator for `app:` labels
##
nameOverride: ""
## Provide a k8s version to auto dashboard import script example: kubeTargetVersionOverride: 1.16.6
##
kubeTargetVersionOverride: ""
## Provide a name to substitute for the full names of resources
##
fullnameOverride: ""
## Labels to apply to all resources
##
commonLabels: {}
# scmhash: abc123
# myLabel: aakkmd
## Create default rules for monitoring the cluster
##
defaultRules:
  create: true
  rules:
    alertmanager: true
    etcd: true
    general: true
    k8s: true
    kubeApiserver: true
    kubeApiserverError: true
    kubePrometheusNodeAlerting: true
    kubePrometheusNodeRecording: true
    kubernetesAbsent: true
    kubernetesApps: true
    kubernetesResources: true
    kubernetesStorage: true
    kubernetesSystem: true
    kubeScheduler: true
    network: true
    node: true
    prometheus: true
    prometheusOperator: true
    time: true
  ## Runbook url prefix for default rules
  runbookUrl: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#
  ## Reduce app namespace alert scope
  appNamespacesTarget: ".*"
  ## Labels for default rules
  labels: {}
  ## Annotations for default rules
  annotations: {}
## Provide custom recording or alerting rules to be deployed into the cluster.
##
additionalPrometheusRules: []
#  - name: my-rule-file
#    groups:
#      - name: my_group
#        rules:
#        - record: my_record
#          expr: 100 * my_record
##
global:
  rbac:
    create: true
    pspEnabled: true
  ## Reference to one or more secrets to be used when pulling images
  ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/
  ##
  imagePullSecrets: []
  # - name: "image-pull-secret"
## Configuration for alertmanager
## ref: https://prometheus.io/docs/alerting/alertmanager/
##
alertmanager:
  ## Deploy alertmanager
  ##
  enabled: true
  ## Api that prometheus will use to communicate with alertmanager. Possible values are v1, v2
  ##
  apiVersion: v2
  ## Service account for Alertmanager to use.
  ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/
  ##
  serviceAccount:
    create: true
    name: ""
  ## Configure pod disruption budgets for Alertmanager
  ## ref: https://kubernetes.io/docs/tasks/run-application/configure-pdb/#specifying-a-poddisruptionbudget
  ## This configuration is immutable once created and will require the PDB to be deleted to be changed
  ## https://github.com/kubernetes/kubernetes/issues/45398
  ##
  podDisruptionBudget:
    enabled: false
    minAvailable: 1
    maxUnavailable: ""
  ## Alertmanager configuration directives
  ## ref: https://prometheus.io/docs/alerting/configuration/#configuration-file
  ##      https://prometheus.io/webtools/alerting/routing-tree-editor/
  ##
  config:
    global:
      resolve_timeout: 5m
    route:
      group_by: ['job']
      group_wait: 30s
      group_interval: 5m
      repeat_interval: 12h
      receiver: 'null'
      routes:
      - match:
          alertname: Watchdog
        receiver: 'null'
    receivers:
    - name: 'null'
  ## Pass the Alertmanager configuration directives through Helm's templating
  ## engine. If the Alertmanager configuration contains Alertmanager templates,
  ## they'll need to be properly escaped so that they are not interpreted by
  ## Helm
  ## ref: https://helm.sh/docs/developing_charts/#using-the-tpl-function
  ##      https://prometheus.io/docs/alerting/configuration/#%3Ctmpl_string%3E
  ##      https://prometheus.io/docs/alerting/notifications/
  ##      https://prometheus.io/docs/alerting/notification_examples/
  tplConfig: false
  ## Alertmanager template files to format alerts
  ## ref: https://prometheus.io/docs/alerting/notifications/
  ##      https://prometheus.io/docs/alerting/notification_examples/
  ##
  templateFiles: {}
  #
  ## An example template:
  #   template_1.tmpl: |-
  #       {{ define "cluster" }}{{ .ExternalURL | reReplaceAll ".*alertmanager\\.(.*)" "$1" }}{{ end }}
  #
  #       {{ define "slack.myorg.text" }}
  #       {{- $root := . -}}
  #       {{ range .Alerts }}
  #         *Alert:* {{ .Annotations.summary }} - `{{ .Labels.severity }}`
  #         *Cluster:*  {{ template "cluster" $root }}
  #         *Description:* {{ .Annotations.description }}
  #         *Graph:* <{{ .GeneratorURL }}|:chart_with_upwards_trend:>
  #         *Runbook:* <{{ .Annotations.runbook }}|:spiral_note_pad:>
  #         *Details:*
  #           {{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}`
  #           {{ end }}
  ingress:
    enabled: false
    annotations: {}
    labels: {}
    ## Hosts must be provided if Ingress is enabled.
    ##
    hosts: []
      # - alertmanager.domain.com
    ## Paths to use for ingress rules - one path should match the alertmanagerSpec.routePrefix
    ##
    paths: []
    # - /
    ## TLS configuration for Alertmanager Ingress
    ## Secret must be manually created in the namespace
    ##
    tls: []
    # - secretName: alertmanager-general-tls
    #   hosts:
    #   - alertmanager.example.com
  ## Configuration for Alertmanager secret
  ##
  secret:
    annotations: {}
  ## Configuration for Alertmanager service
  ##
  service:
    annotations: {}
    labels: {}
    clusterIP: ""
    ## Port for Alertmanager Service to listen on
    ##
    port: 9093
    ## To be used with a proxy extraContainer port
    ##
    targetPort: 9093
    ## Port to expose on each node
    ## Only used if service.type is 'NodePort'
    ##
    nodePort: 30091
    ## List of IP addresses at which the Prometheus server service is available
    ## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips
    ##
    externalIPs: []
    loadBalancerIP: ""
    loadBalancerSourceRanges: []
    ## Service type
    ##
    #type: ClusterIP
    type: NodePort
  ## If true, create a serviceMonitor for alertmanager
  ##
  serviceMonitor:
    ## Scrape interval. If not set, the Prometheus default scrape interval is used.
    ##
    interval: ""
    selfMonitor: true
    ## 	metric relabel configs to apply to samples before ingestion.
    ##
    metricRelabelings: []
    # - action: keep
    #   regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
    #   sourceLabels: [__name__]
    # 	relabel configs to apply to samples before ingestion.
    ##
    relabelings: []
    # - sourceLabels: [__meta_kubernetes_pod_node_name]
    #   separator: ;
    #   regex: ^(.*)$
    #   targetLabel: nodename
    #   replacement: $1
    #   action: replace
  ## Settings affecting alertmanagerSpec
  ## ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#alertmanagerspec
  ##
  alertmanagerSpec:
    ## Standard object’s metadata. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#metadata
    ## Metadata Labels and Annotations gets propagated to the Alertmanager pods.
    ##
    podMetadata: {}
    ## Image of Alertmanager
    ##
    image:
      repository: quay.io/prometheus/alertmanager
      tag: v0.20.0
    ## If true then the user will be responsible to provide a secret with alertmanager configuration
    ## So when true the config part will be ignored (including templateFiles) and the one in the secret will be used
    ##
    useExistingSecret: false
    ## Secrets is a list of Secrets in the same namespace as the Alertmanager object, which shall be mounted into the
    ## Alertmanager Pods. The Secrets are mounted into /etc/alertmanager/secrets/.
    ##
    secrets: []
    ## ConfigMaps is a list of ConfigMaps in the same namespace as the Alertmanager object, which shall be mounted into the Alertmanager Pods.
    ## The ConfigMaps are mounted into /etc/alertmanager/configmaps/.
    ##
    configMaps: []
    ## ConfigSecret is the name of a Kubernetes Secret in the same namespace as the Alertmanager object, which contains configuration for
    ## this Alertmanager instance. Defaults to 'alertmanager-' The secret is mounted into /etc/alertmanager/config.
    ##
    # configSecret:
    ## Define Log Format
    # Use logfmt (default) or json-formatted logging
    logFormat: logfmt
    ## Log level for Alertmanager to be configured with.
    ##
    logLevel: info
    ## Size is the expected size of the alertmanager cluster. The controller will eventually make the size of the
    ## running cluster equal to the expected size.
    replicas: 1
    ## Time duration Alertmanager shall retain data for. Default is '120h', and must match the regular expression
    ## [0-9]+(ms|s|m|h) (milliseconds seconds minutes hours).
    ##
    retention: 120h
    ## Storage is the definition of how storage will be used by the Alertmanager instances.
    ## ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/user-guides/storage.md
    ##
    storage: {}
    # volumeClaimTemplate:
    #   spec:
    #     storageClassName: gluster
    #     accessModes: ["ReadWriteOnce"]
    #     resources:
    #       requests:
    #         storage: 50Gi
    #   selector: {}
    ## 	The external URL the Alertmanager instances will be available under. This is necessary to generate correct URLs. This is necessary if Alertmanager is not served from root of a DNS name.	string	false
    ##
    externalUrl:
    ## 	The route prefix Alertmanager registers HTTP handlers for. This is useful, if using ExternalURL and a proxy is rewriting HTTP routes of a request, and the actual ExternalURL is still true,
    ## but the server serves requests under a different route prefix. For example for use with kubectl proxy.
    ##
    routePrefix: /
    ## If set to true all actions on the underlying managed objects are not going to be performed, except for delete actions.
    ##
    paused: false
    ## Define which Nodes the Pods are scheduled on.
    ## ref: https://kubernetes.io/docs/user-guide/node-selection/
    ##
    nodeSelector: {}
    ## Define resources requests and limits for single Pods.
    ## ref: https://kubernetes.io/docs/user-guide/compute-resources/
    ##
    resources: {}
    # requests:
    #   memory: 400Mi
    ## Pod anti-affinity can prevent the scheduler from placing Prometheus replicas on the same node.
    ## The default value "soft" means that the scheduler should *prefer* to not schedule two replica pods onto the same node but no guarantee is provided.
    ## The value "hard" means that the scheduler is *required* to not schedule two replica pods onto the same node.
    ## The value "" will disable pod anti-affinity so that no anti-affinity rules will be configured.
    ##
    podAntiAffinity: ""
    ## If anti-affinity is enabled sets the topologyKey to use for anti-affinity.
    ## This can be changed to, for example, failure-domain.beta.kubernetes.io/zone
    ##
    podAntiAffinityTopologyKey: kubernetes.io/hostname
    ## Assign custom affinity rules to the alertmanager instance
    ## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
    ##
    affinity: {}
    # nodeAffinity:
    #   requiredDuringSchedulingIgnoredDuringExecution:
    #     nodeSelectorTerms:
    #     - matchExpressions:
    #       - key: kubernetes.io/e2e-az-name
    #         operator: In
    #         values:
    #         - e2e-az1
    #         - e2e-az2
    ## If specified, the pod's tolerations.
    ## ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/
    ##
    tolerations: []
    # - key: "key"
    #   operator: "Equal"
    #   value: "value"
    #   effect: "NoSchedule"
    ## SecurityContext holds pod-level security attributes and common container settings.
    ## This defaults to non root user with uid 1000 and gid 2000.	*v1.PodSecurityContext	false
    ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/
    ##
    securityContext:
      runAsNonRoot: true
      runAsUser: 1000
      fsGroup: 2000
    ## ListenLocal makes the Alertmanager server listen on loopback, so that it does not bind against the Pod IP.
    ## Note this is only for the Alertmanager UI, not the gossip communication.
    ##
    listenLocal: false
    ## Containers allows injecting additional containers. This is meant to allow adding an authentication proxy to an Alertmanager pod.
    ##
    containers: []
    ## Priority class assigned to the Pods
    ##
    priorityClassName: ""
    ## AdditionalPeers allows injecting a set of additional Alertmanagers to peer with to form a highly available cluster.
    ##
    additionalPeers: []
    ## PortName to use for Alert Manager.
    ##
    portName: "web"
## Using default values from https://github.com/helm/charts/blob/master/stable/grafana/values.yaml
##
grafana:
  enabled: true
  ## Deploy default dashboards.
  ##
  defaultDashboardsEnabled: true
  adminPassword: prom-operator
  ingress:
    ## If true, Grafana Ingress will be created
    ##
    enabled: false
    ## Annotations for Grafana Ingress
    ##
    annotations: {}
      # kubernetes.io/ingress.class: nginx
      # kubernetes.io/tls-acme: "true"
    ## Labels to be added to the Ingress
    ##
    labels: {}
    ## Hostnames.
    ## Must be provided if Ingress is enable.
    ##
    # hosts:
    #   - grafana.domain.com
    hosts: []
    ## Path for grafana ingress
    path: /
    ## TLS configuration for grafana Ingress
    ## Secret must be manually created in the namespace
    ##
    tls: []
    # - secretName: grafana-general-tls
    #   hosts:
    #   - grafana.example.com
  sidecar:
    dashboards:
      enabled: true
      label: grafana_dashboard
    datasources:
      enabled: true
      defaultDatasourceEnabled: true
      ## Annotations for Grafana datasource configmaps
      ##
      annotations: {}
      ## Create datasource for each Pod of Prometheus StatefulSet;
      ## this uses headless service `prometheus-operated` which is
      ## created by Prometheus Operator
      ## ref: https://git.io/fjaBS
      createPrometheusReplicasDatasources: false
      label: grafana_datasource
  extraConfigmapMounts: []
  # - name: certs-configmap
  #   mountPath: /etc/grafana/ssl/
  #   configMap: certs-configmap
  #   readOnly: true
  ## Configure additional grafana datasources
  ## ref: http://docs.grafana.org/administration/provisioning/#datasources
  additionalDataSources: []
  #  - name: prometheus-sample
  #    access: proxy
  #    basicAuth: true
  #    basicAuthPassword: pass
  #    basicAuthUser: daco
  #    editable: false
  #    jsonData:
  #        tlsSkipVerify: true
  #    orgId: 1
  #    type: prometheus
  #    url: https://prometheus.svc:9090
  #    version: 1
  ## Passed to grafana subchart and used by servicemonitor below
  ##
  service:
    portName: service
  ## If true, create a serviceMonitor for grafana
  ##
  serviceMonitor:
    ## Scrape interval. If not set, the Prometheus default scrape interval is used.
    ##
    interval: ""
    selfMonitor: true
    ## 	metric relabel configs to apply to samples before ingestion.
    ##
    metricRelabelings: []
    # - action: keep
    #   regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
    #   sourceLabels: [__name__]
    # 	relabel configs to apply to samples before ingestion.
    ##
    relabelings: []
    # - sourceLabels: [__meta_kubernetes_pod_node_name]
    #   separator: ;
    #   regex: ^(.*)$
    #   targetLabel: nodename
    #   replacement: $1
    #   action: replace
## Component scraping the kube api server
##
kubeApiServer:
  enabled: true
  tlsConfig:
    serverName: kubernetes
    insecureSkipVerify: false
  ## If your API endpoint address is not reachable (as in AKS) you can replace it with the kubernetes service
  ##
  relabelings: []
  # - sourceLabels:
  #     - __meta_kubernetes_namespace
  #     - __meta_kubernetes_service_name
  #     - __meta_kubernetes_endpoint_port_name
  #   action: keep
  #   regex: default;kubernetes;https
  # - targetLabel: __address__
  #   replacement: kubernetes.default.svc:443
  serviceMonitor:
    ## Scrape interval. If not set, the Prometheus default scrape interval is used.
    ##
    interval: ""
    jobLabel: component
    selector:
      matchLabels:
        component: apiserver
        provider: kubernetes
    ## 	metric relabel configs to apply to samples before ingestion.
    ##
    metricRelabelings: []
    # - action: keep
    #   regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
    #   sourceLabels: [__name__]
## Component scraping the kubelet and kubelet-hosted cAdvisor
##
kubelet:
  enabled: true
  namespace: kube-system
  serviceMonitor:
    ## Scrape interval. If not set, the Prometheus default scrape interval is used.
    ##
    interval: ""
    ## Enable scraping the kubelet over https. For requirements to enable this see
    ## https://github.com/coreos/prometheus-operator/issues/926
    ##
    https: true
    ## Metric relabellings to apply to samples before ingestion
    ##
    cAdvisorMetricRelabelings: []
    # - sourceLabels: [__name__, image]
    #   separator: ;
    #   regex: container_([a-z_]+);
    #   replacement: $1
    #   action: drop
    # - sourceLabels: [__name__]
    #   separator: ;
    #   regex: container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s)
    #   replacement: $1
    #   action: drop
    # 	relabel configs to apply to samples before ingestion.
    #   metrics_path is required to match upstream rules and charts
    ##
    cAdvisorRelabelings:
      - sourceLabels: [__metrics_path__]
        targetLabel: metrics_path
    # - sourceLabels: [__meta_kubernetes_pod_node_name]
    #   separator: ;
    #   regex: ^(.*)$
    #   targetLabel: nodename
    #   replacement: $1
    #   action: replace
    metricRelabelings: []
    # - sourceLabels: [__name__, image]
    #   separator: ;
    #   regex: container_([a-z_]+);
    #   replacement: $1
    #   action: drop
    # - sourceLabels: [__name__]
    #   separator: ;
    #   regex: container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s)
    #   replacement: $1
    #   action: drop
    # 	relabel configs to apply to samples before ingestion.
    #   metrics_path is required to match upstream rules and charts
    ##
    relabelings:
      - sourceLabels: [__metrics_path__]
        targetLabel: metrics_path
    # - sourceLabels: [__meta_kubernetes_pod_node_name]
    #   separator: ;
    #   regex: ^(.*)$
    #   targetLabel: nodename
    #   replacement: $1
    #   action: replace
## Component scraping the kube controller manager
##
kubeControllerManager:
  enabled: true
  ## If your kube controller manager is not deployed as a pod, specify IPs it can be found on
  ##
  endpoints: []
  # - 10.141.4.22
  # - 10.141.4.23
  # - 10.141.4.24
  ## If using kubeControllerManager.endpoints only the port and targetPort are used
  ##
  service:
    port: 10252
    targetPort: 10252
    # selector:
    #   component: kube-controller-manager
  serviceMonitor:
    ## Scrape interval. If not set, the Prometheus default scrape interval is used.
    ##
    interval: ""
    ## Enable scraping kube-controller-manager over https.
    ## Requires proper certs (not self-signed) and delegated authentication/authorization checks
    ##
    https: false
    # Skip TLS certificate validation when scraping
    insecureSkipVerify: null
    # Name of the server to use when validating TLS certificate
    serverName: null
    ## 	metric relabel configs to apply to samples before ingestion.
    ##
    metricRelabelings: []
    # - action: keep
    #   regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
    #   sourceLabels: [__name__]
    # 	relabel configs to apply to samples before ingestion.
    ##
    relabelings: []
    # - sourceLabels: [__meta_kubernetes_pod_node_name]
    #   separator: ;
    #   regex: ^(.*)$
    #   targetLabel: nodename
    #   replacement: $1
    #   action: replace
## Component scraping coreDns. Use either this or kubeDns
##
coreDns:
  enabled: true
  service:
    port: 9153
    targetPort: 9153
    # selector:
    #   k8s-app: kube-dns
  serviceMonitor:
    ## Scrape interval. If not set, the Prometheus default scrape interval is used.
    ##
    interval: ""
    ## 	metric relabel configs to apply to samples before ingestion.
    ##
    metricRelabelings: []
    # - action: keep
    #   regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
    #   sourceLabels: [__name__]
    # 	relabel configs to apply to samples before ingestion.
    ##
    relabelings: []
    # - sourceLabels: [__meta_kubernetes_pod_node_name]
    #   separator: ;
    #   regex: ^(.*)$
    #   targetLabel: nodename
    #   replacement: $1
    #   action: replace
## Component scraping kubeDns. Use either this or coreDns
##
kubeDns:
  enabled: false
  service:
    dnsmasq:
      port: 10054
      targetPort: 10054
    skydns:
      port: 10055
      targetPort: 10055
    # selector:
    #   k8s-app: kube-dns
  serviceMonitor:
    ## Scrape interval. If not set, the Prometheus default scrape interval is used.
    ##
    interval: ""
    ## 	metric relabel configs to apply to samples before ingestion.
    ##
    metricRelabelings: []
    # - action: keep
    #   regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
    #   sourceLabels: [__name__]
    # 	relabel configs to apply to samples before ingestion.
    ##
    relabelings: []
    # - sourceLabels: [__meta_kubernetes_pod_node_name]
    #   separator: ;
    #   regex: ^(.*)$
    #   targetLabel: nodename
    #   replacement: $1
    #   action: replace
    dnsmasqMetricRelabelings: []
    # - action: keep
    #   regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
    #   sourceLabels: [__name__]
    # 	relabel configs to apply to samples before ingestion.
    ##
    dnsmasqRelabelings: []
    # - sourceLabels: [__meta_kubernetes_pod_node_name]
    #   separator: ;
    #   regex: ^(.*)$
    #   targetLabel: nodename
    #   replacement: $1
    #   action: replace
## Component scraping etcd
##
kubeEtcd:
  enabled: true
  ## If your etcd is not deployed as a pod, specify IPs it can be found on
  ##
  endpoints: []
  # - 10.141.4.22
  # - 10.141.4.23
  # - 10.141.4.24
  ## Etcd service. If using kubeEtcd.endpoints only the port and targetPort are used
  ##
  service:
    port: 2379
    targetPort: 2379
    # selector:
    #   component: etcd
  ## Configure secure access to the etcd cluster by loading a secret into prometheus and
  ## specifying security configuration below. For example, with a secret named etcd-client-cert
  ##
  ## serviceMonitor:
  ##   scheme: https
  ##   insecureSkipVerify: false
  ##   serverName: localhost
  ##   caFile: /etc/prometheus/secrets/etcd-client-cert/etcd-ca
  ##   certFile: /etc/prometheus/secrets/etcd-client-cert/etcd-client
  ##   keyFile: /etc/prometheus/secrets/etcd-client-cert/etcd-client-key
  ##
  serviceMonitor:
    ## Scrape interval. If not set, the Prometheus default scrape interval is used.
    ##
    interval: ""
    scheme: http
    insecureSkipVerify: false
    serverName: ""
    caFile: ""
    certFile: ""
    keyFile: ""
    ## 	metric relabel configs to apply to samples before ingestion.
    ##
    metricRelabelings: []
    # - action: keep
    #   regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
    #   sourceLabels: [__name__]
    # 	relabel configs to apply to samples before ingestion.
    ##
    relabelings: []
    # - sourceLabels: [__meta_kubernetes_pod_node_name]
    #   separator: ;
    #   regex: ^(.*)$
    #   targetLabel: nodename
    #   replacement: $1
    #   action: replace
## Component scraping kube scheduler
##
kubeScheduler:
  enabled: true
  ## If your kube scheduler is not deployed as a pod, specify IPs it can be found on
  ##
  endpoints: []
  # - 10.141.4.22
  # - 10.141.4.23
  # - 10.141.4.24
  ## If using kubeScheduler.endpoints only the port and targetPort are used
  ##
  service:
    port: 10251
    targetPort: 10251
    # selector:
    #   component: kube-scheduler
  serviceMonitor:
    ## Scrape interval. If not set, the Prometheus default scrape interval is used.
    ##
    interval: ""
    ## Enable scraping kube-scheduler over https.
    ## Requires proper certs (not self-signed) and delegated authentication/authorization checks
    ##
    https: false
    ## Skip TLS certificate validation when scraping
    insecureSkipVerify: null
    ## Name of the server to use when validating TLS certificate
    serverName: null
    ## 	metric relabel configs to apply to samples before ingestion.
    ##
    metricRelabelings: []
    # - action: keep
    #   regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
    #   sourceLabels: [__name__]
    # 	relabel configs to apply to samples before ingestion.
    ##
    relabelings: []
    # - sourceLabels: [__meta_kubernetes_pod_node_name]
    #   separator: ;
    #   regex: ^(.*)$
    #   targetLabel: nodename
    #   replacement: $1
    #   action: replace
## Component scraping kube proxy
##
kubeProxy:
  enabled: true
  ## If your kube proxy is not deployed as a pod, specify IPs it can be found on
  ##
  endpoints: []
  # - 10.141.4.22
  # - 10.141.4.23
  # - 10.141.4.24
  service:
    port: 10249
    targetPort: 10249
    # selector:
    #   k8s-app: kube-proxy
  serviceMonitor:
    ## Scrape interval. If not set, the Prometheus default scrape interval is used.
    ##
    interval: ""
    ## Enable scraping kube-proxy over https.
    ## Requires proper certs (not self-signed) and delegated authentication/authorization checks
    ##
    https: false
    ## 	metric relabel configs to apply to samples before ingestion.
    ##
    metricRelabelings: []
    # - action: keep
    #   regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
    #   sourceLabels: [__name__]
    # 	relabel configs to apply to samples before ingestion.
    ##
    relabelings: []
    # - action: keep
    #   regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
    #   sourceLabels: [__name__]
## Component scraping kube state metrics
##
kubeStateMetrics:
  enabled: true
  serviceMonitor:
    ## Scrape interval. If not set, the Prometheus default scrape interval is used.
    ##
    interval: ""
    ## 	metric relabel configs to apply to samples before ingestion.
    ##
    metricRelabelings: []
    # - action: keep
    #   regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
    #   sourceLabels: [__name__]
    # 	relabel configs to apply to samples before ingestion.
    ##
    relabelings: []
    # - sourceLabels: [__meta_kubernetes_pod_node_name]
    #   separator: ;
    #   regex: ^(.*)$
    #   targetLabel: nodename
    #   replacement: $1
    #   action: replace
## Configuration for kube-state-metrics subchart
##
kube-state-metrics:
  rbac:
    create: true
  podSecurityPolicy:
    enabled: true
## Deploy node exporter as a daemonset to all nodes
##
nodeExporter:
  enabled: true
  ## Use the value configured in prometheus-node-exporter.podLabels
  ##
  jobLabel: jobLabel
  serviceMonitor:
    ## Scrape interval. If not set, the Prometheus default scrape interval is used.
    ##
    interval: ""
    ## How long until a scrape request times out. If not set, the Prometheus default scape timeout is used.
    ##
    scrapeTimeout: ""
    ## 	metric relabel configs to apply to samples before ingestion.
    ##
    metricRelabelings: []
    # - sourceLabels: [__name__]
    #   separator: ;
    #   regex: ^node_mountstats_nfs_(event|operations|transport)_.+
    #   replacement: $1
    #   action: drop
    ## 	relabel configs to apply to samples before ingestion.
    ##
    relabelings: []
    # - sourceLabels: [__meta_kubernetes_pod_node_name]
    #   separator: ;
    #   regex: ^(.*)$
    #   targetLabel: nodename
    #   replacement: $1
    #   action: replace
## Configuration for prometheus-node-exporter subchart
##
prometheus-node-exporter:
  podLabels:
    ## Add the 'node-exporter' label to be used by serviceMonitor to match standard common usage in rules and grafana dashboards
    ##
    jobLabel: node-exporter
  extraArgs:
    - --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+)($|/)
    - --collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|cgroup|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|mqueue|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|sysfs|tracefs)$
## Manages Prometheus and Alertmanager components
##
prometheusOperator:
  enabled: true
  # If true prometheus operator will create and update its CRDs on startup
  manageCrds: true
  tlsProxy:
    enabled: true
    image:
      repository: squareup/ghostunnel
      tag: v1.5.2
      pullPolicy: IfNotPresent
    resources: {}
  ## Admission webhook support for PrometheusRules resources added in Prometheus Operator 0.30 can be enabled to prevent incorrectly formatted
  ## rules from making their way into prometheus and potentially preventing the container from starting
  admissionWebhooks:
    failurePolicy: Fail
    enabled: true
    ## If enabled, generate a self-signed certificate, then patch the webhook configurations with the generated data.
    ## On chart upgrades (or if the secret exists) the cert will not be re-generated. You can use this to provide your own
    ## certs ahead of time if you wish.
    ##
    patch:
      enabled: true
      image:
        repository: jettech/kube-webhook-certgen
        tag: v1.0.0
        pullPolicy: IfNotPresent
      resources: {}
      ## Provide a priority class name to the webhook patching job
      ##
      priorityClassName: ""
      podAnnotations: {}
      nodeSelector: {}
      affinity: {}
      tolerations: []
  ## Namespaces to scope the interaction of the Prometheus Operator and the apiserver (allow list).
  ## This is mutually exclusive with denyNamespaces. Setting this to an empty object will disable the configuration
  ##
  namespaces: {}
    # releaseNamespace: true
    # additional:
    # - kube-system
  ## Namespaces not to scope the interaction of the Prometheus Operator (deny list).
  ##
  denyNamespaces: []
  ## Service account for Alertmanager to use.
  ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/
  ##
  serviceAccount:
    create: true
    name: ""
  ## Configuration for Prometheus operator service
  ##
  service:
    annotations: {}
    labels: {}
    clusterIP: ""
  ## Port to expose on each node
  ## Only used if service.type is 'NodePort'
  ##
    nodePort: 30080
    nodePortTls: 30443
  ## Additional ports to open for Prometheus service
  ## ref: https://kubernetes.io/docs/concepts/services-networking/service/#multi-port-services
  ##
    additionalPorts: []
  ## Loadbalancer IP
  ## Only use if service.type is "loadbalancer"
  ##
    loadBalancerIP: ""
    loadBalancerSourceRanges: []
  ## Service type
  ## NodePort, ClusterIP, loadbalancer
  ##
    type: ClusterIP
    ## List of IP addresses at which the Prometheus server service is available
    ## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips
    ##
    externalIPs: []
  ## Deploy CRDs used by Prometheus Operator.
  ##
  createCustomResource: true
  ## Attempt to clean up CRDs created by Prometheus Operator.
  ##
  cleanupCustomResource: false
  ## Labels to add to the operator pod
  ##
  podLabels: {}
  ## Annotations to add to the operator pod
  ##
  podAnnotations: {}
  ## Assign a PriorityClassName to pods if set
  # priorityClassName: ""
  ## Define Log Format
  # Use logfmt (default) or json-formatted logging
  # logFormat: logfmt
  ## Decrease log verbosity to errors only
  # logLevel: error
  ## If true, the operator will create and maintain a service for scraping kubelets
  ## ref: https://github.com/coreos/prometheus-operator/blob/master/helm/prometheus-operator/README.md
  ##
  kubeletService:
    enabled: true
    namespace: kube-system
  ## Create a servicemonitor for the operator
  ##
  serviceMonitor:
    ## Scrape interval. If not set, the Prometheus default scrape interval is used.
    ##
    interval: ""
    selfMonitor: true
    ## 	metric relabel configs to apply to samples before ingestion.
    ##
    metricRelabelings: []
    # - action: keep
    #   regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
    #   sourceLabels: [__name__]
    # 	relabel configs to apply to samples before ingestion.
    ##
    relabelings: []
    # - sourceLabels: [__meta_kubernetes_pod_node_name]
    #   separator: ;
    #   regex: ^(.*)$
    #   targetLabel: nodename
    #   replacement: $1
    #   action: replace
  ## Resource limits & requests
  ##
  resources: {}
  # limits:
  #   cpu: 200m
  #   memory: 200Mi
  # requests:
  #   cpu: 100m
  #   memory: 100Mi
  ## Define which Nodes the Pods are scheduled on.
  ## ref: https://kubernetes.io/docs/user-guide/node-selection/
  ##
  nodeSelector: {}
  ## Tolerations for use with node taints
  ## ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/
  ##
  tolerations: []
  # - key: "key"
  #   operator: "Equal"
  #   value: "value"
  #   effect: "NoSchedule"
  ## Assign custom affinity rules to the prometheus operator
  ## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
  ##
  affinity: {}
    # nodeAffinity:
    #   requiredDuringSchedulingIgnoredDuringExecution:
    #     nodeSelectorTerms:
    #     - matchExpressions:
    #       - key: kubernetes.io/e2e-az-name
    #         operator: In
    #         values:
    #         - e2e-az1
    #         - e2e-az2
  securityContext:
    runAsNonRoot: true
    runAsUser: 65534
  ## Prometheus-operator image
  ##
  image:
    repository: quay.io/coreos/prometheus-operator
    tag: v0.37.0
    pullPolicy: IfNotPresent
  ## Configmap-reload image to use for reloading configmaps
  ##
  configmapReloadImage:
    repository: quay.io/coreos/configmap-reload
    tag: v0.0.1
  ## Prometheus-config-reloader image to use for config and rule reloading
  ##
  prometheusConfigReloaderImage:
    repository: quay.io/coreos/prometheus-config-reloader
    tag: v0.37.0
  ## Set the prometheus config reloader side-car CPU limit
  ##
  configReloaderCpu: 100m
  ## Set the prometheus config reloader side-car memory limit
  ##
  configReloaderMemory: 25Mi
  ## Hyperkube image to use when cleaning up
  ##
  hyperkubeImage:
    repository: k8s.gcr.io/hyperkube
    tag: v1.12.1
    pullPolicy: IfNotPresent
## Deploy a Prometheus instance
##
prometheus:
  enabled: true
  ## Annotations for Prometheus
  ##
  annotations: {}
  ## Service account for Prometheuses to use.
  ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/
  ##
  serviceAccount:
    create: true
    name: ""
  ## Configuration for Prometheus service
  ##
  service:
    annotations: {}
    labels: {}
    clusterIP: ""
    ## Port for Prometheus Service to listen on
    ##
    port: 9090
    ## To be used with a proxy extraContainer port
    targetPort: 9090
    ## List of IP addresses at which the Prometheus server service is available
    ## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips
    ##
    externalIPs: []
    ## Port to expose on each node
    ## Only used if service.type is 'NodePort'
    ##
    nodePort: 30090
    type: NodePort
    ## Loadbalancer IP
    ## Only use if service.type is "loadbalancer"
    loadBalancerIP: ""
    loadBalancerSourceRanges: []
    ## Service type
    ##
    ##type: ClusterIP
    sessionAffinity: ""
  ## Configuration for creating a separate Service for each statefulset Prometheus replica
  ##
  servicePerReplica:
    enabled: false
    annotations: {}
    ## Port for Prometheus Service per replica to listen on
    ##
    port: 9090
    ## To be used with a proxy extraContainer port
    targetPort: 9090
    ## Port to expose on each node
    ## Only used if servicePerReplica.type is 'NodePort'
    ##
    nodePort: 30091
    ## Loadbalancer source IP ranges
    ## Only used if servicePerReplica.type is "loadbalancer"
    loadBalancerSourceRanges: []
    ## Service type
    ##
    #type: NodePort
    type: ClusterIP
  ## Configure pod disruption budgets for Prometheus
  ## ref: https://kubernetes.io/docs/tasks/run-application/configure-pdb/#specifying-a-poddisruptionbudget
  ## This configuration is immutable once created and will require the PDB to be deleted to be changed
  ## https://github.com/kubernetes/kubernetes/issues/45398
  ##
  podDisruptionBudget:
    enabled: false
    minAvailable: 1
    maxUnavailable: ""
  ingress:
    enabled: false
    annotations: {}
    labels: {}
    ## Hostnames.
    ## Must be provided if Ingress is enabled.
    ##
    # hosts:
    #   - prometheus.domain.com
    hosts: []
    ## Paths to use for ingress rules - one path should match the prometheusSpec.routePrefix
    ##
    paths: []
    # - /
    ## TLS configuration for Prometheus Ingress
    ## Secret must be manually created in the namespace
    ##
    tls: []
      # - secretName: prometheus-general-tls
      #   hosts:
      #     - prometheus.example.com
  ## Configuration for creating an Ingress that will map to each Prometheus replica service
  ## prometheus.servicePerReplica must be enabled
  ##
  ingressPerReplica:
    enabled: false
    annotations: {}
    labels: {}
    ## Final form of the hostname for each per replica ingress is
    ## {{ ingressPerReplica.hostPrefix }}-{{ $replicaNumber }}.{{ ingressPerReplica.hostDomain }}
    ##
    ## Prefix for the per replica ingress that will have `-$replicaNumber`
    ## appended to the end
    hostPrefix: ""
    ## Domain that will be used for the per replica ingress
    hostDomain: ""
    ## Paths to use for ingress rules
    ##
    paths: []
    # - /
    ## Secret name containing the TLS certificate for Prometheus per replica ingress
    ## Secret must be manually created in the namespace
    tlsSecretName: ""
  ## Configure additional options for default pod security policy for Prometheus
  ## ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/
  podSecurityPolicy:
    allowedCapabilities: []
  serviceMonitor:
    ## Scrape interval. If not set, the Prometheus default scrape interval is used.
    ##
    interval: ""
    selfMonitor: true
    ## scheme: HTTP scheme to use for scraping. Can be used with `tlsConfig` for example if using istio mTLS.
    scheme: ""
    ## tlsConfig: TLS configuration to use when scraping the endpoint. For example if using istio mTLS.
    ## Of type: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#tlsconfig
    tlsConfig: {}
    bearerTokenFile:
    ## 	metric relabel configs to apply to samples before ingestion.
    ##
    metricRelabelings: []
    # - action: keep
    #   regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
    #   sourceLabels: [__name__]
    # 	relabel configs to apply to samples before ingestion.
    ##
    relabelings: []
    # - sourceLabels: [__meta_kubernetes_pod_node_name]
    #   separator: ;
    #   regex: ^(.*)$
    #   targetLabel: nodename
    #   replacement: $1
    #   action: replace
  ## Settings affecting prometheusSpec
  ## ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#prometheusspec
  ##
  prometheusSpec:
    ## If true, pass --storage.tsdb.max-block-duration=2h to prometheus. This is already done if using Thanos
    ##
    disableCompaction: false
    ## APIServerConfig
    ## ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#apiserverconfig
    ##
    apiserverConfig: {}
    ## Interval between consecutive scrapes.
    ##
    scrapeInterval: ""
    ## Interval between consecutive evaluations.
    ##
    evaluationInterval: ""
    ## ListenLocal makes the Prometheus server listen on loopback, so that it does not bind against the Pod IP.
    ##
    listenLocal: false
    ## EnableAdminAPI enables Prometheus the administrative HTTP API which includes functionality such as deleting time series.
    ## This is disabled by default.
    ## ref: https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-admin-apis
    ##
    enableAdminAPI: false
    ## Image of Prometheus.
    ##
    image:
      repository: quay.io/prometheus/prometheus
      tag: v2.15.2
    ## Tolerations for use with node taints
    ## ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/
    ##
    tolerations: []
    #  - key: "key"
    #    operator: "Equal"
    #    value: "value"
    #    effect: "NoSchedule"
    ## Alertmanagers to which alerts will be sent
    ## ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#alertmanagerendpoints
    ##
    ## Default configuration will connect to the alertmanager deployed as part of this release
    ##
    alertingEndpoints: []
    # - name: ""
    #   namespace: ""
    #   port: http
    #   scheme: http
    #   pathPrefix: ""
    #   tlsConfig: {}
    #   bearerTokenFile: ""
    #   apiVersion: v2
    ## External labels to add to any time series or alerts when communicating with external systems
    ##
    externalLabels: {}
    ## Name of the external label used to denote replica name
    ##
    replicaExternalLabelName: ""
    ## If true, the Operator won't add the external label used to denote replica name
    ##
    replicaExternalLabelNameClear: false
    ## Name of the external label used to denote Prometheus instance name
    ##
    prometheusExternalLabelName: ""
    ## If true, the Operator won't add the external label used to denote Prometheus instance name
    ##
    prometheusExternalLabelNameClear: false
    ## External URL at which Prometheus will be reachable.
    ##
    externalUrl: ""
    ## Define which Nodes the Pods are scheduled on.
    ## ref: https://kubernetes.io/docs/user-guide/node-selection/
    ##
    nodeSelector: {}
    ## Secrets is a list of Secrets in the same namespace as the Prometheus object, which shall be mounted into the Prometheus Pods.
    ## The Secrets are mounted into /etc/prometheus/secrets/. Secrets changes after initial creation of a Prometheus object are not
    ## reflected in the running Pods. To change the secrets mounted into the Prometheus Pods, the object must be deleted and recreated
    ## with the new list of secrets.
    ##
    secrets: []
    ## ConfigMaps is a list of ConfigMaps in the same namespace as the Prometheus object, which shall be mounted into the Prometheus Pods.
    ## The ConfigMaps are mounted into /etc/prometheus/configmaps/.
    ##
    configMaps: []
    ## QuerySpec defines the query command line flags when starting Prometheus.
    ## ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#queryspec
    ##
    query: {}
    ## Namespaces to be selected for PrometheusRules discovery.
    ## If nil, select own namespace. Namespaces to be selected for ServiceMonitor discovery.
    ## See https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#namespaceselector for usage
    ##
    ruleNamespaceSelector: {}
    ## If true, a nil or {} value for prometheus.prometheusSpec.ruleSelector will cause the
    ## prometheus resource to be created with selectors based on values in the helm deployment,
    ## which will also match the PrometheusRule resources created
    ##
    ruleSelectorNilUsesHelmValues: true
    ## PrometheusRules to be selected for target discovery.
    ## If {}, select all ServiceMonitors
    ##
    ruleSelector: {}
    ## Example which select all prometheusrules resources
    ## with label "prometheus" with values any of "example-rules" or "example-rules-2"
    # ruleSelector:
    #   matchExpressions:
    #     - key: prometheus
    #       operator: In
    #       values:
    #         - example-rules
    #         - example-rules-2
    #
    ## Example which select all prometheusrules resources with label "role" set to "example-rules"
    # ruleSelector:
    #   matchLabels:
    #     role: example-rules
    ## If true, a nil or {} value for prometheus.prometheusSpec.serviceMonitorSelector will cause the
    ## prometheus resource to be created with selectors based on values in the helm deployment,
    ## which will also match the servicemonitors created
    ##
    serviceMonitorSelectorNilUsesHelmValues: true
    ## ServiceMonitors to be selected for target discovery.
    ## If {}, select all ServiceMonitors
    ##
    serviceMonitorSelector: {}
    ## Example which selects ServiceMonitors with label "prometheus" set to "somelabel"
    # serviceMonitorSelector:
    #   matchLabels:
    #     prometheus: somelabel
    ## Namespaces to be selected for ServiceMonitor discovery.
    ## See https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#namespaceselector for usage
    ##
    serviceMonitorNamespaceSelector: {}
    ## If true, a nil or {} value for prometheus.prometheusSpec.podMonitorSelector will cause the
    ## prometheus resource to be created with selectors based on values in the helm deployment,
    ## which will also match the podmonitors created
    ##
    podMonitorSelectorNilUsesHelmValues: true
    ## PodMonitors to be selected for target discovery.
    ## If {}, select all PodMonitors
    ##
    podMonitorSelector: {}
    ## Example which selects PodMonitors with label "prometheus" set to "somelabel"
    # podMonitorSelector:
    #   matchLabels:
    #     prometheus: somelabel
    ## Namespaces to be selected for PodMonitor discovery.
    ## See https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#namespaceselector for usage
    ##
    podMonitorNamespaceSelector: {}
    ## How long to retain metrics
    ##
    retention: 10d
    ## Maximum size of metrics
    ##
    retentionSize: ""
    ## Enable compression of the write-ahead log using Snappy.
    ##
    walCompression: false
    ## If true, the Operator won't process any Prometheus configuration changes
    ##
    paused: false
    ## Number of Prometheus replicas desired
    ##
    replicas: 1
    ## Log level for Prometheus be configured in
    ##
    logLevel: info
    ## Log format for Prometheus be configured in
    ##
    logFormat: logfmt
    ## Prefix used to register routes, overriding externalUrl route.
    ## Useful for proxies that rewrite URLs.
    ##
    routePrefix: /
    ## Standard object’s metadata. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#metadata
    ## Metadata Labels and Annotations gets propagated to the prometheus pods.
    ##
    podMetadata: {}
    # labels:
    #   app: prometheus
    #   k8s-app: prometheus
    ## Pod anti-affinity can prevent the scheduler from placing Prometheus replicas on the same node.
    ## The default value "soft" means that the scheduler should *prefer* to not schedule two replica pods onto the same node but no guarantee is provided.
    ## The value "hard" means that the scheduler is *required* to not schedule two replica pods onto the same node.
    ## The value "" will disable pod anti-affinity so that no anti-affinity rules will be configured.
    podAntiAffinity: ""
    ## If anti-affinity is enabled sets the topologyKey to use for anti-affinity.
    ## This can be changed to, for example, failure-domain.beta.kubernetes.io/zone
    ##
    podAntiAffinityTopologyKey: kubernetes.io/hostname
    ## Assign custom affinity rules to the prometheus instance
    ## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
    ##
    affinity: {}
    # nodeAffinity:
    #   requiredDuringSchedulingIgnoredDuringExecution:
    #     nodeSelectorTerms:
    #     - matchExpressions:
    #       - key: kubernetes.io/e2e-az-name
    #         operator: In
    #         values:
    #         - e2e-az1
    #         - e2e-az2
    ## The remote_read spec configuration for Prometheus.
    ## ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#remotereadspec
    remoteRead: []
    # - url: http://remote1/read
    ## The remote_write spec configuration for Prometheus.
    ## ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#remotewritespec
    remoteWrite: []
    # - url: http://remote1/push
    ## Enable/Disable Grafana dashboards provisioning for prometheus remote write feature
    remoteWriteDashboards: false
    ## Resource limits & requests
    ##
    resources: {}
    # requests:
    #   memory: 400Mi
    ## Prometheus StorageSpec for persistent data
    ## ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/user-guides/storage.md
    ##
    storageSpec: {}
    #  volumeClaimTemplate:
    #    spec:
    #      storageClassName: gluster
    #      accessModes: ["ReadWriteOnce"]
    #      resources:
    #        requests:
    #          storage: 50Gi
    #    selector: {}
    ## AdditionalScrapeConfigs allows specifying additional Prometheus scrape configurations. Scrape configurations
    ## are appended to the configurations generated by the Prometheus Operator. Job configurations must have the form
    ## as specified in the official Prometheus documentation:
    ## https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config. As scrape configs are
    ## appended, the user is responsible to make sure it is valid. Note that using this feature may expose the possibility
    ## to break upgrades of Prometheus. It is advised to review Prometheus release notes to ensure that no incompatible
    ## scrape configs are going to break Prometheus after the upgrade.
    ##
    ## The scrape configuraiton example below will find master nodes, provided they have the name .*mst.*, relabel the
    ## port to 2379 and allow etcd scraping provided it is running on all Kubernetes master nodes
    ##
    additionalScrapeConfigs: []
    # - job_name: kube-etcd
    #   kubernetes_sd_configs:
    #     - role: node
    #   scheme: https
    #   tls_config:
    #     ca_file:   /etc/prometheus/secrets/etcd-client-cert/etcd-ca
    #     cert_file: /etc/prometheus/secrets/etcd-client-cert/etcd-client
    #     key_file:  /etc/prometheus/secrets/etcd-client-cert/etcd-client-key
    #   relabel_configs:
    #   - action: labelmap
    #     regex: __meta_kubernetes_node_label_(.+)
    #   - source_labels: [__address__]
    #     action: replace
    #     targetLabel: __address__
    #     regex: ([^:;]+):(\d+)
    #     replacement: ${1}:2379
    #   - source_labels: [__meta_kubernetes_node_name]
    #     action: keep
    #     regex: .*mst.*
    #   - source_labels: [__meta_kubernetes_node_name]
    #     action: replace
    #     targetLabel: node
    #     regex: (.*)
    #     replacement: ${1}
    #   metric_relabel_configs:
    #   - regex: (kubernetes_io_hostname|failure_domain_beta_kubernetes_io_region|beta_kubernetes_io_os|beta_kubernetes_io_arch|beta_kubernetes_io_instance_type|failure_domain_beta_kubernetes_io_zone)
    #     action: labeldrop
    ## additionalPrometheusSecretsAnnotations allows to add annotations to the kubernetes secret. This can be useful
    ## when deploying via spinnaker to disable versioning on the secret, strategy.spinnaker.io/versioned: 'false'
    additionalPrometheusSecretsAnnotations: {}
    ## AdditionalAlertManagerConfigs allows for manual configuration of alertmanager jobs in the form as specified
    ## in the official Prometheus documentation https://prometheus.io/docs/prometheus/latest/configuration/configuration/#<alertmanager_config>.
    ## AlertManager configurations specified are appended to the configurations generated by the Prometheus Operator.
    ## As AlertManager configs are appended, the user is responsible to make sure it is valid. Note that using this
    ## feature may expose the possibility to break upgrades of Prometheus. It is advised to review Prometheus release
    ## notes to ensure that no incompatible AlertManager configs are going to break Prometheus after the upgrade.
    ##
    additionalAlertManagerConfigs: []
    # - consul_sd_configs:
    #   - server: consul.dev.test:8500
    #     scheme: http
    #     datacenter: dev
    #     tag_separator: ','
    #     services:
    #       - metrics-prometheus-alertmanager
    ## AdditionalAlertRelabelConfigs allows specifying Prometheus alert relabel configurations. Alert relabel configurations specified are appended
    ## to the configurations generated by the Prometheus Operator. Alert relabel configurations specified must have the form as specified in the
    ## official Prometheus documentation: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#alert_relabel_configs.
    ## As alert relabel configs are appended, the user is responsible to make sure it is valid. Note that using this feature may expose the
    ## possibility to break upgrades of Prometheus. It is advised to review Prometheus release notes to ensure that no incompatible alert relabel
    ## configs are going to break Prometheus after the upgrade.
    ##
    additionalAlertRelabelConfigs: []
    # - separator: ;
    #   regex: prometheus_replica
    #   replacement: $1
    #   action: labeldrop
    ## SecurityContext holds pod-level security attributes and common container settings.
    ## This defaults to non root user with uid 1000 and gid 2000.
    ## https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md
    ##
    securityContext:
      runAsNonRoot: true
      runAsUser: 1000
      fsGroup: 2000
    ## 	Priority class assigned to the Pods
    ##
    priorityClassName: ""
    ## Thanos configuration allows configuring various aspects of a Prometheus server in a Thanos environment.
    ## This section is experimental, it may change significantly without deprecation notice in any release.
    ## This is experimental and may change significantly without backward compatibility in any release.
    ## ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#thanosspec
    ##
    thanos: {}
    ## Containers allows injecting additional containers. This is meant to allow adding an authentication proxy to a Prometheus pod.
    ##  if using proxy extraContainer  update targetPort with proxy container port
    containers: []
    ## Enable additional scrape configs that are managed externally to this chart. Note that the prometheus
    ## will fail to provision if the correct secret does not exist.
    ## This option requires that you are maintaining a secret in the same namespace as Prometheus with
    ## a name of 'prometheus-operator-prometheus-scrape-confg' and a key of 'additional-scrape-configs.yaml' that
    ## contains a list of scrape_config's. The name of the secret may vary if you utilize the "fullnameOverride".
    ## This feature cannot be used in conjunction with the additionalScrapeConfigs attribute (the helm-generated
    ## secret will overwrite your self-maintained secret).
    ##
    ## scrape_config docs: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config
    ## explanation of "confg" typo: https://github.com/helm/charts/issues/13368
    additionalScrapeConfigsExternal: false
    ## PortName to use for Prometheus.
    ##
    portName: "web"
  additionalServiceMonitors: []
  ## Name of the ServiceMonitor to create
  ##
  # - name: ""
    ## Additional labels to set used for the ServiceMonitorSelector. Together with standard labels from
    ## the chart
    ##
    # additionalLabels: {}
    ## Service label for use in assembling a job name of the form <label value>-<port>
    ## If no label is specified, the service name is used.
    ##
    # jobLabel: ""
    ## labels to transfer from the kubernetes service to the target
    ##
    # targetLabels: ""
    ## Label selector for services to which this ServiceMonitor applies
    ##
    # selector: {}
    ## Namespaces from which services are selected
    ##
    # namespaceSelector:
      ## Match any namespace
      ##
      # any: false
      ## Explicit list of namespace names to select
      ##
      # matchNames: []
    ## Endpoints of the selected service to be monitored
    ##
    # endpoints: []
      ## Name of the endpoint's service port
      ## Mutually exclusive with targetPort
      # - port: ""
      ## Name or number of the endpoint's target port
      ## Mutually exclusive with port
      # - targetPort: ""
      ## File containing bearer token to be used when scraping targets
      ##
      #   bearerTokenFile: ""
      ## Interval at which metrics should be scraped
      ##
      #   interval: 30s
      ## HTTP path to scrape for metrics
      ##
      #   path: /metrics
      ## HTTP scheme to use for scraping
      ##
      #   scheme: http
      ## TLS configuration to use when scraping the endpoint
      ##
      #   tlsConfig:
          ## Path to the CA file
          ##
          # caFile: ""
          ## Path to client certificate file
          ##
          # certFile: ""
          ## Skip certificate verification
          ##
          # insecureSkipVerify: false
          ## Path to client key file
          ##
          # keyFile: ""
          ## Server name used to verify host name
          ##
          # serverName: ""
  additionalPodMonitors: []
  ## Name of the PodMonitor to create
  ##
  # - name: ""
    ## Additional labels to set used for the PodMonitorSelector. Together with standard labels from
    ## the chart
    ##
    # additionalLabels: {}
    ## Pod label for use in assembling a job name of the form <label value>-<port>
    ## If no label is specified, the pod endpoint name is used.
    ##
    # jobLabel: ""
    ## Label selector for pods to which this PodMonitor applies
    ##
    # selector: {}
    ## PodTargetLabels transfers labels on the Kubernetes Pod onto the target.
    ##
    # podTargetLabels: {}
    ## SampleLimit defines per-scrape limit on number of scraped samples that will be accepted.
    ##
    # sampleLimit: 0
    ## Namespaces from which pods are selected
    ##
    # namespaceSelector:
      ## Match any namespace
      ##
      # any: false
      ## Explicit list of namespace names to select
      ##
      # matchNames: []
    ## Endpoints of the selected pods to be monitored
    ## https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#podmetricsendpoint
    ##
    # podMetricsEndpoints: []

安装 prometheus-operator

helm install --name prometheus-operator . --namespace=monitoring

让我们清理默认规则，使得我们可以更好地观察我们将要创建的那个规则。以下命令将删除所有规则，但会留下monitoring-demo-prometheus-operator-alertmanager.rules。

1	$ kubectl -n monitoring delete prometheusrules $(kubectl -n monitoring get prometheusrules \| grep -v alert)

1
2
3

$ kubectl -n monitoring get prometheusrules
NAME                                          AGE
demo-prometheus-operator-alertmanager.rules   8m53s

注意：我们只保留一条规则是为了让demo更容易。但是有一条规则，你绝对不能删除，它位于monitoring-demo-prometheus-operator-general.rules.yaml中，被称为看门狗。该告警总是处于触发状态，其目的是确保整个告警流水线正常运转。

创建自己的预警

cat prometheus-operator-tomcat-rules.yaml


apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    chart: prometheus-operator-8.12.0
    heritage: Tiller
    app: prometheus-operator
    release: prometheus-operator
  name: prometheus-operator-tomcat-test.rules
  namespace: monitoring
spec:
  groups:
  #- name: webhook
  #  rules:
  #  - alert: "钉钉报警测试"
  #    expr: rate(kube_pod_container_status_restarts_total{job="kube-state-metrics",
  #      namespace=~".*"}[15m]) * 60 * 5 > 0
  #    for: 5m
  #    labels:
  #      severity: '警告'
  #    annotations:
  #      summary: "{{ $labels.instance }}: 钉钉报警测试"
  #      description: "{{ $labels.instance }}：钉钉报警测试"
  #      custom: "钉钉报警测试"
  #      value: "{{$value}}"
  - name: alert-pods.rules
    rules:
    - alert: "160测试/灰度环境Pod应用挂起预警"
      annotations:
        description: 应用 {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container
          }}) is restarting {{ printf "%.2f" $value }} times / 5 minutes.
      expr: rate(kube_pod_container_status_restarts_total{job="kube-state-metrics",
        namespace=~".*"}[15m]) * 60 * 5 > 0
      for: 15m
      labels:
        severity: critical  # 注意这行，alertmangar要根据这个标签来进行报警

现在我们已经完成了Prometheus告警的设置，让我们配置Alertmanager，使得我们能够通过电子邮件获得告警通知。Alertmanager的配置位于Kubernetes secret对象中。

cat alertmanager.yaml

global:
  # 在没有报警的情况下声明为已解决的时间
  resolve_timeout: 5m
  # 配置邮件发送信息
  smtp_smarthost: 'smtp.qq.com:465'
  smtp_from: 'XXX@qq.com'
  smtp_auth_username: 'XXX@qq.com'
  smtp_auth_password: "xxxxxxxxxxx"
  smtp_hello: 'xxxx@qq.com'
  smtp_require_tls: false
# 所有报警信息进入后的根路由，用来设置报警的分发策略
route:
  # 这里的标签列表是接收到报警信息后的重新分组标签，例如，接收到的报警信息里面有许多具有 cluster=A 和 alertname=LatncyHigh 这样的标签的报警信息将会批量被聚合到一个分组里面
  group_by: ['alertname', 'cluster','namespace']
  # 当一个新的报警分组被创建后，需要等待至少group_wait时间来初始化通知，这种方式可以确保您能有足够的时间为同一分组来获取多个警报，然后一起触发这个报警信息。
  group_wait: 20m
  # 当第一个报警发送后，等待'group_interval'时间来发送新的一组报警信息。
  group_interval: 30m
  # 如果一个报警信息已经发送成功了，等待'repeat_interval'时间来重新发送他们
  repeat_interval: 25m
  # 默认的receiver：如果一个报警没有被一个route匹配，则发送给默认的接收器
  #receiver: default
  receiver: webhook
  # 上面所有的属性都由所有子路由继承，并且可以在每个子路由上进行覆盖。
  routes:
  - receiver: email
    group_wait: 10s
    match:
      alertManagerRule: node  #根据此标签，选择报警规则
  - receiver: webhook
    match:
      severity: critical
receivers:
- name: 'default'
  email_configs:
  - to: 'xxxx@qq.com'
    send_resolved: true
- name: 'email'
  email_configs:
  - to: 'xxxxx@qq.com'
    send_resolved: true
 # webhook_configs:
 # - url: 'http://dingtalk-hook:5000'
 #   send_resolved: true
- name: 'webhook'
  webhook_configs:
  - url: 'http://webhook-dingtalk/dingtalk/webhook1/send'
    send_resolved: true

1 2	kubectl create secret generic alertmanager-prometheus-operator-alertmanager --from-file=alertmanager.yaml -n monitoring kubectl apply -f prometheus-operator-tomcat-rules.yaml

可能遇到的坑

kubectl get validatingwebhookconfigurations.admissionregistration.k8s.io
kubectl delete  validatingwebhookconfigurations.admissionregistration.k8s.io  prometheus-operator-admission
kubectl get MutatingWebhookConfiguration
kubectl delete MutatingWebhookConfiguration prometheus-operator-admission

Kubernetes集群Prometheus Operator钉钉报警配置

创建k8s dingtalk-webhook

cat dingtalk-webhook.yaml

apiVersion: apps/v1
kind: Deployment
metadata:
  name: webhook-dingtalk
  namespace: monitoring
spec:
  selector:
    matchLabels:
      app: dingtalk
  replicas: 1
  template:
    metadata:
      labels:
        app: dingtalk
    spec:
      nodeSelector:
        nodeType: master
      restartPolicy: Always
      containers:
      - name: dingtalk
        image: timonwong/prometheus-webhook-dingtalk:v1.4.0
        imagePullPolicy: IfNotPresent
        args:
          - '--web.enable-ui'
          - '--web.enable-lifecycle'
          - '--config.file=/mnt/data/dingtalk/dingdingalert/templates/config.yml'
        ports:
        - containerPort: 8060
          protocol: TCP
        volumeMounts:
        - mountPath: "/mnt/data/dingtalk/dingdingalert/templates"
          name: dingtalk-volume
      volumes:
      - name: dingtalk-volume
        hostPath:
          path: "/mnt/data/dingtalk/dingdingalert/templates"
        #persistentVolumeClaim:
        #  claimName: dingding-pvc
---
apiVersion: v1
kind: Service
metadata:
  name: webhook-dingtalk
  namespace: monitoring
spec:
  ports:
  - port: 80
    protocol: TCP
    targetPort: 8060
  selector:
    app: dingtalk
  sessionAffinity: None

cat /mnt/data/dingtalk/dingdingalert/templates/config.yml

templates:
  - /mnt/data/dingtalk/dingdingalert/templates/template.tmpl
targets:
  webhook1:
    url: https://oapi.dingtalk.com/robot/send?access_token=84e6cdba089128514d1a641faf8bd28655af2089a47c729877f23c697e71f09b

cat /mnt/data/dingtalk/dingdingalert/templates/template.tmpl

{{ define "ding.link.title" }}[监控报警]{{ end }}
{{ define "ding.link.content" -}}
{{- if gt (len .Alerts.Firing) 0 -}}
  {{ range $i, $alert := .Alerts.Firing }}
    [告警项目]:{{ index $alert.Labels "alertname" }}
    [告警实例]:{{ index $alert.Labels "instance" }}
    [告警级别]:{{ index $alert.Labels "severity" }}
    [告警详情]:{{ index $alert.Annotations "description" }}
    [触发时间]:{{ (.StartsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}
  {{ end }}{{- end }}
{{- if gt (len .Alerts.Resolved) 0 -}}
  {{ range $i, $alert := .Alerts.Resolved }}
    [项目]:{{ index $alert.Labels "alertname" }}
    [实例]:{{ index $alert.Labels "instance" }}
    [状态]:正在重启
    [开始]:{{ (.StartsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}
    [恢复]:{{ (.EndsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}
  {{ end }}{{- end }}
{{- end }}

Springboot AliOssUtil

发表于 2021-03-22 |

package com.xxx.common.utils;
import cn.hutool.core.date.DateUtil;
import cn.hutool.core.util.IdUtil;
import com.aliyun.oss.*;
import com.aliyun.oss.model.CannedAccessControlList;
import com.aliyun.oss.model.PutObjectRequest;
import com.aliyun.oss.model.PutObjectResult;
import com.huazhu.spm.face.config.AliOssProperties;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
import java.io.InputStream;
import java.util.Date;
/**
 * @author haobiao
 * @date 2020/12/22 11:05
 * @description 阿里OSS存储
 */
@Component
@Slf4j
public class AliOssUtil {
    private AliOssProperties aliOssProperties;
    private OSS initClient() {
        ClientBuilderConfiguration conf = new ClientBuilderConfiguration();
        conf.setConnectionTimeout(10000);
        conf.setMaxErrorRetry(10);
        return new OSSClientBuilder().build(aliOssProperties.getEndpoint(), aliOssProperties.getKey(), aliOssProperties.getSecret(), conf);
    }
    /**
     * 上传照片
     *
     * @param inputStream
     * @param fileType    文件类型
     * @return
     */
    public String ossUploadFile(InputStream inputStream, String fileType) {
        try {
            OSS ossBuild = new OSSClientBuilder().build(aliOssProperties.getEndpoint(), aliOssProperties.getKey(), aliOssProperties.getSecret());
            String bucketName = aliOssProperties.getBucket();
            String endpoint = aliOssProperties.getEndpoint();
            String fileName = DateUtil.format(new Date(), "yyyyMMdd") + "/" + IdUtil.simpleUUID() + "." + fileType;
            //上传文件
            PutObjectRequest putObjectRequest = new PutObjectRequest(bucketName, fileName, inputStream);
            PutObjectResult result = ossBuild.putObject(putObjectRequest);
            //设置权限为公开读
            ossBuild.setObjectAcl(bucketName, fileName, CannedAccessControlList.PublicRead);
            if (null != result) {
                return "http://" + bucketName + "." + endpoint + "/" + fileName;
            }
            ossBuild.shutdown();
        } catch (Exception e) {
            log.error("ossUploadFile exception is {}", e);
        }
        return "";
    }
    public AliOssUtil(AliOssProperties aliOssProperties) {
        this.aliOssProperties = aliOssProperties;
    }
}

k8s 部署和gitlab CI/CD

发表于 2021-03-16 |

准备工作

1. 安装必备软件

1	yum install -y wget vim net-tools epel-release

2. 关闭swap

swapoff -a
# 永久禁用，打开/etc/fstab注释掉swap那一行。
sed -i 's/.*swap.*/#&/' /etc/fstab

3. 关闭selinux

# 临时禁用selinux
setenforce 0
# 永久关闭 修改/etc/sysconfig/selinux文件设置
sed -i 's/SELINUX=permissive/SELINUX=disabled/' /etc/sysconfig/selinux
sed -i "s/SELINUX=enforcing/SELINUX=disabled/g" /etc/selinux/config

4. 关闭防火墙

1 2	systemctl disable firewalld systemctl stop firewalld

安装Docker和配置代理

1. 配置yum源

## 配置默认源
## 备份
mv /etc/yum.repos.d/CentOS-Base.repo /etc/yum.repos.d/CentOS-Base.repo.backup
## 下载阿里源
wget -O /etc/yum.repos.d/CentOS-Base.repo http://mirrors.aliyun.com/repo/Centos-7.repo
## 刷新
yum makecache fast
## 配置k8s源
cat <<EOF > /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64
enabled=1
gpgcheck=0
EOF
## 重建yum缓存
yum clean all
yum makecache fast
yum -y update

2. 安装docker

yum -y install yum-utils device-mapper-persistent-data lvm2
yum-config-manager --add-repo http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
yum -y install  docker-ce-20.10.5 
systemctl enable docker
systemctl start docker

3. 确保kubelet使用的cgroup driver 与 Docker的一致

cat > /etc/docker/daemon.json <<EOF
{
  "exec-opts": ["native.cgroupdriver=systemd"],
  "log-driver": "json-file",
  "log-opts": {
    "max-size": "100m"
  },
  "storage-driver": "overlay2",
  "storage-opts": [
    "overlay2.override_kernel_check=true"
  ],
  "registry-mirrors": ["https://ifa4ye2m.mirror.aliyuncs.com"]
}
EOF
sudo systemctl daemon-reload sudo systemctl restart docker

4. 从国内仓库拉取镜像（核心步骤-如果没有代理）

#!/bin/bash
images=(
kube-apiserver:v1.18.16
kube-controller-manager:v1.18.16
kube-scheduler:v1.18.16
kube-proxy:v1.18.16
pause:3.2
etcd:3.4.3-0
coredns:1.6.7
kubernetes-dashboard-amd64:v1.10.1
)
for imageName in ${images[@]};do
        docker pull registry.cn-hangzhou.aliyuncs.com/google_containers/$imageName
        docker tag registry.cn-hangzhou.aliyuncs.com/google_containers/$imageName k8s.gcr.io/$imageName
done

开始安装k8s

1. 安装kubeadm，kubelet等

1	yum install kubelet-1.18.0 kubeadm-1.18.0 kubectl-1.18.0 kubernetes-cni-1.18.0

2. 集群初始化

## master节点执行：
sudo kubeadm init \
 --apiserver-advertise-address 10.1.69.101 \
 --kubernetes-version=v1.15.0 \
 --pod-network-cidr=10.244.0.0/16
如果外网：
kubeadm init --apiserver-advertise-address xx.xx.255.84 --pod-network-cidr=10.244.0.0/16 --kubernetes-version=v1.18.0

得到回复：


(...省略)
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
## 保存好该命令，丢了不好找回。节点加入时需要
kubeadm join 10.1.69.101:6443 --token ou5pvo.qseafc4s8licblzy \
    --discovery-token-ca-cert-hash sha256:de9c10f11c50c074f212698b9d514fc12a9c1c4ffe70961aff89ac5e585f0663

3. 拷贝配置，给kubectl使用

1
2
3

mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config

4. 安装flannel网络

sudo kubectl apply -f https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml
## 查看flannal是否安装成功
sudo kubectl -n kube-system get po -l app=flannel -o wide

5. node节点加入集群

其他节点执行：

1 2	kubeadm join 10.1.69.101:6443 --token ou5pvo.qseafc4s8licblzy \ --discovery-token-ca-cert-hash sha256:de9c10f11c50c074f212698b9d514fc12a9c1c4ffe70961aff89ac5e585f0663

清理安装

如果安装过程中出任何问题，可以重置后重新安装

1	sudo kubeadm reset

安装 helm 和 gitlab-runner

tar -zxvf helm-v2.12.1-linux-amd64.tar.gz
cd linux-amd64/
cp helm /usr/local/bin

cat tiller.yaml

---
apiVersion: apps/v1
kind: Deployment
metadata:
  creationTimestamp: null
  labels:
    app: helm
    name: tiller
  name: tiller-deploy
  namespace: kube-system
spec:
  replicas: 1
  strategy: {}
  selector:
    matchLabels:
      name: tiller
      app: helm
  template:
    metadata:
      creationTimestamp: null
      labels:
        app: helm
        name: tiller
    spec:
      automountServiceAccountToken: true
      containers:
      - env:
        - name: TILLER_NAMESPACE
          value: kube-system
        - name: TILLER_HISTORY_MAX
          value: "0"
        image: registry.cn-hangzhou.aliyuncs.com/google_containers/tiller:v2.14.3
        imagePullPolicy: IfNotPresent
        livenessProbe:
          httpGet:
            path: /liveness
            port: 44135
          initialDelaySeconds: 1
          timeoutSeconds: 1
        name: tiller
        ports:
        - containerPort: 44134
          name: tiller
        - containerPort: 44135
          name: http
        readinessProbe:
          httpGet:
            path: /readiness
            port: 44135
          initialDelaySeconds: 1
          timeoutSeconds: 1
        resources: {}
      serviceAccountName: tiller
status: {}
---
apiVersion: v1
kind: Service
metadata:
  creationTimestamp: null
  labels:
    app: helm
    name: tiller
  name: tiller-deploy
  namespace: kube-system
spec:
  ports:
  - name: tiller
    port: 44134
    targetPort: tiller
  selector:
    app: helm
    name: tiller
  type: ClusterIP
status:
  loadBalancer: {}

kubectl apply -f tiller.yaml
helm init --upgrade --stable-repo-url https://kubernetes.oss-cn-hangzhou.aliyuncs.com/charts
#helm init -i registry.cn-hangzhou.aliyuncs.com/google_containers/tiller:v2.14.3 --stable-repo-url http://mirror.azure.cn/kubernetes/charts/ --service-account tiller --override spec.selector.matchLabels.'name'='tiller',spec.selector.matchLabels.'app'='helm' --output yaml | sed 's@apiVersion: extensions/v1beta1@apiVersion: apps/v1@' | kubectl delete -f -
 
 kubectl create serviceaccount --namespace kube-system tiller
 kubectl create clusterrolebinding tiller-cluster-rule --clusterrole=cluster-admin --serviceaccount=kube-system:tiller
 kubectl patch deploy --namespace kube-system tiller-deploy -p '{"spec":{"template":{"spec":{"serviceAccount":"tiller"}}}}'
 kubectl get po  -A|grep tiller
 kubectl patch deploy --namespace kube-system tiller-deploy -p '{"spec":{"template":{"spec":{"serviceAccount":"tiller"}}}}'
 helm repo add gitlab https://charts.gitlab.io
 helm repo update
 helm fetch gitlab/gitlab-runner --version "v0.10.0-rc1"
 
 kubectl create namespace gitlab-runners
 mkdir -p gitlab-runner
 cd gitlab-runner/
 kubectl create -f rbac-runner-config.yaml

cat rbac-runner-config.yaml

apiVersion: v1
kind: ServiceAccount
metadata:
  name: gitlab
  namespace: gitlab-runners
---
kind: Role
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  namespace: gitlab-runners
  name: gitlab
rules:
- apiGroups: [""] #"" indicates the core API group
  resources: ["*"]
  verbs: ["*"]
- apiGroups: ["apps"]
  resources: ["deployments"]
  verbs: ["*"]
---
kind: RoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: gitlab
  namespace: gitlab-runners
subjects:
- kind: ServiceAccount
  name: gitlab # Name is case sensitive
  apiGroup: ""
roleRef:
  kind: Role #this must be Role or ClusterRole
  name: gitlab # this must match the name of the Role or ClusterRole you wish to bind to
  apiGroup: rbac.authorization.k8s.io

cat values-spm-operation-frontend.yaml

## GitLab Runner Image
##
## By default it's using gitlab/gitlab-runner:alpine-v{VERSION}
## where {VERSION} is taken from Chart.yaml from appVersion field
##
## ref: https://hub.docker.com/r/gitlab/gitlab-runner/tags/
##
# image: gitlab/gitlab-runner:alpine-v11.6.0
## Specify a imagePullPolicy
## 'Always' if imageTag is 'latest', else set to 'IfNotPresent'
## ref: http://kubernetes.io/docs/user-guide/images/#pre-pulling-images
##
imagePullPolicy: IfNotPresent
gitlabUrl: http://xxx.xxx.cn/
runnerRegistrationToken: "-Rxxz6Cqdk33Q96mRdxk"
## 和之前配置的 rbac name 对应
## 指定关联 runner 的标签
tags: " operations"
privileged: true
serviceAccountName: gitlab
## The GitLab Server URL (with protocol) that want to register the runner against
## ref: https://docs.gitlab.com/runner/commands/README.html#gitlab-runner-register
##
# gitlabUrl: http://gitlab.your-domain.com/
## The Registration Token for adding new Runners to the GitLab Server. This must
## be retrieved from your GitLab Instance.
## ref: https://docs.gitlab.com/ce/ci/runners/README.html
##
# runnerRegistrationToken: ""
## The Runner Token for adding new Runners to the GitLab Server. This must
## be retrieved from your GitLab Instance. It is token of already registered runner.
## ref: (we don't yet have docs for that, but we want to use existing token)
##
# runnerToken: ""
#
## Unregister all runners before termination
##
## Updating the runner's chart version or configuration will cause the runner container
## to be terminated and created again. This may cause your Gitlab instance to reference
## non-existant runners. Un-registering the runner before termination mitigates this issue.
## ref: https://docs.gitlab.com/runner/commands/README.html#gitlab-runner-unregister
##
unregisterRunners: true
## When stopping ther runner, give it time to wait for it's jobs to terminate.
##
## Updating the runner's chart version or configuration will cause the runner container
## to be terminated with a graceful stop request. terminationGracePeriodSeconds
## instructs Kubernetes to wait long enough for the runner pod to terminate gracefully.
## ref: https://docs.gitlab.com/runner/commands/#signals
terminationGracePeriodSeconds: 3600
## Set the certsSecretName in order to pass custom certficates for GitLab Runner to use
## Provide resource name for a Kubernetes Secret Object in the same namespace,
## this is used to populate the /home/gitlab-runner/.gitlab-runner/certs/ directory
## ref: https://docs.gitlab.com/runner/configuration/tls-self-signed.html#supported-options-for-self-signed-certificates
##
# certsSecretName:
## Configure the maximum number of concurrent jobs
## ref: https://docs.gitlab.com/runner/configuration/advanced-configuration.html#the-global-section
##
concurrent: 10
## Defines in seconds how often to check GitLab for a new builds
## ref: https://docs.gitlab.com/runner/configuration/advanced-configuration.html#the-global-section
##
checkInterval: 30
limitLine: 100000
## Configure GitLab Runner's logging level. Available values are: debug, info, warn, error, fatal, panic
## ref: https://docs.gitlab.com/runner/configuration/advanced-configuration.html#the-global-section
##
# logLevel:
## Configure GitLab Runner's logging format. Available values are: runner, text, json
## ref: https://docs.gitlab.com/runner/configuration/advanced-configuration.html#the-global-section
##
# logFormat:
## For RBAC support:
rbac:
  create: false
  serviceAccountName: gitlab
  ## Define specific rbac permissions.
  # resources: ["pods", "pods/exec", "secrets"]
  # verbs: ["get", "list", "watch", "create", "patch", "delete"]
  ## Run the gitlab-bastion container with the ability to deploy/manage containers of jobs
  ## cluster-wide or only within namespace
  clusterWideAccess: false
  ## Use the following Kubernetes Service Account name if RBAC is disabled in this Helm chart (see rbac.create)
  ##
  # serviceAccountName: default
## Configure integrated Prometheus metrics exporter
## ref: https://docs.gitlab.com/runner/monitoring/#configuration-of-the-metrics-http-server
metrics:
  enabled: true
## Configuration for the Pods that that the runner launches for each new job
##
runners:
  ## Default container image to use for builds when none is specified
  ##
  image: ubuntu:16.04
  ## Specify one or more imagePullSecrets
  ##
  ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/
  ##
  # imagePullSecrets: []
  ## Specify the image pull policy: never, if-not-present, always. The cluster default will be used if not set.
  ##
  # imagePullPolicy: ""
  ## Defines number of concurrent requests for new job from GitLab
  ## ref: https://docs.gitlab.com/runner/configuration/advanced-configuration.html#the-runners-section
  ##
  # requestConcurrency: 1
  ## Specify whether the runner should be locked to a specific project: true, false. Defaults to true.
  ##
  # locked: true
  ## Specify the tags associated with the runner. Comma-separated list of tags.
  ##
  ## ref: https://docs.gitlab.com/ce/ci/runners/#using-tags
  ##
  # tags: ""
  ## Specify if jobs without tags should be run.
  ## If not specified, Runner will default to true if no tags were specified. In other case it will
  ## default to false.
  ##
  ## ref: https://docs.gitlab.com/ce/ci/runners/#allowing-runners-with-tags-to-pick-jobs-without-tags
  ##
  # runUntagged: true
  ## Run all containers with the privileged flag enabled
  ## This will allow the docker:dind image to run if you need to run Docker
  ## commands. Please read the docs before turning this on:
  ## ref: https://docs.gitlab.com/runner/executors/kubernetes.html#using-docker-dind
  ##
  privileged: false
  ## The name of the secret containing runner-token and runner-registration-token
  # secret: gitlab-runner
  ## Namespace to run Kubernetes jobs in (defaults to the same namespace of this release)
  ##
  # namespace:
  ## Distributed runners caching
  ## ref: https://gitlab.com/gitlab-org/gitlab-runner/blob/master/docs/configuration/autoscale.md#distributed-runners-caching
  ##
  ## If you want to use s3 based distributing caching:
  ## First of all you need to uncomment General settings and S3 settings sections.
  ##
  ## Create a secret 's3access' containing 'accesskey' & 'secretkey'
  ## ref: https://aws.amazon.com/blogs/security/wheres-my-secret-access-key/
  ##
  ## $ kubectl create secret generic s3access \
  ##   --from-literal=accesskey="YourAccessKey" \
  ##   --from-literal=secretkey="YourSecretKey"
  ## ref: https://kubernetes.io/docs/concepts/configuration/secret/
  ##
  ## If you want to use gcs based distributing caching:
  ## First of all you need to uncomment General settings and GCS settings sections.
  ##
  ## Access using credentials file:
  ## Create a secret 'google-application-credentials' containing your application credentials file.
  ## ref: https://docs.gitlab.com/runner/configuration/advanced-configuration.html#the-runnerscachegcs-section
  ## You could configure
  ## $ kubectl create secret generic google-application-credentials \
  ##   --from-file=gcs-applicaton-credentials-file=./path-to-your-google-application-credentials-file.json
  ## ref: https://kubernetes.io/docs/concepts/configuration/secret/
  ##
  ## Access using access-id and private-key:
  ## Create a secret 'gcsaccess' containing 'gcs-access-id' & 'gcs-private-key'.
  ## ref: https://docs.gitlab.com/runner/configuration/advanced-configuration.html#the-runners-cache-gcs-section
  ## You could configure
  ## $ kubectl create secret generic gcsaccess \
  ##   --from-literal=gcs-access-id="YourAccessID" \
  ##   --from-literal=gcs-private-key="YourPrivateKey"
  ## ref: https://kubernetes.io/docs/concepts/configuration/secret/
  cache: {}
    ## General settings
    # cacheType: s3
    # cachePath: "gitlab_runner"
    # cacheShared: true
    ## S3 settings
    # s3ServerAddress: s3.amazonaws.com
    # s3BucketName:
    # s3BucketLocation:
    # s3CacheInsecure: false
    # secretName: s3access
    ## GCS settings
    # gcsBucketName:
    ## Use this line for access using access-id and private-key
    # secretName: gcsaccess
    ## Use this line for access using google-application-credentials file
    # secretName: google-application-credentials
  ## Build Container specific configuration
  ##
  builds: {}
    # cpuLimit: 200m
    # memoryLimit: 256Mi
    # cpuRequests: 100m
    # memoryRequests: 128Mi
  ## Service Container specific configuration
  ##
  services: {}
    # cpuLimit: 200m
    # memoryLimit: 256Mi
    # cpuRequests: 100m
    # memoryRequests: 128Mi
  ## Helper Container specific configuration
  ##
  helpers: {}
    # cpuLimit: 200m
    # memoryLimit: 256Mi
    # cpuRequests: 100m
    # memoryRequests: 128Mi
    # image: gitlab/gitlab-runner-helper:x86_64-latest
  ## Service Account to be used for runners
  ##
  # serviceAccountName:
  ## If Gitlab is not reachable through $CI_SERVER_URL
  ##
  # cloneUrl:
  ## Specify node labels for CI job pods assignment
  ## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
  ##
  # nodeSelector: {}
  ## Specify pod labels for CI job pods
  ##
  # podLabels: {}
  ## Specify annotations for job pods, useful for annotations such as iam.amazonaws.com/role
  # podAnnotations: {}
  ## Configure environment variables that will be injected to the pods that are created while
  ## the build is running. These variables are passed as parameters, i.e. `--env "NAME=VALUE"`,
  ## to `gitlab-runner register` command.
  ##
  ## Note that `envVars` (see below) are only present in the runner pod, not the pods that are
  ## created for each build.
  ##
  ## ref: https://docs.gitlab.com/runner/commands/#gitlab-runner-register
  ##
  # env:
  #   NAME: VALUE
## Configure resource requests and limits
## ref: http://kubernetes.io/docs/user-guide/compute-resources/
##
resources: {}
  # limits:
  #   memory: 256Mi
  #   cpu: 200m
  # requests:
  #   memory: 128Mi
  #   cpu: 100m
## Affinity for pod assignment
## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity
##
affinity: {}
## Node labels for pod assignment
## Ref: https://kubernetes.io/docs/user-guide/node-selection/
##
nodeSelector: {
  nodeType: k8s
}
  # Example: The gitlab runner manager should not run on spot instances so you can assign
  # them to the regular worker nodes only.
  # node-role.kubernetes.io/worker: "true"
## List of node taints to tolerate (requires Kubernetes >= 1.6)
## Ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/
##
tolerations: []
  # Example: Regular worker nodes may have a taint, thus you need to tolerate the taint
  # when you assign the gitlab runner manager with nodeSelector or affinity to the nodes.
  # - key: "node-role.kubernetes.io/worker"
  #   operator: "Exists"
## Configure environment variables that will be present when the registration command runs
## This provides further control over the registration process and the config.toml file
## ref: `gitlab-runner register --help`
## ref: https://docs.gitlab.com/runner/configuration/advanced-configuration.html
##
# envVars:
#   - name: RUNNER_EXECUTOR
#     value: kubernetes
## list of hosts and IPs that will be injected into the pod's hosts file
hostAliases: []
  # Example:
  # - ip: "127.0.0.1"
  #   hostnames:
  #   - "foo.local"
  #   - "bar.local"
  # - ip: "10.1.2.3"
  #   hostnames:
  #   - "foo.remote"
  #   - "bar.remote"
## Annotations to be added to manager pod
##
podAnnotations: {}
  # Example:
  # iam.amazonaws.com/role: <my_role_arn>
## HPA support for custom metrics:
## This section enables runners to autoscale based on defined custom metrics.
## In order to use this functionality, Need to enable a custom metrics API server by
## implementing "custom.metrics.k8s.io" using supported third party adapter
## Example: https://github.com/directxman12/k8s-prometheus-adapter
##
#hpa: {}
  # minReplicas: 1
  # maxReplicas: 10
  # metrics:
  # - type: Pods
  #   pods:
  #     metricName: gitlab_runner_jobs
  #     targetAverageValue: 400m

Helm for gitlab configmap

cat templates/configmap.yaml

apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ include "gitlab-runner.fullname" . }}
  labels:
    app: {{ include "gitlab-runner.fullname" . }}
    chart: {{ include "gitlab-runner.chart" . }}
    release: "{{ .Release.Name }}"
    heritage: "{{ .Release.Service }}"
data:
  entrypoint: |
    #!/bin/bash
    set -e
    mkdir -p /home/gitlab-runner/.gitlab-runner/
    cp /scripts/config.toml /home/gitlab-runner/.gitlab-runner/
    # Register the runner
    if [[ -f /secrets/accesskey && -f /secrets/secretkey ]]; then
      export CACHE_S3_ACCESS_KEY=$(cat /secrets/accesskey)
      export CACHE_S3_SECRET_KEY=$(cat /secrets/secretkey)
    fi
    if [[ -f /secrets/gcs-applicaton-credentials-file ]]; then
      export GOOGLE_APPLICATION_CREDENTIALS="/secrets/gcs-applicaton-credentials-file"
    else
      if [[ -f /secrets/gcs-access-id && -f /secrets/gcs-private-key ]]; then
        export CACHE_GCS_ACCESS_ID=$(cat /secrets/gcs-access-id)
        # echo -e used to make private key multiline (in google json auth key private key is oneline with \n)
        export CACHE_GCS_PRIVATE_KEY=$(echo -e $(cat /secrets/gcs-private-key))
      fi
    fi
    if [[ -f /secrets/runner-registration-token ]]; then
      export REGISTRATION_TOKEN=$(cat /secrets/runner-registration-token)
    fi
    if [[ -f /secrets/runner-token ]]; then
      export CI_SERVER_TOKEN=$(cat /secrets/runner-token)
    fi
    if ! sh /scripts/register-the-runner; then
      exit 1
    fi
    cat >>/home/gitlab-runner/.gitlab-runner/config.toml <<EOF
      output_limit = 100000
      [[runners.kubernetes.volumes.pvc]]
        name = "gitlab-runner-maven-repo-claim"
        mount_path = "/home/cache/maven"
      [[runners.kubernetes.volumes.host_path]]
        name = "docker"
        mount_path = "/var/run/docker.sock"
    EOF
    # Start the runner
    exec /entrypoint run --user=gitlab-runner \
      --working-directory=/home/gitlab-runner
  config.toml: |
    concurrent = {{ .Values.concurrent }}
    check_interval = {{ .Values.checkInterval }}
    log_level = {{ default "info" .Values.logLevel | quote }}
    output_limit = {{ default 10000 .Values.limitLine}}
    {{- if .Values.logFormat }}
    log_format = {{ .Values.logFormat | quote }}
    {{- end }}
    {{- if .Values.metrics.enabled }}
    listen_address = '[::]:9252'
    {{- end }}
  configure: |
    set -e
    cp /init-secrets/* /secrets
  register-the-runner: |
    #!/bin/bash
    MAX_REGISTER_ATTEMPTS=30
    for i in $(seq 1 "${MAX_REGISTER_ATTEMPTS}"); do
      echo "Registration attempt ${i} of ${MAX_REGISTER_ATTEMPTS}"
      /entrypoint register \
        {{- range .Values.runners.imagePullSecrets }}
        --kubernetes-image-pull-secrets {{ . | quote }} \
        {{- end }}
        {{- range $key, $val := .Values.runners.nodeSelector }}
        --kubernetes-node-selector {{ $key | quote }}:{{ $val | quote }} \
        {{- end }}
        {{- range $key, $value := .Values.runners.podLabels }}
        --kubernetes-pod-labels {{ $key | quote }}:{{ $value | quote }} \
        {{- end }}
        {{- range $key, $val := .Values.runners.podAnnotations }}
        --kubernetes-pod-annotations {{ $key | quote }}:{{ $val | quote }} \
        {{- end }}
        {{- range $key, $value := .Values.runners.env }}
        --env {{ $key | quote -}} = {{- $value | quote }} \
        {{- end }}
        {{- if and (hasKey .Values.runners "runUntagged") .Values.runners.runUntagged }}
        --run-untagged=true \
        {{- end }}
        --non-interactive
      retval=$?
      if [ ${retval} = 0 ]; then
        break
      elif [ ${i} = ${MAX_REGISTER_ATTEMPTS} ]; then
        exit 1
      fi
      sleep 5
    done
    exit 0
  check-live: |
    #!/bin/bash
    if /usr/bin/pgrep -f .*register-the-runner; then
      exit 0
    elif /usr/bin/pgrep gitlab.*runner; then
      exit 0
    else
      exit 1
    fi

gitlab-runner

1 2	helm install --name xxx gitlab/gitlab-runner -f values-spm-labour.yaml --namespace gitlab-runners --version "v0.10.0-rc1" helm upgrade xxx ./gitlab-runner/

创建runner maven 缓存

Java 需要

cat maven-nfs.yaml

apiVersion: v1
kind: PersistentVolume
metadata:
  name: gitlab-runner-maven-repo
spec:
  accessModes:
  - ReadWriteMany
  capacity:
    storage: 5Gi
  mountOptions:
  - nolock
  nfs:
    path: /data/nfs/volumes
    server: YOURIPADDRESS
  persistentVolumeReclaimPolicy: Recycle
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: gitlab-runner-maven-repo-claim
  namespace: gitlab-runners
spec:
  accessModes:
  - ReadWriteMany
  resources:
    requests:
      storage: 5Gi
  volumeName: gitlab-runner-maven-repo
status:
  accessModes:
  - ReadWriteMany
  capacity:
    storage: 5Gi

生成maven缓存PVC

---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: gitlab-runner-maven-repo-claim
  namespace: gitlab-runners
spec:
  accessModes:
  - ReadWriteMany
    resources:
    requests:
      storage: 5Gi
    volumeName: gitlab-runner-maven-repo
status:
    accessModes:
  - ReadWriteMany
    capacity:
    storage: 5Gi

代码及镜像上传

##如果删除
helm del --purge spm-labour
##生成gitlab namespace
kubectl create ns gitlab
### 代码镜像上传密钥
kubectl create secret docker-registry  huawei-auth --docker-server=XXX.myhuaweicloud.com --docker-username=XXX --docker-password=XXXX -ngitlab
###kubectl create secret docker-registry  aliyun-auth --docker-server=registry.cn-zhangjiakou.aliyuncs.com --docker-username=xxx --docker-password=xxx -ngitlab

其它及遇到的坑

GitLab集成k8s 签名错误

1 2	kubectl get secrets kubectl get secret default-token-52xsf -o jsonpath="{['data']['ca\.crt']}" \| base64 --decode

cat gitlab-admin-service-account.yaml

---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: gitlab
  namespace: gitlab
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: gitlab
  namespace: gitlab
subjects:
  - kind: ServiceAccount
    name: gitlab
    namespace: gitlab
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cluster-admin

获取 Service Token

1	kubectl -n gitlab describe secret $(kubectl -n gitlab get secret \| grep gitlab\| awk '{print $1}')

记k8s在公网部署

发表于 2020-10-29 |

准备工作

1. 安装必备软件

1	yum install -y wget vim net-tools epel-release

2. 关闭swap

swapoff -a
# 永久禁用，打开/etc/fstab注释掉swap那一行。
sed -i 's/.*swap.*/#&/' /etc/fstab

3. 关闭selinux

# 临时禁用selinux
setenforce 0
# 永久关闭 修改/etc/sysconfig/selinux文件设置
sed -i 's/SELINUX=permissive/SELINUX=disabled/' /etc/sysconfig/selinux
sed -i "s/SELINUX=enforcing/SELINUX=disabled/g" /etc/selinux/config

4. 关闭防火墙

1 2	systemctl disable firewalld systemctl stop firewalld

安装Docker和配置代理

1. 配置yum源

## 配置默认源
## 备份
mv /etc/yum.repos.d/CentOS-Base.repo /etc/yum.repos.d/CentOS-Base.repo.backup
## 下载阿里源
wget -O /etc/yum.repos.d/CentOS-Base.repo http://mirrors.aliyun.com/repo/Centos-7.repo
## 刷新
yum makecache fast
## 配置k8s源
cat <<EOF > /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64
enabled=1
gpgcheck=0
EOF
## 重建yum缓存
yum clean all
yum makecache fast
yum -y update

2. 安装docker

yum -y install yum-utils device-mapper-persistent-data lvm2
yum-config-manager --add-repo http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
yum -y install docker-ce
systemctl enable docker
systemctl start docker

3. 确保kubelet使用的cgroup driver 与 Docker的一致

cat > /etc/docker/daemon.json <<EOF
{
  "exec-opts": ["native.cgroupdriver=systemd"],
  "log-driver": "json-file",
  "log-opts": {
    "max-size": "100m"
  },
  "storage-driver": "overlay2",
  "storage-opts": [
    "overlay2.override_kernel_check=true"
  ]
}
EOF
sudo systemctl daemon-reload sudo systemctl restart docker

4. 设置docker代理（核心步骤-如果有代理）

没有代理执行步骤5

mkdir /etc/systemd/system/docker.service.d
touch /etc/systemd/system/docker.service.d/http-proxy.conf
[Service]
Environment="HTTP_PROXY=http://xxx"
Environment="HTTPS_PROXY=http://xxx"
Environment="NO_PROXY=localhost,127.0.0.1,localaddress,.localdomain.com"
systemctl daemon-reload && systemctl restart docker

5. 从国内仓库拉取镜像（核心步骤-如果没有代理）

1 2	## 查看集群初始化所需镜像及对应依赖版本号，列出的就是需要下载的镜像 kubeadm config images list

#!/bin/bash
images=(
kube-apiserver:v1.18.0
kube-controller-manager:v1.18.0
kube-scheduler:v1.18.0
kube-proxy:v1.18.0
pause:3.2
etcd:3.4.3-0
coredns:1.6.7
kubernetes-dashboard-amd64:v1.10.1
)
for imageName in ${images[@]};do
        docker pull registry.cn-hangzhou.aliyuncs.com/google_containers/$imageName
        docker tag registry.cn-hangzhou.aliyuncs.com/google_containers/$imageName k8s.gcr.io/$imageName
done

开始安装k8s

1. 安装kubeadm，kubelet等

1 2	yum -y install kubelet kubeadm kubectl kubernetes-cni systemctl enable kubelet && systemctl start kubelet

2. 集群初始化

## master节点执行：
sudo kubeadm init \
 --apiserver-advertise-address 10.1.69.101 \
 --kubernetes-version=v1.15.0 \
 --pod-network-cidr=10.244.0.0/16

公网会卡在:

1
2

[wait-control-plane] Waiting for the kubelet to boot up the control plane as static Pods from directory "/etc/kubernetes/manifests". This can take up to 4m0s
[kubelet-check] Initial timeout of 40s passed.

修改etcd.yaml

文件路径”/etc/kubernetes/manifests/etcd.yaml”

修改前

修改后

此处”xxx”为公网ip，要关注的是”–listen-client-urls”和”–listen-peer-urls”。需要把–listen-client-urls 和 –listen-peer-urls 都改成0.0.0.0：xxx

来源

得到回复：


(...省略)
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
## 保存好该命令，丢了不好找回。节点加入时需要
kubeadm join 10.1.69.101:6443 --token ou5pvo.qseafc4s8licblzy \
    --discovery-token-ca-cert-hash sha256:de9c10f11c50c074f212698b9d514fc12a9c1c4ffe70961aff89ac5e585f0663

3. 拷贝配置，给kubectl使用

1
2
3

mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config

4. 安装flannel网络

sudo kubectl apply -f https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml
## 查看flannal是否安装成功
sudo kubectl -n kube-system get po -l app=flannel -o wide

5. node节点加入集群

其他节点执行：

1	kubeadm join 39.99.xxx.xxx:6443 --token w4tx2r.0dm194h24wlw8m8t --discovery-token-ca-cert-hash sha256:caae3b178b4172839269254f373b6eaa14514173b01e129e0b0fe7d64694dc39

清理安装

如果安装过程中出任何问题，可以重置后重新安装

1	sudo kubeadm reset

Dashboard安装

# 安装dashboard，国内可以使用别的yaml源
kubectl apply -f https://raw.githubusercontent.com/kubernetes/dashboard/v1.10.1/src/deploy/recommended/kubernetes-dashboard.yaml
# 修改node为NodePort模式
kubectl patch svc -n kube-system kubernetes-dashboard -p '{"spec":{"type":"NodePort"}}'
# 查看服务(得知dashboard运行在443:32383/TCP端口)
kubectl get svc -n kube-system 
# --- 输出 ---
NAME                   TYPE        CLUSTER-IP      EXTERNAL-IP   PORT(S)                  AGE
kube-dns               ClusterIP   10.96.0.10      <none>        53/UDP,53/TCP,9153/TCP   7h40m
kubernetes-dashboard   NodePort    10.111.77.210   <none>        443:32383/TCP            3h42m
# --- 输出 ---
# 查看dashboard运行在哪个node(得知dashboard运行在192.168.20.4)
kubectl get pods -A -o wide
# --- 输出 ---
NAMESPACE     NAME                                    READY   STATUS    RESTARTS   AGE     IP             NODE     NOMINATED NODE   READINESS GATES
kube-system   coredns-fb8b8dccf-rn8kd                 1/1     Running   0          7h43m   10.244.0.2     master   <none>           <none>
kube-system   coredns-fb8b8dccf-slwr4                 1/1     Running   0          7h43m   10.244.0.3     master   <none>           <none>
kube-system   etcd-master                             1/1     Running   0          7h42m   192.168.20.5   master   <none>           <none>
kube-system   kube-apiserver-master                   1/1     Running   0          7h42m   192.168.20.5   master   <none>           <none>
kube-system   kube-controller-manager-master          1/1     Running   0          7h42m   192.168.20.5   master   <none>           <none>
kube-system   kube-flannel-ds-amd64-l8c7c             1/1     Running   0          7h3m    192.168.20.5   master   <none>           <none>
kube-system   kube-flannel-ds-amd64-lcmxw             1/1     Running   1          6h50m   192.168.20.4   node1    <none>           <none>
kube-system   kube-flannel-ds-amd64-pqnln             1/1     Running   1          6h5m    192.168.20.3   node2    <none>           <none>
kube-system   kube-proxy-4kcqb                        1/1     Running   0          7h43m   192.168.20.5   master   <none>           <none>
kube-system   kube-proxy-jcqjd                        1/1     Running   0          6h5m    192.168.20.3   node2    <none>           <none>
kube-system   kube-proxy-vm9sj                        1/1     Running   0          6h50m   192.168.20.4   node1    <none>           <none>
kube-system   kube-scheduler-master                   1/1     Running   0          7h42m   192.168.20.5   master   <none>           <none>
kube-system   kubernetes-dashboard-5f7b999d65-2ltmv   1/1     Running   0          3h45m   10.244.1.2     node1    <none>           <none>
# --- 输出 ---
# 如果无法变成Running状态，可以使用以下命令排错
journalctl -f -u kubelet  # 只看当前的kubelet进程日志
### 提示拉取镜像失败，无法翻墙的可以使用以下方法预先拉取镜像
### 请在kubernetes-dashboard的节点上操作：
docker pull mirrorgooglecontainers/kubernetes-dashboard-amd64:v1.10.1
docker tag mirrorgooglecontainers/kubernetes-dashboard-amd64:v1.10.1 k8s.gcr.io/kubernetes-dashboard-amd64:v1.10.1
docker rmi  mirrorgooglecontainers/kubernetes-dashboard-amd64:v1.10.1

获取token命令

1
2
3

## 获取登陆界面token
kubectl -n kube-system describe $(kubectl -n kube-system \
get secret -n kube-system -o name | grep namespace) | grep token

如果dashboard的token过期，以下脚本重新生成config文件

1
2
3

#!/bin/bash
TOKEN=$(kubectl -n kube-system describe secret default| awk '$1=="token:"{print $2}')
kubectl config set-credentials kubernetes-admin --token="${TOKEN}"