Helm: Creating a Highly Available MySQL Cluster

Published 2023-05-13

Installing a MySQL cluster with Helm (1 primary, 2 replicas)

1. Add the chart repository

$ helm repo add kubegemsapp https://charts.kubegems.io/kubegemsapp

2. Search for the MySQL chart

$ helm repo update
$ helm search repo mysql
NAME CHART VERSION APP VERSION DESCRIPTION
bitnami/mysql 8.9.6 8.0.29 MySQL is a fast, reliable, scalable, and easy t...
bitnami/phpmyadmin 10.0.1 5.1.3 phpMyAdmin is a free software tool written in P...
bitnami/mariadb 11.0.2 10.6.7 MariaDB is an open source, community-developed ...
bitnami/mariadb-cluster 1.0.2 10.2.14 DEPRECATED Chart to create a Highly available M...
bitnami/mariadb-galera 7.1.8 10.6.7 MariaDB Galera is a multi-primary database clus...

3. Pull the MySQL chart locally

$ mkdir /root/mysql && cd /root/mysql
# pull the chart into the local /root/mysql directory
$ helm pull kubegemsapp/mysql --version 4.5.2
$ tar -xvf mysql-4.5.2.tgz
$ cp mysql/values.yaml ./values.yaml
# inspect the directory layout
$ tree -L 2
.
├── mysql
│ ├── Chart.yaml
│ ├── files
│ ├── README.md
│ ├── templates
│ ├── values-production.yaml
│ └── values.yaml
├── mysql-4.5.2.tgz
└── values.yaml

4. Edit the local values.yaml

  • Check the cluster's StorageClasses
$ kubectl get storageclasses.storage.k8s.io
NAME PROVISIONER RECLAIMPOLICY VOLUMEBINDINGMODE ALLOWVOLUMEEXPANSION AGE
openebs-device openebs.io/local Delete WaitForFirstConsumer false 34d
openebs-hostpath openebs.io/local Delete WaitForFirstConsumer false 34d
openebs-jiva-default jiva.csi.openebs.io Delete Immediate true 33d
  • Edit the configuration
$ cat values.yaml
nameOverride: mysql
fullnameOverride: mysql
image:
  registry: registry.cn-beijing.aliyuncs.com
  repository: kubegemsapp/mysql
  tag: 5.7.26
  pullPolicy: IfNotPresent
service:
  type: ClusterIP
  port: 3306
securityContext:
  enabled: true
  fsGroup: 1001
  runAsUser: 1001
root:
  password: Huazhu@2021
  forcePassword: true
db:
  user: huazhu
  password: Huazhu@2021
  name: my_database
  forcePassword: true
replication:
  enabled: true
  #user: huazhu_replicator
  user: root
  password: Huazhu@2021
  forcePassword: true
master:
  antiAffinity: soft
  updateStrategy:
    type: RollingUpdate
  persistence:
    enabled: true
    storageClass: "managed-nfs-storage"
    mountPath: /bitnami/mysql
    annotations:
    accessModes:
      - ReadWriteOnce
    size: 5Gi
  config: |-
    [mysqld]
    default_authentication_plugin=mysql_native_password
    skip-name-resolve
    explicit_defaults_for_timestamp
    basedir=/opt/bitnami/mysql
    port=3306
    socket=/opt/bitnami/mysql/tmp/mysql.sock
    tmpdir=/opt/bitnami/mysql/tmp
    max_allowed_packet=16M
    bind-address=0.0.0.0
    pid-file=/opt/bitnami/mysql/tmp/mysqld.pid
    log-error=/opt/bitnami/mysql/logs/mysqld.log
    character-set-server=UTF8
    collation-server=utf8_general_ci
    [client]
    port=3306
    socket=/opt/bitnami/mysql/tmp/mysql.sock
    default-character-set=UTF8
    [manager]
    port=3306
    socket=/opt/bitnami/mysql/tmp/mysql.sock
    pid-file=/opt/bitnami/mysql/tmp/mysqld.pid
  resources: {}
  livenessProbe:
    enabled: true
    initialDelaySeconds: 120
    periodSeconds: 10
    timeoutSeconds: 1
    successThreshold: 1
    failureThreshold: 3
  readinessProbe:
    enabled: true
    initialDelaySeconds: 15
    periodSeconds: 10
    timeoutSeconds: 1
    successThreshold: 1
    failureThreshold: 3
slave:
  replicas: 1
  antiAffinity: soft
  updateStrategy:
    type: RollingUpdate
  persistence:
    enabled: true
    storageClass: "managed-nfs-storage"
    mountPath: /bitnami/mysql
    annotations:
    accessModes:
      - ReadWriteOnce
    size: 5Gi
  config: |-
    [mysqld]
    default_authentication_plugin=mysql_native_password
    skip-name-resolve
    explicit_defaults_for_timestamp
    basedir=/opt/bitnami/mysql
    port=3306
    socket=/opt/bitnami/mysql/tmp/mysql.sock
    tmpdir=/opt/bitnami/mysql/tmp
    max_allowed_packet=16M
    bind-address=0.0.0.0
    pid-file=/opt/bitnami/mysql/tmp/mysqld.pid
    log-error=/opt/bitnami/mysql/logs/mysqld.log
    character-set-server=UTF8
    collation-server=utf8_general_ci
    [client]
    port=3306
    socket=/opt/bitnami/mysql/tmp/mysql.sock
    default-character-set=UTF8
    [manager]
    port=3306
    socket=/opt/bitnami/mysql/tmp/mysql.sock
    pid-file=/opt/bitnami/mysql/tmp/mysqld.pid
  resources: {}
  livenessProbe:
    enabled: true
    initialDelaySeconds: 120
    periodSeconds: 30
    timeoutSeconds: 1
    successThreshold: 1
    failureThreshold: 3
  readinessProbe:
    enabled: true
    initialDelaySeconds: 15
    periodSeconds: 30
    timeoutSeconds: 1
    successThreshold: 1
    failureThreshold: 3
metrics:
  enabled: false
  image:
    registry: registry.cn-beijing.aliyuncs.com
    repository: kubegemsapp/mysqld-exporter
    tag: v0.10.0
    pullPolicy: IfNotPresent
  resources: {}
  annotations:
    prometheus.io/scrape: "true"
    prometheus.io/port: "9104"
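Before installing, it can help to render the chart locally with the modified values as a quick sanity check (a sketch; mysql-ha is just an example release name matching the install command below):

# render the manifests locally without touching the cluster
$ helm template mysql-ha ./mysql -f ./values.yaml | less
# or do a dry run against the target namespace
$ helm install mysql-ha ./mysql -f ./values.yaml --namespace mysql --dry-run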

5. Install the MySQL cluster

helm install --namespace mysql mysql-ha -f ./values.yaml ./mysql --set auth.rootPassword=Huazhu@2021
NAME: mysql-cluster
LAST DEPLOYED: Mon May 9 01:54:38 2022
NAMESPACE: test-middleware
STATUS: deployed
REVISION: 1
TEST SUITE: None
NOTES:
Please be patient while the chart is being deployed
Tip:
Watch the deployment status using the command: kubectl get pods -w --namespace test-middleware
Services:
echo Master: mysql-cluster-mysql.test-middleware.svc.cluster.local:3306
echo Slave: mysql-cluster-mysql-slave.test-middleware.svc.cluster.local:3306
Administrator credentials:
echo Username: root
echo Password : $(kubectl get secret --namespace test-middleware mysql-cluster-mysql -o jsonpath="{.data.mysql-root-password}" | base64 --decode)
To connect to your database:
1. Run a pod that you can use as a client:
kubectl run mysql-cluster-mysql-client --rm --tty -i --restart='Never' --image docker.io/bitnami/mysql:5.7.26 --namespace test-middleware --command -- bash
2. To connect to master service (read/write):
mysql -h mysql-cluster-mysql.test-middleware.svc.cluster.local -uroot -p my_database
3. To connect to slave service (read-only):
mysql -h mysql-cluster-mysql-slave.test-middleware.svc.cluster.local -uroot -p my_database
To upgrade this helm chart:
1. Obtain the password as described on the 'Administrator credentials' section and set the 'root.password' parameter as shown below:
ROOT_PASSWORD=$(kubectl get secret --namespace test-middleware mysql-cluster-mysql -o jsonpath="{.data.mysql-root-password}" | base64 --decode)
helm upgrade mysql-cluster bitnami/mysql --set root.password=$ROOT_PASSWORD

6. Inspect the deployed MySQL cluster

$ helm -n mysql list
NAME NAMESPACE REVISION UPDATED STATUS CHART APP VERSION
mysql-cluster test-middleware 1 2022-05-09 01:54:38.848559008 -0400 EDT deployed mysql-4.5.2 5.7.26
$ kubectl -n mysql get pods -l app=mysql
NAME READY STATUS RESTARTS AGE
mysql-cluster-mysql-master-0 1/1 Running 0 16m
mysql-cluster-mysql-slave-0 1/1 Running 0 16m
mysql-cluster-mysql-slave-1 1/1 Running 0 14m
> mysql-cluster-mysql-master-0 is the primary; mysql-cluster-mysql-slave-0 and mysql-cluster-mysql-slave-1 are the replicas.
> To access this MySQL cluster from the default namespace:
> MySQL primary: mysql-cluster-mysql.test-middleware
> MySQL replica 0: mysql-cluster-mysql-slave-0.mysql-cluster-mysql-slave.test-middleware
> MySQL replica 1: mysql-cluster-mysql-slave-1.mysql-cluster-mysql-slave.test-middleware
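To confirm from the default namespace that the names above resolve, you can start a one-off client pod (a sketch; it assumes the bitnami image and the root password from values.yaml):

$ kubectl run mysql-client --rm --tty -i --restart='Never' --namespace default \
    --image docker.io/bitnami/mysql:5.7.26 \
    --command -- mysql -h mysql-cluster-mysql.test-middleware -uroot -pHuazhu@2021 -e "SELECT 1"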
  • Check the StorageClass used by the services
# list the PVCs
$ kubectl -n test-middleware get pvc
NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE
data-mysql-cluster-mysql-master-0 Bound pvc-b9a1d1ca-44d3-4292-af45-e6f3b3063395 8Gi RWO openebs-jiva-default 31m
data-mysql-cluster-mysql-slave-0 Bound pvc-0d234b12-26eb-4e07-9dc0-ef9f0230e9fa 8Gi RWO openebs-jiva-default 31m
data-mysql-cluster-mysql-slave-1 Bound pvc-16531f4b-41ac-4a04-9d90-04b92aab7b49 8Gi RWO openebs-jiva-default 29m
# list the PVs
$ kubectl get pv
NAME CAPACITY ACCESS MODES RECLAIM POLICY STATUS CLAIM STORAGECLASS REASON AGE
pvc-b9a1d1ca-44d3-4292-af45-e6f3b3063395 8Gi RWO Delete Bound test-middleware/data-mysql-cluster-mysql-master-0 openebs-jiva-default 33m
pvc-0d234b12-26eb-4e07-9dc0-ef9f0230e9fa 8Gi RWO Delete Bound test-middleware/data-mysql-cluster-mysql-slave-0 openebs-jiva-default 33m
pvc-16531f4b-41ac-4a04-9d90-04b92aab7b49 8Gi RWO Delete Bound test-middleware/data-mysql-cluster-mysql-slave-1 openebs-jiva-default 31m

7. Connect to the MySQL cluster and verify the service

# get the MySQL root password
$ kubectl get secret --namespace mysql mysql -o jsonpath="{.data.mysql-root-password}" | base64 --decode
Huazhu@2021
# start a temporary client pod
$ kubectl run mysql-cluster-mysql-client --rm --tty -i --restart='Never' --image docker.io/bitnami/mysql:5.7.26 --namespace mysql --command -- bash
## log in to the MySQL primary
$ mysql -h mysql.mysql -uroot -p
Enter password: # Huazhu@2021
mysql> show databases;
+--------------------+
| Database |
+--------------------+
| information_schema |
| my_database |
| mysql |
| performance_schema |
| sys |
+--------------------+
5 rows in set (0.00 sec)
# check the primary's replication status
# note the File and Position values; they are referenced in the replica's status below
> show master status\G;
*************************** 1. row ***************************
File: mysql-bin.000002
Position: 154
Binlog_Do_DB:
Binlog_Ignore_DB:
Executed_Gtid_Set:
1 row in set (0.00 sec)
ERROR:
No query specified
## log in to a replica and check the replication status
$ mysql -h mysql-mysql-slave.mysql -uroot -p
Enter password: # root123
mysql> show slave status\G;
*************************** 1. row ***************************
Slave_IO_State: Waiting for master to send event
Master_Host: mysql-cluster-mysql
Master_User: replicator
Master_Port: 3306
Connect_Retry: 10
Master_Log_File: mysql-bin.000002 # File: mysql-bin.000002
Read_Master_Log_Pos: 154 # Position: 154
Relay_Log_File: mysql-relay-bin.000004
Relay_Log_Pos: 367
Relay_Master_Log_File: mysql-bin.000002
Slave_IO_Running: Yes
Slave_SQL_Running: Yes
Replicate_Do_DB:
Replicate_Ignore_DB:
Replicate_Do_Table:
Replicate_Ignore_Table:
Replicate_Wild_Do_Table:
Replicate_Wild_Ignore_Table:
Last_Errno: 0
Last_Error:
Skip_Counter: 0
Exec_Master_Log_Pos: 154
Relay_Log_Space: 2236
Until_Condition: None
Until_Log_File:
Until_Log_Pos: 0
Master_SSL_Allowed: No
Master_SSL_CA_File:
Master_SSL_CA_Path:
Master_SSL_Cert:
Master_SSL_Cipher:
Master_SSL_Key:
Seconds_Behind_Master: 0
Master_SSL_Verify_Server_Cert: No
Last_IO_Errno: 0
Last_IO_Error:
Last_SQL_Errno: 0
Last_SQL_Error:
Replicate_Ignore_Server_Ids:
Master_Server_Id: 641
Master_UUID: aa7a516b-cf5c-11ec-b974-a2ee403fe88f
Master_Info_File: mysql.slave_master_info
SQL_Delay: 0
SQL_Remaining_Delay: NULL
Slave_SQL_Running_State: Slave has read all relay log; waiting for more updates
Master_Retry_Count: 86400
Master_Bind:
Last_IO_Error_Timestamp:
Last_SQL_Error_Timestamp:
Master_SSL_Crl:
Master_SSL_Crlpath:
Retrieved_Gtid_Set:
Executed_Gtid_Set:
Auto_Position: 0
Replicate_Rewrite_DB:
Channel_Name:
Master_TLS_Version:
1 row in set (0.00 sec)
ERROR:
No query specified
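To double-check that replication is really flowing, you can write on the primary and read the same row back through the replica service (a sketch run from the client pod above; repl_test is a throwaway database name):

# on the primary: create a test database and insert a row
mysql -h mysql-cluster-mysql.test-middleware -uroot -p -e "CREATE DATABASE repl_test; CREATE TABLE repl_test.t1 (id INT); INSERT INTO repl_test.t1 VALUES (1);"
# on a replica: the row should appear almost immediately
mysql -h mysql-cluster-mysql-slave.test-middleware -uroot -p -e "SELECT * FROM repl_test.t1;"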

Helm: Creating a Highly Available Redis Cluster

Published 2023-05-11

Data is automatically sharded across multiple Redis nodes.

Sentinel features:

Its main functions are the following:
Periodically monitor whether Redis is running as expected;
If a Redis node is found to be misbehaving, notify another process (for example, a client);
Perform automatic failover: when a master becomes unavailable, one of its slaves (if there is more than one) is elected as the new master, and the remaining slaves are repointed to follow the newly promoted master's address.
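Once the redis-ha chart below is deployed, you can ask any Sentinel directly which node it currently considers the master (a sketch; mymaster is the masterGroupName configured in values.yaml, and <sentinel-host> is any redis-ha-announce service):

redis-cli -h <sentinel-host> -p 26379 sentinel get-master-addr-by-name mymaster
redis-cli -h <sentinel-host> -p 26379 sentinel master mymaster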

Sentinel configuration file explained

Reference: https://segmentfault.com/a/1190000002680804

Replica election after the master goes down is described here:

  https://blog.csdn.net/tr1912/article/details/81265007

Installing the Redis cluster

First, add the Helm repository and search for redis:

$ helm repo add bitnami https://charts.bitnami.com/bitnami
$ helm search repo redis
NAME CHART VERSION APP VERSION DESCRIPTION
bitnami/redis 14.6.6 6.2.4 Open source, advanced key-value store. It is of...
bitnami/redis-cluster 6.2.3 6.2.4 Open source, advanced key-value store. It is of...
stable/prometheus-redis-exporter 3.5.0 1.3.4 Prometheus exporter for Redis metrics
stable/redis 10.5.7 5.0.7 DEPRECATED Open source, advanced key-value stor...
stable/redis-ha 4.4.6 5.0.6 DEPRECATED - Highly available Kubernetes implem...
helm search repo redis
helm pull stable/redis-ha
tar zxvf redis-ha-*.tgz
cp redis-ha/values.yaml .

cat values.yaml

## Configure resource requests and limits
## ref: http://kubernetes.io/docs/user-guide/compute-resources/
##
image:
repository: redis
tag: 5.0.6-alpine
pullPolicy: IfNotPresent
## Reference to one or more secrets to be used when pulling images
## ref: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/
## This imagePullSecrets is only for redis images
##
imagePullSecrets: []
# - name: "image-pull-secret"
## replicas number for each component
replicas: 3
## Kubernetes priorityClass name for the redis-ha-server pod
# priorityClassName: ""
## Custom labels for the redis pod
labels: {}
## Pods Service Account
## ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/
serviceAccount:
## Specifies whether a ServiceAccount should be created
##
create: true
## The name of the ServiceAccount to use.
## If not set and create is true, a name is generated using the redis-ha.fullname template
# name:
## Enables a HA Proxy for better LoadBalancing / Sentinel Master support. Automatically proxies to Redis master.
## Recommend for externally exposed Redis clusters.
## ref: https://cbonte.github.io/haproxy-dconv/1.9/intro.html
haproxy:
enabled: false
# Enable if you want a dedicated port in haproxy for redis-slaves
readOnly:
enabled: false
port: 6380
replicas: 3
image:
repository: haproxy
tag: 2.0.4
pullPolicy: IfNotPresent
## Reference to one or more secrets to be used when pulling images
## ref: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/
##
imagePullSecrets: []
# - name: "image-pull-secret"
annotations: {}
resources: {}
emptyDir: {}
## Enable sticky sessions to Redis nodes via HAProxy
## Very useful for long-living connections as in case of Sentry for example
stickyBalancing: false
## Kubernetes priorityClass name for the haproxy pod
# priorityClassName: ""
## Service type for HAProxy
##
service:
type: ClusterIP
loadBalancerIP:
annotations: {}
serviceAccount:
create: true
## Official HAProxy embedded prometheus metrics settings.
## Ref: https://github.com/haproxy/haproxy/tree/master/contrib/prometheus-exporter
##
metrics:
enabled: false
# prometheus port & scrape path
port: 9101
portName: exporter-port
scrapePath: /metrics
serviceMonitor:
# When set true then use a ServiceMonitor to configure scraping
enabled: false
# Set the namespace the ServiceMonitor should be deployed
# namespace: monitoring
# Set how frequently Prometheus should scrape
# interval: 30s
# Set path to redis-exporter telemtery-path
# telemetryPath: /metrics
# Set labels for the ServiceMonitor, use this to define your scrape label for Prometheus Operator
# labels: {}
# Set timeout for scrape
# timeout: 10s
init:
resources: {}
timeout:
connect: 4s
server: 30s
client: 30s
check: 2s
securityContext:
runAsUser: 1000
fsGroup: 1000
runAsNonRoot: true
## Whether the haproxy pods should be forced to run on separate nodes.
hardAntiAffinity: true
## Additional affinities to add to the haproxy pods.
additionalAffinities: {}
## Override all other affinity settings for the haproxy pods with a string.
affinity: |
## Custom config-haproxy.cfg files used to override default settings. If this file is
## specified then the config-haproxy.cfg above will be ignored.
# customConfig: |-
# Define configuration here
## Place any additional configuration section to add to the default config-haproxy.cfg
# extraConfig: |-
# Define configuration here
## Role Based Access
## Ref: https://kubernetes.io/docs/admin/authorization/rbac/
##
rbac:
create: true
sysctlImage:
enabled: false
command: []
registry: docker.io
repository: busybox
tag: 1.31.1
pullPolicy: Always
mountHostSys: false
resources: {}
## Use an alternate scheduler, e.g. "stork".
## ref: https://kubernetes.io/docs/tasks/administer-cluster/configure-multiple-schedulers/
##
# schedulerName:
## Redis specific configuration options
redis:
port: 6379
masterGroupName: "mymaster" # must match ^[\\w-\\.]+$) and can be templated
config:
## Additional redis conf options can be added below
## For all available options see http://download.redis.io/redis-stable/redis.conf
min-replicas-to-write: 1
min-replicas-max-lag: 5 # Value in seconds
maxmemory: "0" # Max memory to use for each redis instance. Default is unlimited.
maxmemory-policy: "volatile-lru" # Max memory policy to use for each redis instance. Default is volatile-lru.
# Determines if scheduled RDB backups are created. Default is false.
# Please note that local (on-disk) RDBs will still be created when re-syncing with a new slave. The only way to prevent this is to enable diskless replication.
save: "900 1"
# When enabled, directly sends the RDB over the wire to slaves, without using the disk as intermediate storage. Default is false.
repl-diskless-sync: "yes"
rdbcompression: "yes"
rdbchecksum: "yes"
## Custom redis.conf files used to override default settings. If this file is
## specified then the redis.config above will be ignored.
# customConfig: |-
# Define configuration here
resources: {}
# requests:
# memory: 200Mi
# cpu: 100m
# limits:
# memory: 700Mi
## Sentinel specific configuration options
sentinel:
port: 26379
quorum: 2
config:
## Additional sentinel conf options can be added below. Only options that
## are expressed in the format simialar to 'sentinel xxx mymaster xxx' will
## be properly templated expect maxclients option.
## For available options see http://download.redis.io/redis-stable/sentinel.conf
down-after-milliseconds: 10000
## Failover timeout value in milliseconds
failover-timeout: 180000
parallel-syncs: 5
maxclients: 10000
## Custom sentinel.conf files used to override default settings. If this file is
## specified then the sentinel.config above will be ignored.
# customConfig: |-
# Define configuration here
resources: {}
# requests:
# memory: 200Mi
# cpu: 100m
# limits:
# memory: 200Mi
securityContext:
runAsUser: 1000
fsGroup: 1000
runAsNonRoot: true
## Node labels, affinity, and tolerations for pod assignment
## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector
## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#taints-and-tolerations-beta-feature
## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity
nodeSelector: {}
## Whether the Redis server pods should be forced to run on separate nodes.
## This is accomplished by setting their AntiAffinity with requiredDuringSchedulingIgnoredDuringExecution as opposed to preferred.
## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#inter-pod-affinity-and-anti-affinity-beta-feature
##
hardAntiAffinity: true
## Additional affinities to add to the Redis server pods.
##
## Example:
## nodeAffinity:
## preferredDuringSchedulingIgnoredDuringExecution:
## - weight: 50
## preference:
## matchExpressions:
## - key: spot
## operator: NotIn
## values:
## - "true"
##
## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity
##
additionalAffinities: {}
## Override all other affinity settings for the Redis server pods with a string.
##
## Example:
## affinity: |
## podAntiAffinity:
## requiredDuringSchedulingIgnoredDuringExecution:
## - labelSelector:
## matchLabels:
## app: {{ template "redis-ha.name" . }}
## release: {{ .Release.Name }}
## topologyKey: kubernetes.io/hostname
## preferredDuringSchedulingIgnoredDuringExecution:
## - weight: 100
## podAffinityTerm:
## labelSelector:
## matchLabels:
## app: {{ template "redis-ha.name" . }}
## release: {{ .Release.Name }}
## topologyKey: failure-domain.beta.kubernetes.io/zone
##
affinity: |
# Prometheus exporter specific configuration options
exporter:
enabled: false
image: oliver006/redis_exporter
tag: v1.3.2
pullPolicy: IfNotPresent
# prometheus port & scrape path
port: 9121
scrapePath: /metrics
# cpu/memory resource limits/requests
resources: {}
# Additional args for redis exporter
extraArgs: {}
# Used to mount a LUA-Script via config map and use it for metrics-collection
# script: |
# -- Example script copied from: https://github.com/oliver006/redis_exporter/blob/master/contrib/sample_collect_script.lua
# -- Example collect script for -script option
# -- This returns a Lua table with alternating keys and values.
# -- Both keys and values must be strings, similar to a HGETALL result.
# -- More info about Redis Lua scripting: https://redis.io/commands/eval
#
# local result = {}
#
# -- Add all keys and values from some hash in db 5
# redis.call("SELECT", 5)
# local r = redis.call("HGETALL", "some-hash-with-stats")
# if r ~= nil then
# for _,v in ipairs(r) do
# table.insert(result, v) -- alternating keys and values
# end
# end
#
# -- Set foo to 42
# table.insert(result, "foo")
# table.insert(result, "42") -- note the string, use tostring() if needed
#
# return result
serviceMonitor:
# When set true then use a ServiceMonitor to configure scraping
enabled: false
# Set the namespace the ServiceMonitor should be deployed
# namespace: monitoring
# Set how frequently Prometheus should scrape
# interval: 30s
# Set path to redis-exporter telemtery-path
# telemetryPath: /metrics
# Set labels for the ServiceMonitor, use this to define your scrape label for Prometheus Operator
# labels: {}
# Set timeout for scrape
# timeout: 10s
podDisruptionBudget: {}
# maxUnavailable: 1
# minAvailable: 1
## Configures redis with AUTH (requirepass & masterauth conf params)
auth: false
# redisPassword:
## Use existing secret containing key `authKey` (ignores redisPassword)
# existingSecret:
## Defines the key holding the redis password in existing secret.
authKey: auth
persistentVolume:
enabled: true
## redis-ha data Persistent Volume Storage Class
## If defined, storageClassName: <storageClass>
## If set to "-", storageClassName: "", which disables dynamic provisioning
## If undefined (the default) or set to null, no storageClassName spec is
## set, choosing the default provisioner. (gp2 on AWS, standard on
## GKE, AWS & OpenStack)
##
# storageClass: "-"
accessModes:
- ReadWriteOnce
size: 10Gi
annotations: {}
# reclaimPolicy per https://kubernetes.io/docs/concepts/storage/persistent-volumes/#reclaiming
reclaimPolicy: ""
init:
resources: {}
# To use a hostPath for data, set persistentVolume.enabled to false
# and define hostPath.path.
# Warning: this might overwrite existing folders on the host system!
hostPath:
## path is evaluated as template so placeholders are replaced
# path: "/data/{{ .Release.Name }}"
# if chown is true, an init-container with root permissions is launched to
# change the owner of the hostPath folder to the user defined in the
# security context
chown: true
emptyDir: {}
1. Change "hardAntiAffinity: true" to "hardAntiAffinity: false" (only needed when replicas > the number of worker nodes).
2. Change "auth: false" to "auth: true", uncomment "# redisPassword:" and set a password.
3. Uncomment "# storageClass: "-"" and replace "-" with the cluster's dynamic-provisioning StorageClass "managed-nfs-storage"; "size: 10Gi" is the default and can be adjusted as needed (see the quick check after this list).
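A quick way to confirm those edits took effect before installing (a rough check; it simply greps the keys mentioned above):

grep -nE 'hardAntiAffinity:|^auth:|redisPassword:|storageClass:|size:' values.yaml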

Next, create the Redis cluster with the following Helm commands (install, upgrade and delete are shown for reference):

helm install --namespace redis redis-ha -f ./values.yaml ./redis-ha
helm upgrade redis-ha --namespace redis -f values.yaml ./redis-ha
helm delete redis-ha --namespace redis

After a successful install you will see output like the following:

NAME: test-redis
LAST DEPLOYED: Sat Jul 17 21:56:12 2021
NAMESPACE: redis
STATUS: deployed
REVISION: 1
TEST SUITE: None
NOTES:
** Please be patient while the chart is being deployed **
Redis(TM) can be accessed on the following DNS names from within your cluster:
test-redis-master.redis.svc.cluster.local for read/write operations (port 6379)
test-redis-replicas.redis.svc.cluster.local for read-only operations (port 6379)
To get your password run:
export REDIS_PASSWORD=$(kubectl get secret --namespace redis test-redis -o jsonpath="{.data.redis-password}" | base64 --decode)
To connect to your Redis(TM) server:
1. Run a Redis(TM) pod that you can use as a client:
kubectl run --namespace redis redis-client --restart='Never' --env REDIS_PASSWORD=$REDIS_PASSWORD --image docker.io/bitnami/redis:6.2.4-debian-10-r13 --command -- sleep infinity
Use the following command to attach to the pod:
kubectl exec --tty -i redis-client \
--namespace redis -- bash
2. Connect using the Redis(TM) CLI:
redis-cli -h test-redis-master -a $REDIS_PASSWORD
redis-cli -h test-redis-replicas -a $REDIS_PASSWORD
To connect to your database from outside the cluster execute the following commands:
export NODE_IP=$(kubectl get nodes --namespace redis -o jsonpath="{.items[0].status.addresses[0].address}")
export NODE_PORT=$(kubectl get --namespace redis -o jsonpath="{.spec.ports[0].nodePort}" services test-redis-master)
redis-cli -h $NODE_IP -p $NODE_PORT -a $REDIS_PASSWORD

Wait for the Redis cluster to come up; you can check whether everything was created successfully with:

$ kubectl get all -n redis
NAME READY STATUS RESTARTS AGE
pod/test-redis-master-0 1/1 Running 0 24h
pod/test-redis-replicas-0 1/1 Running 1 24h
pod/test-redis-replicas-1 1/1 Running 0 24h
pod/test-redis-replicas-2 1/1 Running 0 23h
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
service/test-redis-headless ClusterIP None <none> 6379/TCP 24h
service/test-redis-master NodePort 172.21.0.201 <none> 6379:30676/TCP 24h
service/test-redis-replicas NodePort 172.21.6.241 <none> 6379:32112/TCP 24h
NAME READY AGE
statefulset.apps/test-redis-master 1/1 24h
statefulset.apps/test-redis-replicas 3/3 24h

Everything is up; now we can start using Redis.

Verifying redis-ha

# list all pods
kubectl get pods
# output
NAME READY STATUS RESTARTS AGE
nfs-client-provisioner-779bcc9dbb-vfjtl 1/1 Running 3 2d20h
redis-ha-server-0 3/3 Running 1 5h32m
redis-ha-server-1 3/3 Running 0 5h32m
# exec into the redis-ha-server-0 container
kubectl exec -it redis-ha-server-0 sh
# output
Defaulting container name to redis.
Use 'kubectl describe pod/redis-ha-server-0 -n default' to see all of the containers in this pod.
/data $
# test with redis-cli
/data $ redis-cli
127.0.0.1:6379> auth xxxxxxxxxxx   # the password set in values.yaml
OK
127.0.0.1:6379> keys *
1) "test"
127.0.0.1:6379> get test
"111"
127.0.0.1:6379> set test 222
OK
127.0.0.1:6379> get test
"222"
127.0.0.1:6379>

To expose Redis outside the cluster, deploy an additional NodePort Service:

[root@k8s-master redis-cluster]# cat service.yaml
apiVersion: v1
kind: Service
metadata:
  name: redis-ha-service        # any name you like
  namespace: redis              # the namespace where redis-ha is deployed
  labels:
    app: redis-ha               # name of the deployed redis-ha release
spec:
  ports:
    - name: redis-ha            # name of the deployed redis-ha release
      protocol: "TCP"
      port: 26379
      targetPort: 6379
      nodePort: 30379           # external port used to reach redis-ha through the cluster nodes
  selector:
    statefulset.kubernetes.io/pod-name: redis-ha-server-0
  type: NodePort
# apply the Service
kubectl apply -f service.yaml
# check the result
kubectl get svc
# output
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
kubernetes ClusterIP 10.96.0.1 <none> 443/TCP 2d21h
redis-ha ClusterIP None <none> 6379/TCP,26379/TCP,9121/TCP 5h47m
redis-ha-announce-0 ClusterIP 10.99.18.87 <none> 6379/TCP,26379/TCP,9121/TCP 5h47m
redis-ha-announce-1 ClusterIP 10.100.130.186 <none> 6379/TCP,26379/TCP,9121/TCP 5h47m
redis-ha-service NodePort 10.109.28.12 <none> 26379:30379/TCP 3h25m
# you can now connect from outside the cluster via any node IP on port 30379
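From outside the cluster you can then check connectivity through the NodePort (a sketch; replace <node-ip> with any node's address and use the password set in values.yaml):

redis-cli -h <node-ip> -p 30379 -a <redis-password> ping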

Failover test

Stop the primary Redis:


# 1. Failover test on the master: crash the current master
redis-cli -h redis-ha-announce-0 -p 6379 debug segfault
# 2. Then exec into a redis container or a sentinel container
kubectl exec -it redis-ha-server-2 -c redis sh
kubectl exec -it redis-ha-server-0 -c sentinel sh
# 3. Inside the container, connect to the Redis servers with redis-cli
redis-cli -h redis-ha-announce-1 -p 6379
redis-cli -h redis-ha-announce-2 -p 6379
# Connect to the Sentinel service with redis-cli
redis-cli -h redis-ha-announce-0 -p 26379
# 4. Check replication status in Redis
info replication
# Check Sentinel status
INFO Sentinel
# 5. Check the logs
kubectl logs -f redis-ha-server-0 -c redis
kubectl logs -f redis-ha-server-1 -c redis
kubectl logs -f redis-ha-server-2 -c redis


On the former master, the replication status now shows 10.106.29.241 (the IP of the third Redis node) as the master, which means the failover succeeded.


Kafka: Getting Started and Going Further

Published 2023-04-18

1.1 Getting started with Kafka

1. What is Kafka?

Some say there are three great inventions in the world: fire, the wheel, and Kafka.

Apache Kafka has undoubtedly been very successful; Confluent has said that a third of the Fortune 500 use Kafka. In stream processing, Kafka is commonly used to buffer data, for example Flink computes over data it consumes from Kafka.


To begin with, there are four things to know about Kafka:

1. Apache Kafka is an open-source messaging system written in Scala, developed as a project of the Apache Software Foundation.

2. Kafka was originally developed at LinkedIn as the foundation of LinkedIn's activity stream and operational data pipeline; it is now used by many different kinds of companies as a data pipeline and messaging system.

3. Kafka is a distributed message queue. Kafka categorizes messages by Topic; message senders are called Producers and message receivers are called Consumers. A Kafka cluster consists of multiple Kafka instances, and each instance (server) is called a Broker.

4. Both the Kafka cluster and the consumers rely on a ZooKeeper cluster to store metadata in order to guarantee system availability.

2. Why Kafka?

Kafka's growing popularity is inseparable from the three roles it plays:

Messaging system: like traditional message middleware, Kafka provides decoupling, redundant storage, traffic shaping, buffering, asynchronous communication, scalability and recoverability. In addition, Kafka offers message-ordering guarantees and the ability to rewind and re-consume messages, which most messaging systems find hard to provide.

Storage system: Kafka persists messages to disk, which greatly reduces the risk of message loss compared with memory-based systems, thanks to its persistence and multi-replica mechanisms. Kafka can even be used as a long-term storage system by setting the retention policy to keep data forever or by enabling log compaction on a topic.

Stream processing platform: Kafka provides a reliable data source for popular stream processing frameworks, and also ships with a complete stream processing library of its own, with operations such as windows, joins, transformations and aggregations.


3. Kafka basic concepts

Before diving deeper into Kafka, it helps to understand its basic concepts.

A typical Kafka deployment contains several Producers, several Brokers, several Consumers and a ZooKeeper cluster. ZooKeeper is used by Kafka for cluster metadata management and controller election. Producers send messages to Brokers, Brokers persist the messages to disk, and Consumers subscribe to and consume messages from Brokers. The Kafka architecture looks like this:


Concept 1: Producers and Consumers


For Kafka there are two basic types of clients: Producers and Consumers. Beyond these there are higher-level clients such as the Kafka Connect API for data integration and Kafka Streams for stream processing, but under the hood these are still built on the producer and consumer APIs; they simply add a layer on top.

Producer: the message producer, i.e. the client that sends messages to a Kafka broker.

Consumer: the message consumer, i.e. the client that fetches messages from a Kafka broker.

Concept 2: Brokers and Clusters

A Kafka server is also called a Broker. It accepts messages from producers and writes them to disk, and it serves consumers' requests to fetch partition data, returning the messages that have been committed. On suitable hardware a single Broker can handle thousands of partitions and millions of messages per second.

Several Brokers form a Cluster. Within the cluster one Broker becomes the Cluster Controller, which manages the cluster: assigning partitions to Brokers, monitoring Broker failures, and so on. Inside the cluster each partition is owned by one Broker, which is called that partition's Leader; a partition can also be replicated to multiple Brokers for redundancy, so that when a Broker fails its partitions can be reassigned to other Brokers.


Concept 3: Topics and Partitions


In Kafka, messages are categorized by Topic, and each topic corresponds to a "message queue", somewhat like a table in a database. But if all messages of the same kind were stuffed into a single "central" queue, scalability would inevitably suffer: growth in the number of producers/consumers, or in the volume of messages, could exhaust the system's throughput or storage.

Kafka is distributed by nature.

Replica partitions are used purely as backups and serve neither reads nor writes. If a Broker fails, a replica partition on another Broker is elected as the new leader partition, which is how high availability is achieved.

Another point worth mentioning: when a producer writes data into a topic, it actually lands on a partition, so how does the partition persist it? (Without persistence, data would obviously be lost if the Broker crashed.)

Kafka writes partition data to disk (the message log), but it only allows appends (sequential access), avoiding slow random I/O.

  • Kafka also does not write data to disk the moment it arrives on a partition; it buffers some of it first and flushes it in batches once enough data has accumulated or enough time has passed.

Consumers' reads are also carefully optimized: a normal disk read needs to copy data from kernel space to user space, but Kafka uses sendfile() to move data directly from kernel space (the DMA buffer) to kernel space (the socket buffer), skipping one copy.


Appendix: docker-compose.yml

version: "3"
services:
  zookeeper:
    image: 'confluentinc/cp-zookeeper:6.2.0'
    hostname: zookeeper
    ports:
      - '2181:2181'
    environment:
      # anonymous login -- must be enabled
      - ALLOW_ANONYMOUS_LOGIN=yes
      - ZOOKEEPER_CLIENT_PORT=2181
      - ZOOKEEPER_TICK_TIME=2000
    volumes:
      - ./zookeeper/data:/var/lib/zookeeper/data:Z
      - ./zookeeper/log:/var/lib/zookeeper/log:Z
  # for detailed image configuration see https://github.com/bitnami/bitnami-docker-kafka/blob/master/README.md
  broker:
    image: 'confluentinc/cp-kafka:6.2.0'
    hostname: broker
    ports:
      - '9092:9092'
      - "29092:29092"
      #- '9999:9999'
    environment:
      - KAFKA_BROKER_ID=1
      - KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
      - KAFKA_CFG_LISTENERS=PLAINTEXT://:9092
      - KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://192.168.3.92:9092,PLAINTEXT_HOST://localhost:29092
      #- KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT
      # client access address -- replace with your own
      #- KAFKA_CFG_ADVERTISED_LISTENERS=PLAINTEXT://192.168.3.92:9092
      - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
      - KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0
      # allow the PLAINTEXT protocol (disabled by default in the image, enable manually)
      - ALLOW_PLAINTEXT_LISTENER=yes
      # disable automatic topic creation
      #- KAFKA_CFG_AUTO_CREATE_TOPICS_ENABLE=false
      # global message retention of 6 hours (use a shorter value when testing)
      - KAFKA_CFG_LOG_RETENTION_HOURS=6
      - KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1
      - KAFKA_TRANSACTION_STATE_LOG_MIN_ISR=1
      - KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR=1
      - KAFKA_HEAP_OPTS=-Xmx256M -Xms128M
      # enable JMX monitoring
      #- JMX_PORT=9999
      #- KAFKA_JMX_OPTS= -Dcom.sun.management.jmxremote=true -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Djava.rmi.server.hostname=192.168.3.92 -Dcom.sun.management.jmxremote.rmi.port=9999 -Dcom.sun.management.jmxremote.port=9999
    volumes:
      - ./kafka/data:/var/lib/kafka/data:Z
      - ./kafka/config:/var/lib/kafka/config:Z
    depends_on:
      - zookeeper
  schema-registry:
    image: confluentinc/cp-schema-registry:6.2.0
    hostname: schema-registry
    container_name: schema-registry
    depends_on:
      - zookeeper
      - broker
    ports:
      - "8081:8081"
    environment:
      SCHEMA_REGISTRY_HOST_NAME: schema-registry
      SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL: zookeeper:2181
  connect:
    image: confluentinc/cp-kafka-connect:latest
    hostname: connect
    container_name: connect
    depends_on:
      - zookeeper
      - broker
      - schema-registry
    ports:
      - "8083:8083"
    environment:
      CONNECT_BOOTSTRAP_SERVERS: broker:29092
      CONNECT_REST_ADVERTISED_HOST_NAME: localhost
      CONNECT_REST_PORT: 8083
      CONNECT_GROUP_ID: ksql-connect-cluster
      CONNECT_OFFSET_STORAGE_TOPIC: ksql-connect-configs
      CONNECT_CONFIG_STORAGE_TOPIC: ksql-connect-topics
      CONNECT_STATUS_STORAGE_TOPIC: ksql-connect-statuses
      CONNECT_CONFIG_STORAGE_REPLICATION_FACTOR: 1
      CONNECT_OFFSET_FLUSH_INTERVAL_MS: 10000
      CONNECT_OFFSET_STORAGE_REPLICATION_FACTOR: 1
      CONNECT_STATUS_STORAGE_REPLICATION_FACTOR: 1
      CONNECT_KEY_CONVERTER: org.apache.kafka.connect.storage.StringConverter
      CONNECT_VALUE_CONVERTER: io.confluent.connect.avro.AvroConverter
      CONNECT_VALUE_CONVERTER_SCHEMA_REGISTRY_URL: http://schema-registry:8081
      CONNECT_INTERNAL_KEY_CONVERTER: "org.apache.kafka.connect.json.JsonConverter"
      CONNECT_INTERNAL_VALUE_CONVERTER: "org.apache.kafka.connect.json.JsonConverter"
      CONNECT_ZOOKEEPER_CONNECT: 'zookeeper:2181'
  ksqldb-server:
    image: confluentinc/ksqldb-server:latest
    hostname: ksqldb-server
    depends_on:
      - broker
    ports:
      - "8088:8088"
    #healthcheck:
    #  test: curl -f http://ksqldb-server:8088/ || exit 1
    environment:
      KSQL_LISTENERS: http://0.0.0.0:8088
      KSQL_BOOTSTRAP_SERVERS: 192.168.3.92:9092
      KSQL_KSQL_LOGGING_PROCESSING_STREAM_AUTO_CREATE: "true"
      KSQL_KSQL_LOGGING_PROCESSING_TOPIC_AUTO_CREATE: "true"
      KSQL_KSQL_CONNECT_URL: http://connect:8083
  ksqldb-cli:
    image: confluentinc/ksqldb-cli:latest
    container_name: ksqldb-cli
    depends_on:
      - ksqldb-server
      - broker
    entrypoint: /bin/sh
    tty: true
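To bring the stack up and run the CLI examples below, something like this should work (a sketch; docker-compose exec uses the broker service name defined above):

# start everything in the background and check container status
docker-compose up -d
docker-compose ps
# open a shell inside the broker container to run the Kafka CLI tools
docker-compose exec broker bash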

Kafka topic CLI:

kafka-topics --bootstrap-server localhost:9092 --list

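Besides listing topics, you can create one explicitly before producing to it (a sketch; the topic name test matches the producer/consumer examples below, and replication factor 1 fits this single-broker setup):

kafka-topics --bootstrap-server localhost:9092 --create --topic test --partitions 3 --replication-factor 1
kafka-topics --bootstrap-server localhost:9092 --describe --topic test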

kafka-console-producer --broker-list localhost:9092 --topic test


kafka-console-consumer --bootstrap-server localhost:9092 --topic test --from-beginning


docker exec -it ksqldb-cli ksql http://ksqldb-server:8088


show topics;


CREATE STREAM riderLocations (profileId VARCHAR, latitude DOUBLE, longitude DOUBLE)
WITH (kafka_topic='locations', value_format='json', partitions=1);
INSERT INTO riderLocations (profileId, latitude, longitude) VALUES ('c2309eec', 37.7877, -122.4205);
INSERT INTO riderLocations (profileId, latitude, longitude) VALUES ('18f4ea86', 37.3903, -122.0643);
INSERT INTO riderLocations (profileId, latitude, longitude) VALUES ('4ab5cbad', 37.3952, -122.0813);
INSERT INTO riderLocations (profileId, latitude, longitude) VALUES ('8b6eae59', 37.3944, -122.0813);
INSERT INTO riderLocations (profileId, latitude, longitude) VALUES ('4a7c7b41', 37.4049, -122.0822);
INSERT INTO riderLocations (profileId, latitude, longitude) VALUES ('4ddad000', 37.7857, -122.4011);
select * from riderLocations;
ksql> select * from riderLocations;


CREATE TABLE myNearDemo AS \
>select profileId,la,lo from currentLocation ;


Watching for changes

select * from myNearDemo EMIT CHANGES;


Insert another row into the original stream


ksql> INSERT INTO riderLocations (profileId, latitude, longitude) VALUES ('4ddad000', 38.7857, -122.4012);


Analyzing MySQL Slow Query Logs with pt-query-digest

Published 2023-02-22

1. Enable the slow query log

(1) First create a directory to hold the slow query log file and make sure MySQL has permission to read and write it:

mkdir /var/log/mysql
sudo chown mysql:mysql -R /var/log/mysql

(2) Log in to the MySQL command line and run the following SET statements to enable it temporarily. Note that these settings are lost when the service restarts.

set global slow_query_log=on;                                    -- enable the slow query log
set global slow_query_log_file='/var/log/mysql/mysql-slow.log';  -- slow query log file location
set global log_queries_not_using_indexes=on;                     -- log queries that do not use an index (optional)
set global long_query_time=1;                                    -- only log queries slower than 1 second

(3) Alternatively, enable the slow query log permanently by editing the configuration file. First open it:

vi /etc/my.cnf
  • Then add the following configuration:
[mysqld]
slow_query_log=on                                    # enable the slow query log
slow_query_log_file='/var/log/mysql/mysql-slow.log'  # slow query log file location
log_queries_not_using_indexes=on                     # log queries that do not use an index (optional)
long_query_time=1                                    # only log queries slower than 1 second
  • Save and close the file, then restart MySQL:
service mysqld restart

2. Check whether the slow query log is enabled

(1) From the MySQL command line, run the following to check whether the slow query log is enabled and where the log file is stored:

show variables like 'slow_query%';

(2) Run the following to see how many seconds a query must take before it is logged:

show variables like 'long_query_time';

3. Test the slow query log

(1) First run the following SQL to simulate a 2-second slow query:

select sleep(2);

(2) Check the log file and you will see the slow query has been recorded:

cat /var/log/mysql/mysql-slow.log

Analyzing the slow query log with pt-query-digest

1. Install the tool

(1) First download the rpm package:

Note: if the download fails, you can also get the package from the official site, download it manually and upload it to the server.

wget https://downloads.percona.com/downloads/percona-toolkit/3.2.1/binary/redhat/7/x86_64/percona-toolkit-3.2.1-1.el7.x86_64.rpm

(2) Then install it with yum:

yum install -y percona-toolkit-3.2.1-1.el7.x86_64.rpm

2. Analyze the slow query log

(1) Run the following command to analyze a given slow query log file:

pt-query-digest /var/log/mysql/mysql-slow.log

(2) The analysis output has three parts. The first part is the overall summary:

  • Overall: total number of queries
  • Time range: the time range the queries were executed in
  • unique: number of unique queries, i.e. how many distinct queries remain after the query parameters are normalized
  • total: total execution time of all queries
  • min: minimum query time
  • max: maximum query time
  • avg: average query time
  • 95%: the value at the 95th percentile when all times are sorted ascending; usually the most useful reference number
  • median: the median, i.e. the value in the middle when all times are sorted ascending

(3) The second part is the per-query-group statistics:

  • Rank: rank of the statement, ordered by total query time descending by default; controlled with --order-by
  • Query ID: the statement's ID (a hash computed after stripping extra whitespace and literal text)
  • Response: total response time
  • time: this query's share of the total time in this analysis
  • Calls: number of executions, i.e. how many queries of this type were seen in this analysis
  • R/Call: average response time per execution
  • V/M: variance-to-mean ratio of the response time
  • Item: the query

(4) The third part gives detailed statistics for each type of slow SQL:

  • pct: the percentage that this statement's value of a metric represents of that metric across all slow queries
  • total: the total value of that metric for this statement
  • Count: how many times the statement was executed; its pct column is the share of all slow-query executions (10% in the original example) and its total column is the absolute count (3 executions in the example)
  • Exec time: SQL execution time
  • Lock time: time spent waiting on locks during execution
  • Rows sent: rows actually returned; only meaningful for SELECT statements
  • Rows examine: total rows scanned, not just the target rows
  • Query_time distribution: distribution of query times
  • SQL statement: the statement itself, e.g. select sleep(7)\G in the original example

3. Advanced usage

(1) Analyze slow.log and write the report to slow_report.log:

pt-query-digest slow.log > slow_report.log

(2) Analyze queries from the last 12 hours:

pt-query-digest --since=12h slow.log > slow_report2.log

(3) Analyze queries within a specific time range:

pt-query-digest slow.log --since '2020-04-17 09:30:00' --until '2020-04-17 10:00:00' > slow_report3.log

(4) Analyze only slow queries containing SELECT statements:

pt-query-digest --filter '$event->{fingerprint} =~ m/^select/i' slow.log > slow_report4.log

(5) Slow queries from a specific user:

pt-query-digest --filter '($event->{user} || "") =~ m/^root/i' slow.log > slow_report5.log

(6) Find all slow queries that perform a full table scan or a full join:

pt-query-digest --filter '(($event->{Full_scan} || "") eq "yes") || (($event->{Full_join} || "") eq "yes")' slow.log > slow_report6.log

(7) Save the queries to the query_review table:

pt-query-digest --user=root --password=abc123 --review h=localhost,D=test,t=query_review --create-review-table slow.log

(8) Capture MySQL TCP traffic with tcpdump, then analyze it:

tcpdump -s 65535 -x -nn -q -tttt -i any -c 1000 port 3306 > mysql.tcp.txt
pt-query-digest --type tcpdump mysql.tcp.txt > slow_report9.log

(9) Analyze the binlog:

mysqlbinlog mysql-bin.000093 > mysql-bin000093.sql
pt-query-digest --type=binlog mysql-bin000093.sql > slow_report10.log

(10) Analyze the general log:

pt-query-digest --type=genlog localhost.log > slow_report11.log

Elasticsearch: Bidirectional Traffic Switching with INFINI Gateway

Published 2023-02-09

INFINI Gateway is a high-performance application gateway for Elasticsearch. It is feature-rich and very easy to use. It works like an ordinary reverse proxy: the gateway is usually deployed in front of the Elasticsearch cluster, requests that previously went straight to Elasticsearch are sent to the gateway instead, and the gateway forwards them to the backend Elasticsearch cluster. Because the gateway sits between the clients and the backend Elasticsearch, it can do a great deal in the middle, such as index-level rate limiting, caching of common queries, auditing of search requests, dynamic rewriting of query results, and so on.

Download and install

wget https://release.infinilabs.com/gateway/stable/gateway-1.8.6-769-linux-amd64.tar.gz
tar vxzf gateway-1.8.6-769-linux-amd64.tar.gz
mv gateway-linux-amd64 bin/gateway

Verify the installation

After downloading and extracting the gateway, run the following command to verify that the package is valid:

✗ ./bin/gateway -v
gateway 1.0.0_SNAPSHOT 2021-01-03 22:45:28 6a54bb2

If you can see the version information above, the gateway binary itself is working correctly.

Configure the gateway

[root@k8s-master gateway]# cat /opt/gateway/gateway.yml
# data path
path.data: data
# log path
path.logs: log
# Elasticsearch cluster definitions
elasticsearch:
  # cluster01
  - name: cluster01
    enabled: true
    endpoint: http://192.168.10.15:9200
    basic_auth:
      username: elastic
      password: IQsMLRniP5BcYfoNzTBT
    discovery:
      enabled: true
    refresh:
      enabled: true
      interval: 1s
  # cluster02
  - name: cluster02
    enabled: true
    endpoint: http://192.168.10.15:30993
    basic_auth:
      username: elastic
      password: IQsMLRniP5BcYfoNzTBT
    discovery:
      enabled: true
    refresh:
      enabled: true
      interval: 1s
# gateway entry point
entry:
  - name: my_es_entry
    enabled: true
    router: my_router
    network:
      binding: 0.0.0.0:8000
# flows
flow:
  - name: auth-flow
    filter:
      #- basic_auth:
      #    valid_users:
      #      elastic: ******
      - set_basic_auth:
          username: elastic
          password: IQsMLRniP5BcYfoNzTBT
  - name: set-auth-for-backup-flow
    filter:
      - set_basic_auth: # override the credentials so the standby cluster can process the request normally
          username: elastic
          password: IQsMLRniP5BcYfoNzTBT
  # Write requests go to the primary cluster first, and to the standby cluster when the primary is unavailable.
  # When a write to the primary succeeds it is recorded in a queue and consumed asynchronously into the standby cluster.
  - name: write-flow
    filter:
      - flow:
          flows:
            - auth-flow
      - if:
          # when the primary cluster is available
          cluster_available: ["cluster01"]
        then:
          # write the data to the primary cluster first
          - elasticsearch:
              elasticsearch: "cluster01"
          # also enqueue it, to be consumed asynchronously by the pipeline into the standby cluster
          - queue:
              queue_name: "cluster02-queue"
        else:
          - elasticsearch:
              elasticsearch: "cluster02"
          - queue:
              queue_name: "cluster01-queue"
  # Read requests go to the primary cluster first, and to the standby cluster when the primary is unavailable.
  - name: read-flow
    filter:
      - flow:
          flows:
            - set-auth-for-backup-flow
      - if:
          cluster_available: ["cluster01"]
        then:
          - elasticsearch:
              elasticsearch: "cluster01"
        else:
          - elasticsearch:
              elasticsearch: "cluster02"
# routing rules
router:
  - name: my_router
    # default route
    default_flow: write-flow
    # routes for read requests
    rules:
      - method:
          - "GET"
          - "HEAD"
        pattern:
          - "/{any:*}"
        flow:
          - read-flow
      - method:
          - "POST"
          - "GET"
        pattern:
          - "/_refresh"
          - "/_count"
          - "/_search"
          - "/_msearch"
          - "/_mget"
          - "/{any_index}/_count"
          - "/{any_index}/_search"
          - "/{any_index}/_msearch"
          - "/{any_index}/_mget"
        flow:
          - read-flow
# pipelines that asynchronously replay queued writes into the standby cluster
pipeline:
  - name: cluster01-consumer
    auto_start: true
    keep_running: true
    processor:
      - queue_consumer:
          input_queue: "cluster01-queue"
          elasticsearch: "cluster01"
          when:
            cluster_available: ["cluster01"] # only consume the queue when the cluster is available
  - name: cluster02-consumer
    auto_start: true
    keep_running: true
    processor:
      - queue_consumer:
          input_queue: "cluster02-queue"
          elasticsearch: "cluster02"
          when:
            cluster_available: ["cluster02"]
elastic:
  enabled: true
  remote_configs: false
  health_check:
    enabled: true
    interval: 1s
  availability_check:
    enabled: true
    interval: 1s
  metadata_refresh:
    enabled: true
    interval: 1s
  cluster_settings_check:
    enabled: false
    interval: 1s

Start the gateway

[root@k8s-master gateway]# ./bin/gateway
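Once the backend clusters defined in gateway.yml are reachable, a quick way to confirm the gateway is proxying requests is to hit its entry port (8000, as configured above) directly (a sketch):

curl -u elastic:IQsMLRniP5BcYfoNzTBT http://localhost:8000
curl -u elastic:IQsMLRniP5BcYfoNzTBT "http://localhost:8000/_cluster/health?pretty"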

Test setup

Set up a local Docker-based Elasticsearch cluster with the following configuration:

[root@k8s-master gateway]# cat docker-compose.yml
version: '3.8'
services:
  # cluster cluster01
  # Elasticsearch
  es01:
    image: docker.io/library/elasticsearch:7.9.3
    container_name: es01
    environment:
      # node name
      - node.name=es01
      # cluster name
      - cluster.name=cluster01
      # single-node startup
      - discovery.type=single-node
      # enable memory locking
      - bootstrap.memory_lock=true
      # heap size
      - "ES_JAVA_OPTS=-Xms2g -Xmx2g"
      # enable security
      - xpack.security.enabled=true
      # password for the elastic user
      - ELASTIC_PASSWORD=IQsMLRniP5BcYfoNzTBT
    ulimits:
      memlock:
        soft: -1
        hard: -1
    # published ports, host port : container port
    ports:
      - 9200:9200
    volumes:
      - data01:/usr/share/elasticsearch/data
    networks:
      - elastic
  # Kibana
  kib01:
    image: kibana:7.9.3
    container_name: kib01
    ports:
      - 5601:5601
    environment:
      # Elasticsearch connection info
      ELASTICSEARCH_URL: http://es01:9200
      ELASTICSEARCH_HOSTS: '["http://es01:9200"]'
      ELASTICSEARCH_USERNAME: elastic
      ELASTICSEARCH_PASSWORD: IQsMLRniP5BcYfoNzTBT
    networks:
      - elastic
# volumes
volumes:
  data01:
    driver: local
  data02:
    driver: local
  data03:
    driver: local
# networks
networks:
  elastic:
    driver: bridge

Kubernetes cluster (set up in advance)

kubectl exec -it $(kubectl get pods -n esbeta | grep elasticsearch-client | sed -n 1p | awk '{print $1}') -n esbeta -- bin/elasticsearch-setup-passwords interactive
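With both clusters and the gateway running, one way to verify the dual-write behaviour described in gateway.yml is to index a document through the gateway and then look it up on each cluster directly (a sketch; the index name gateway_test is arbitrary):

# write through the gateway: write-flow sends it to cluster01 and queues it for cluster02
curl -u elastic:IQsMLRniP5BcYfoNzTBT -H 'Content-Type: application/json' \
  -X POST http://localhost:8000/gateway_test/_doc -d '{"msg": "hello gateway"}'
# the document should then be searchable on both clusters
curl -u elastic:IQsMLRniP5BcYfoNzTBT "http://192.168.10.15:9200/gateway_test/_search?q=msg:hello&pretty"
curl -u elastic:IQsMLRniP5BcYfoNzTBT "http://192.168.10.15:30993/gateway_test/_search?q=msg:hello&pretty"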
