mysql基于k8s的主从配置 | 指尖上的记忆

mysql基于k8s的主从配置:

使用minikube 搭建mysql8.0的集群，一个主节点两个从节点
1.启动minikube
minikube start
  
必须要启动，否则后面 kubectl apply 的时候，会报如下错误:
error: error validating "mysql-rep-cluster.yaml": error validating data: failed to download openapi: Get "https://192.168.49.2:8443/openapi/v2?timeout=32s": dial tcp 192.168.49.2:8443: connect: no route to host; if you choose to ignore these errors, turn validation off with --validate=false

2.编写yaml文件，本次文件命名为 mysql-rep-cluster.yaml ，内容如下:

# mysql-master-config ConfigMap (主节点配置)
apiVersion: v1
kind: ConfigMap
metadata:
  name: mysql-master-config
  namespace: default
data:
  my.cnf: |
    [mysqld]
    user=mysql
    default_authentication_plugin=mysql_native_password
    bind-address=0.0.0.0
    log_bin=mysql-bin
    server-id=1
    binlog-format=ROW
    expire_logs_days=7
    max_binlog_size=100M
    sql_mode=STRICT_TRANS_TABLES,NO_ENGINE_SUBSTITUTION
    # 禁用默认的 includedir 指令
    loose_mysqlx_port=33060
    !includedir /etc/mysql/conf.d/
    !includedir /etc/mysql/mysql.conf.d/
---
# mysql-slave1-config ConfigMap (从节点1配置)
apiVersion: v1
kind: ConfigMap
metadata:
  name: mysql-slave1-config
  namespace: default
data:
  my.cnf: |
    [mysqld]
    user=mysql
    default_authentication_plugin=mysql_native_password
    bind-address=0.0.0.0
    server-id=2
    relay-log=relay-log-bin
    read-only=1
    log_slave_updates=1
    max_binlog_size=100M
    sql_mode=STRICT_TRANS_TABLES,NO_ENGINE_SUBSTITUTION
    # 禁用默认的 includedir 指令
    loose_mysqlx_port=33060
    !includedir /etc/mysql/conf.d/
    !includedir /etc/mysql/mysql.conf.d/
---
# mysql-slave2-config ConfigMap (从节点2配置)
apiVersion: v1
kind: ConfigMap
metadata:
  name: mysql-slave2-config
  namespace: default
data:
  my.cnf: |
    [mysqld]
    user=mysql
    default_authentication_plugin=mysql_native_password
    bind-address=0.0.0.0
    server-id=3
    relay-log=relay-log-bin
    read-only=1
    log_slave_updates=1
    max_binlog_size=100M
    sql_mode=STRICT_TRANS_TABLES,NO_ENGINE_SUBSTITUTION
    # 禁用默认的 includedir 指令
    loose_mysqlx_port=33060
    !includedir /etc/mysql/conf.d/
    !includedir /etc/mysql/mysql.conf.d/
---
# StatefulSet 配置
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: mysql-cluster
  namespace: default
spec:
  serviceName: mysql
  replicas: 3
  selector:
    matchLabels:
      app: mysql
  template:
    metadata:
      labels:
        app: mysql
    spec:
      initContainers:
      - name: init-mysql
        image: busybox:1.36
        command:
        - sh
        - -c
        - |
          # 创建必要的目录结构
          mkdir -p /etc/mysql/conf.d
          mkdir -p /etc/mysql/mysql.conf.d
          
          # 根据主机名选择配置
          if [ "${HOSTNAME}" = "mysql-cluster-0" ]; then
            cp /mnt/master-config/my.cnf /etc/mysql/my.cnf
          elif [ "${HOSTNAME}" = "mysql-cluster-1" ]; then
            cp /mnt/slave1-config/my.cnf /etc/mysql/my.cnf
          else
            cp /mnt/slave2-config/my.cnf /etc/mysql/my.cnf
          fi
          
          # 设置正确的权限
          chmod -R 755 /etc/mysql
          chown -R 999:999 /etc/mysql
        volumeMounts:
        - name: mysql-master-config
          mountPath: /mnt/master-config
        - name: mysql-slave1-config
          mountPath: /mnt/slave1-config
        - name: mysql-slave2-config
          mountPath: /mnt/slave2-config
        - name: config-dir
          mountPath: /etc/mysql
      containers:
      - name: mysql
        image: mysql:8.0
        env:
        - name: MYSQL_ROOT_PASSWORD
          value: "rootpassword"
        volumeMounts:
        - name: config-dir
          mountPath: /etc/mysql
        - name: mysql-data
          mountPath: /var/lib/mysql
        ports:
        - containerPort: 3306
          name: mysql
      volumes:
      - name: mysql-master-config
        configMap:
          name: mysql-master-config
      - name: mysql-slave1-config
        configMap:
          name: mysql-slave1-config
      - name: mysql-slave2-config
        configMap:
          name: mysql-slave2-config
      - name: config-dir
        emptyDir: {}
  volumeClaimTemplates:
  - metadata:
      name: mysql-data
    spec:
      accessModes: [ "ReadWriteOnce" ]
      resources:
        requests:
          storage: 1Gi
---
# Headless Service
apiVersion: v1
kind: Service
metadata:
  name: mysql
  namespace: default
spec:
  clusterIP: None
  ports:
  - port: 3306
  selector:
    app: mysql
---
# ClusterIP Service
apiVersion: v1
kind: Service
metadata:
  name: mysql-cluster
  namespace: default
spec:
  ports:
  - port: 3306
    targetPort: 3306
  selector:
    app: mysql

这个 yaml 文件我花了两天时间搞定，其中的 ConfigMap 一开始没有分成三个，只是简单的配置了两个，而且在后面使用的时候，还只是配置了 mysql-master-config, 导致最后三个节点的 server-id=1 都是1，然后在后续查看slave节点的状态的时候，报:
Last_IO_Error: Fatal error: The replica I/O thread stops because source and replica have equal MySQL server ids; these ids must be different for replication to work (or the --replicate-same-server-id option must be used on replica but this does not always make sense; please check the manual before using it). 这意味着要为所有的节点配置不同的 server-id，我这里有三个节点，那就必须要有三个 ConfigMap，以及在后面 StatefulSet 中分别使用这三个配置。
  
3.使用yaml文件创建集群
$ kubectl apply -f mysql-rep-cluster.yaml
  
4.查看pods状态
以下是几种可能出现的情况
  
1>
$ kubectl get pods //一直找不到任何东西
No resources found in default namespace.
  
那么就需要取查看错误了
$ kubectl get statefulsets //这个主要是排除yaml文件错误的，因为很复杂，所以很容易出错
NAME            READY   AGE
mysql-cluster   0/3     9s
  
$ kubectl describe statefulset mysql-cluster  // 这个 mysql-cluster 是上面statefulset的名称
Name:               mysql-cluster
Namespace:          default
CreationTimestamp:  Sat, 28 Dec 2024 00:25:12 +0800
Selector:           app=mysql
Labels:             <none>
Annotations:        <none>
Replicas:           3 desired | 0 total
Update Strategy:    RollingUpdate
  Partition:        0
Pods Status:        0 Running / 0 Waiting / 0 Succeeded / 0 Failed
Pod Template:
  Labels:  app=mysql
  Containers:
   mysql:
    Image:      mysql:8.0
    Port:       <none>
    Host Port:  <none>
    Environment:
      MYSQL_ROOT_PASSWORD:  rootpassword
      MYSQL_DATABASE:       mysql
    Mounts:
      /etc/mysql/custom-my.cnf from mysql-config-volume (rw,path="my.cnf")
      /var/lib/mysql-data from mysql-data (rw)
  Volumes:         <none>
  Node-Selectors:  <none>
  Tolerations:     <none>
Volume Claims:
  Name:          mysql-data
  StorageClass:  
  Labels:        <none>
  Annotations:   <none>
  Capacity:      1Gi
  Access Modes:  [ReadWriteOnce]
Events:
  Type     Reason        Age                From                    Message
  ----     ------        ----               ----                    -------
  Warning  FailedCreate  5s (x12 over 16s)  statefulset-controller  create Pod mysql-cluster-0 in StatefulSet mysql-cluster failed error: Pod "mysql-cluster-0" is invalid: spec.containers[0].volumeMounts[0].name: Not found: "mysql-config-volume"
  
这个错误是因为StatefulSet的配置结构不正确。在StatefulSet中，volumes 应该定义在 Pod 模板（spec.template.spec）中，而不是直接在StatefulSet的 spec 下。将 volumes 从 StatefulSet 的 spec 层级移到了 spec.template.spec 下。这个错误其实是我最开始的一个版本里的，我把 volumes 放到了外层。
  
2>
$ kubectl get pods
NAME      READY   STATUS                  RESTARTS   AGE
mysql-0   0/1     Init:ImagePullBackOff   0          2m53s  // 注意刚开始是这样命名的，这是我之前的一个错误，这里只是贴出来
  
然后可以执行如下命令查看这个pod的详细情况，一般都会有异常信息
$ kubectl describe pod mysql-0
Name:             mysql-0
Namespace:        default
Priority:         0
Service Account:  default
Node:             minikube/192.168.49.2
Start Time:       Wed, 25 Dec 2024 23:29:14 +0800
Labels:           app=mysql
                  apps.kubernetes.io/pod-index=0
                  controller-revision-hash=mysql-6759676ccb
                  statefulset.kubernetes.io/pod-name=mysql-0
Annotations:      <none>
Status:           Pending
IP:               10.244.0.29
IPs:
  IP:           10.244.0.29
Controlled By:  StatefulSet/mysql
Init Containers:
  init-mysql:
    Container ID:  
    Image:         mysql:8.0
    Image ID:      
    Port:          <none>
    Host Port:     <none>
    Command:
      bash
      -c
      set -ex
      [[ `hostname` =~ -([0-9]+)$ ]] || exit 1
      ordinal=${BASH_REMATCH[1]}
      echo [mysqld] > /mnt/conf.d/server-id.cnf
      echo server-id=$((ordinal + 1)) >> /mnt/conf.d/server-id.cnf
      if [[ $ordinal -gt 0 ]]; then
        cat << EOF >> /mnt/conf.d/repl.cnf
        [mysqld]
        gtid_mode=ON
        enforce-gtid-consistency=ON
        EOF
      fi
      if [[ $ordinal -eq 0 ]]; then
        cat << EOF >> /mnt/conf.d/master.cnf
        [mysqld]
        gtid_mode=ON
        enforce-gtid-consistency=ON
        log-bin=mysql-bin
        binlog_format=ROW
        EOF
      fi
    State:          Waiting
      Reason:       ImagePullBackOff
    Ready:          False
    Restart Count:  0
    Environment:    <none>
    Mounts:
      /mnt/conf.d/ from conf (rw)
      /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-l2pw2 (ro)
  clone-mysql:
    Container ID:  
    Image:         busybox
    Image ID:      
    Port:          <none>
    Host Port:     <none>
    Command:
      /bin/sh
      -c
      if [ ! -d /var/lib/mysql-k8s/mysql ]; then mkdir -p /var/lib/mysql-k8s; fi
    State:          Waiting
      Reason:       PodInitializing
    Ready:          False
    Restart Count:  0
    Environment:    <none>
    Mounts:
      /var/lib/mysql-k8s from data (rw)
      /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-l2pw2 (ro)
Containers:
  mysql:
    Container ID:  
    Image:         mysql:8.0
    Image ID:      
    Port:          3307/TCP
    Host Port:     0/TCP
    Args:
      --datadir=/var/lib/mysql-k8s
      --default-authentication-plugin=mysql_native_password
    State:          Waiting
      Reason:       PodInitializing
    Ready:          False
    Restart Count:  0
    Environment:
      MYSQL_ROOT_PASSWORD:  password123
    Mounts:
      /docker-entrypoint-initdb.d from config-map (rw)
      /etc/mysql/conf.d-k8s from conf (rw)
      /var/lib/mysql-k8s from data (rw)
      /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-l2pw2 (ro)
Conditions:
  Type                        Status
  PodReadyToStartContainers   True 
  Initialized                 False 
  Ready                       False 
  ContainersReady             False 
  PodScheduled                True 
Volumes:
  data:
    Type:       PersistentVolumeClaim (a reference to a PersistentVolumeClaim in the same namespace)
    ClaimName:  data-mysql-0
    ReadOnly:   false
  conf:
    Type:       EmptyDir (a temporary directory that shares a pod's lifetime)
    Medium:     
    SizeLimit:  <unset>
  config-map:
    Type:      ConfigMap (a volume populated by a ConfigMap)
    Name:      mysql-config
    Optional:  false
  kube-api-access-l2pw2:
    Type:                    Projected (a volume that contains injected data from multiple sources)
    TokenExpirationSeconds:  3607
    ConfigMapName:           kube-root-ca.crt
    ConfigMapOptional:       <nil>
    DownwardAPI:             true
QoS Class:                   BestEffort
Node-Selectors:              <none>
Tolerations:                 node.kubernetes.io/not-ready:NoExecute op=Exists for 300s
                             node.kubernetes.io/unreachable:NoExecute op=Exists for 300s
Events:
  Type     Reason            Age                    From               Message
  ----     ------            ----                   ----               -------
  Warning  FailedScheduling  4m43s                  default-scheduler  0/1 nodes are available: pod has unbound immediate PersistentVolumeClaims. preemption: 0/1 nodes are available: 1 Preemption is not helpful for scheduling.
  Normal   Scheduled         4m42s                  default-scheduler  Successfully assigned default/mysql-0 to minikube
  Normal   Pulling           2m21s (x4 over 4m40s)  kubelet            Pulling image "mysql:8.0"
  Warning  Failed            95s (x4 over 4m25s)    kubelet            Failed to pull image "mysql:8.0": Error response from daemon: Get "https://registry-1.docker.io/v2/": net/http: request canceled while waiting for connection (Client.Timeout exceeded while awaiting headers)
  Warning  Failed            95s (x4 over 4m25s)    kubelet            Error: ErrImagePull
  Warning  Failed            81s (x6 over 4m24s)    kubelet            Error: ImagePullBackOff
  Normal   BackOff           68s (x7 over 4m24s)    kubelet            Back-off pulling image "mysql:8.0"
  
这里明显的可以看到一个问题，镜像拉不下来，其实这个问题的主要原因是，执行 kubectl apply 的时候，会在 minikube 创建的容器环境下执行，在这个 minikube 的容器环境下还有一套 dcoker 环境，但是这个dockr环境目前还不能直接和宿主机的docker 环境一样访问hub docker 仓库，官方文档说是可以设置代理，然而并没有什么用，所以唯一的方法就是在宿主机上拉取镜像到本地，如下:
$ docker pull mysql:8.0
8.0: Pulling from library/mysql
2c0a233485c3: Pull complete 
b746eccf8a0b: Pull complete 
570d30cf82c5: Pull complete 
c7d84c48f09d: Pull complete 
e9ecf1ccdd2a: Pull complete 
6331406986f7: Pull complete 
f93598758d10: Pull complete 
6c136cb242f2: Pull complete 
d255d476cd34: Pull complete 
dbfe60d9fe24: Pull complete 
9cb9659be67b: Pull complete 
Digest: sha256:d58ac93387f644e4e040c636b8f50494e78e5afc27ca0a87348b2f577da2b7ff
Status: Downloaded newer image for mysql:8.0
docker.io/library/mysql:8.0
  
minikube image load mysql:8.0 //必须要执行这个，把宿主机上拉取的镜像加载到 minikube的docker环境，包括后面的 busybox 也有相似的问题，我直接限定了 busybox:1.36 版本的镜像，然后以相同的方法处理镜像下载的问题，这里固定版本的好处，方便开版本的一致，最好不要使用latest，因为那样会变化。
  
3>
$ kubectl get pods
NAME              READY   STATUS    RESTARTS   AGE
mysql-cluster-0   1/1     Running   0          18s
mysql-cluster-1   1/1     Running   0          14s
mysql-cluster-2   1/1     Running   0          7s
  
这个是正常的，说明成功了。
  
这个:
$ kubectl get statefulsets
NAME            READY   AGE
mysql-cluster   3/3     115m
  
也是正常的了.
  
注意每次处理完错误之后，需要重新apply这个yaml文件，但是需要先执行如下命令:
$ kubectl delete -f mysql-rep-cluster.yaml
  
如果不删除的话，可能会报如下错误:
statefulset.apps/mysql-cluster configured
The Service "mysql-cluster" is invalid: spec.clusterIPs[0]: Invalid value: "None": may not be set to 'None' for NodePort services
  
然后再次执行如下命令:
$ kubectl apply -f mysql-rep-cluster.yaml
  
4.准备配置主从
1>先进入主节点
$ kubectl exec -it mysql-cluster-0 -- mysql -uroot -p  //这里把 mysql-cluster-0 当作主节点
Enter password: 
ERROR 2002 (HY000): Can't connect to local MySQL server through socket '/var/run/mysqld/mysqld.sock' (2)
command terminated with exit code 1
  
2>.查看日志
$ kubectl logs mysql-cluster-0
2024-12-27 15:02:30+00:00 [Note] [Entrypoint]: Entrypoint script for MySQL Server 8.0.40-1.el9 started.
2024-12-27 15:02:45+00:00 [Note] [Entrypoint]: Switching to dedicated user 'mysql'
2024-12-27 15:02:45+00:00 [Note] [Entrypoint]: Entrypoint script for MySQL Server 8.0.40-1.el9 started.
2024-12-27T15:02:45.731465Z 0 [System] [MY-010116] [Server] /usr/sbin/mysqld (mysqld 8.0.40) starting as process 1
2024-12-27T15:02:45.731504Z 0 [ERROR] [MY-010338] [Server] Can't find error-message file '/usr/share/mysql-8.0/errmsg.sys'. Check error-message file location and 'lc-messages-dir' configuration directive.
2024-12-27T15:02:46.140549Z 1 [System] [MY-013576] [InnoDB] InnoDB initialization has started.
2024-12-27T15:02:53.781089Z 1 [System] [MY-013577] [InnoDB] InnoDB initialization has ended.
2024-12-27T15:02:58.264090Z 0 [Warning] [MY-010068] [Server] CA certificate ca.pem is self signed.
2024-12-27T15:02:58.264138Z 0 [System] [MY-013602] [Server] Channel mysql_main configured to support TLS. Encrypted connections are now supported for this channel.
2024-12-27T15:02:58.443162Z 0 [Warning] [MY-011810] [Server] Insecure configuration for --pid-file: Location '/var/lib/mysql' in the path is accessible to all OS users. Consider choosing a different directory.
2024-12-27T15:02:58.794415Z 0 [System] [MY-011323] [Server] X Plugin ready for connections. Bind-address: '::' port: 33060, socket: /var/run/mysqld/mysqlx.sock
2024-12-27T15:02:58.794503Z 0 [System] [MY-010931] [Server] /usr/sbin/mysqld: ready for connections. Version: '8.0.40'  socket: '/var/lib/mysql/mysql.sock'  port: 3306  MySQL Community Server - GPL.
  
3>.通过/var/lib/mysql/mysql.sock sock文件连接 (默认的是 /var/run/mysqld/mysqld.sock， 但是集群配置的不是这个路径)
$ kubectl exec -it mysql-cluster-0 -- mysql -uroot -p --socket=/var/lib/mysql/mysql.sock
Enter password: 
Welcome to the MySQL monitor.  Commands end with ; or \g.
Your MySQL connection id is 8
Server version: 8.0.40 MySQL Community Server - GPL
  
Copyright (c) 2000, 2024, Oracle and/or its affiliates.
  
Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.
  
4>.创建一个复制用户
CREATE USER 'replicator'@'%' IDENTIFIED BY 'replica_password';
GRANT REPLICATION SLAVE ON *.* TO 'replicator'@'%';
FLUSH PRIVILEGES;
  
mysql> show master status\G
*************************** 1. row ***************************
             File: mysql-bin.000002
         Position: 2095
     Binlog_Do_DB: 
 Binlog_Ignore_DB: 
Executed_Gtid_Set: 
1 row in set (0.00 sec)
  
5>进入第一个从节点
$ kubectl exec -it mysql-cluster-1 -- mysql -uroot -p --socket=/var/lib/mysql/mysql.sock
mysql> SHOW VARIABLES LIKE 'server_id';
+---------------+-------+
| Variable_name | Value |
+---------------+-------+
| server_id     | 2     |
+---------------+-------+
1 row in set (0.02 sec)

mysql> SHOW VARIABLES LIKE 'log_bin';
+---------------+-------+
| Variable_name | Value |
+---------------+-------+
| log_bin       | ON    |
+---------------+-------+
1 row in set (0.01 sec)
  
之前的错误配置导致server_id=1，导致很多问题
  
然后配置主从同步账号:
mysql> CHANGE MASTER TO 
    ->     MASTER_HOST='10.244.0.93',
    ->     MASTER_USER='replica_user',
    ->     MASTER_PASSWORD='replica_password',
    ->     MASTER_LOG_FILE='mysql-bin.000002',
    ->     MASTER_LOG_POS=843;
Query OK, 0 rows affected, 8 warnings (0.81 sec)

mysql> START SLAVE;
Query OK, 0 rows affected, 1 warning (0.29 sec)
  
mysql> SHOW SLAVE STATUS\G
主要看下面两个参数是不是Yes，说明同步配置的没问题，如果不是，就说明有问题
Slave_IO_Running: Yes
Slave_SQL_Running: Yes
  
下麦年就是有问题的:
最开始我的写法如下
mysql> CHANGE MASTER TO
    ->   MASTER_HOST='mysql-cluster-0.mysql.default.svc.cluster.local',
    ->   MASTER_USER='replica_user',
    ->   MASTER_PASSWORD='replica_password',
    ->   MASTER_LOG_FILE='mysql-bin.000001',  # 
    ->   MASTER_LOG_POS=4;
  
然后slave的状态:
mysql> SHOW SLAVE STATUS\G
*************************** 1. row ***************************
               Slave_IO_State: Connecting to source
                  Master_Host: mysql-cluster-0.mysql.default.svc.cluster.local
                  Master_User: replica_user
                  Master_Port: 3306
                Connect_Retry: 60
              Master_Log_File: mysql-bin.000001
          Read_Master_Log_Pos: 4
               Relay_Log_File: mysql-cluster-1-relay-bin.000001
                Relay_Log_Pos: 4
        Relay_Master_Log_File: mysql-bin.000001
             Slave_IO_Running: Connecting
            Slave_SQL_Running: Yes
              Replicate_Do_DB: 
          Replicate_Ignore_DB: 
           Replicate_Do_Table: 
       Replicate_Ignore_Table: 
      Replicate_Wild_Do_Table: 
  Replicate_Wild_Ignore_Table: 
                   Last_Errno: 0
                   Last_Error: 
                 Skip_Counter: 0
          Exec_Master_Log_Pos: 4
              Relay_Log_Space: 157
              Until_Condition: None
               Until_Log_File: 
                Until_Log_Pos: 0
           Master_SSL_Allowed: No
           Master_SSL_CA_File: 
           Master_SSL_CA_Path: 
              Master_SSL_Cert: 
            Master_SSL_Cipher: 
               Master_SSL_Key: 
        Seconds_Behind_Master: NULL
Master_SSL_Verify_Server_Cert: No
                Last_IO_Errno: 2005
                Last_IO_Error: Error connecting to source 'replica_user@mysql-cluster-0.mysql.default.svc.cluster.local:3306'. This was attempt 1/86400, with a delay of 60 seconds between attempts. Message: Unknown MySQL server host 'mysql-cluster-0.mysql.default.svc.cluster.local' (-2)
               Last_SQL_Errno: 0
               Last_SQL_Error: 
  Replicate_Ignore_Server_Ids: 
             Master_Server_Id: 0
                  Master_UUID: 
             Master_Info_File: mysql.slave_master_info
                    SQL_Delay: 0
          SQL_Remaining_Delay: NULL
      Slave_SQL_Running_State: Replica has read all relay log; waiting for more updates
           Master_Retry_Count: 86400
                  Master_Bind: 
      Last_IO_Error_Timestamp: 241227 16:53:13
     Last_SQL_Error_Timestamp: 
               Master_SSL_Crl: 
           Master_SSL_Crlpath: 
           Retrieved_Gtid_Set: 
            Executed_Gtid_Set: 
                Auto_Position: 0
         Replicate_Rewrite_DB: 
                 Channel_Name: 
           Master_TLS_Version: 
       Master_public_key_path: 
        Get_master_public_key: 0
            Network_Namespace: 
1 row in set, 1 warning (0.00 sec)
  
这里有个 Last_IO_Error，就是不能连接master的问题，然后不管我怎么修改MASTER_HOST,都有问题，最后才找到下面的答案:
DNS 解析的主机名问题:
mysql-cluster-0.mysql.default.svc.cluster.local 是通过 ClusterIP 类型的 Service 提供的 DNS 解析。当 Service 类型为 NodePort 时，它不直接支持这种集群内的主机名解析。
  
跨 Pod 通信的问题:
NodePort 主要用于从集群外部访问服务，而不是集群内部 Pod 的通信。在 StatefulSet 的场景中，主从之间的通信通常依赖于 ClusterIP。
  
因为我最开始是将service 配置的NodePort类型，所以一直有问题，后来改了还几个版本，使用ClusterIP类型才解决问题
  
最后我决定直接查看master的ip,将host配置为ip，如下:
$ kubectl describe svc mysql-cluster
Name:                     mysql-cluster
Namespace:                default
Labels:                   <none>
Annotations:              <none>
Selector:                 app=mysql
Type:                     ClusterIP
IP Family Policy:         SingleStack
IP Families:              IPv4
IP:                       10.102.80.128
IPs:                      10.102.80.128
Port:                     <unset>  3306/TCP
TargetPort:               3306/TCP
Endpoints:                10.244.0.93:3306,10.244.0.94:3306,10.244.0.95:3306
Session Affinity:         None
Internal Traffic Policy:  Cluster
Events:                   <none>
  
10.244.0.93 就是master的ip，但是这样有个缺点，就是ip可能会变化，所以配置成 mysql-cluster-0.mysql.default.svc.cluster.local 最好，但是好像还是有问题，关于这个问题还需要继续探讨...
  
当配置为ip之前，执行如下操作:
mysql> stop slave;
Query OK, 0 rows affected, 1 warning (0.19 sec)
  
然后:
mysql> CHANGE MASTER TO 
    ->     MASTER_HOST='10.244.0.93',
    ->     MASTER_USER='replica_user',
    ->     MASTER_PASSWORD='replica_password',
    ->     MASTER_LOG_FILE='mysql-bin.000002',
    ->     MASTER_LOG_POS=843;
Query OK, 0 rows affected, 8 warnings (0.81 sec)
  
mysql> start slave;
Query OK, 0 rows affected, 1 warning (0.19 sec)
  
mysql> show slave status\G
*************************** 1. row ***************************
               Slave_IO_State: Waiting for source to send event
                  Master_Host: 10.244.0.93
                  Master_User: replica_user
                  Master_Port: 3306
                Connect_Retry: 60
              Master_Log_File: mysql-bin.000002
          Read_Master_Log_Pos: 843
               Relay_Log_File: relay-log-bin.000002
                Relay_Log_Pos: 326
        Relay_Master_Log_File: mysql-bin.000002
             Slave_IO_Running: Yes
            Slave_SQL_Running: Yes
              Replicate_Do_DB: 
          Replicate_Ignore_DB: 
           Replicate_Do_Table: 
       Replicate_Ignore_Table: 
      Replicate_Wild_Do_Table: 
  Replicate_Wild_Ignore_Table: 
                   Last_Errno: 0
                   Last_Error: 
                 Skip_Counter: 0
          Exec_Master_Log_Pos: 843
              Relay_Log_Space: 534
              Until_Condition: None
               Until_Log_File: 
                Until_Log_Pos: 0
           Master_SSL_Allowed: No
           Master_SSL_CA_File: 
           Master_SSL_CA_Path: 
              Master_SSL_Cert: 
            Master_SSL_Cipher: 
               Master_SSL_Key: 
        Seconds_Behind_Master: 0
Master_SSL_Verify_Server_Cert: No
                Last_IO_Errno: 0
                Last_IO_Error: 
               Last_SQL_Errno: 0
               Last_SQL_Error: 
  Replicate_Ignore_Server_Ids: 
             Master_Server_Id: 1
                  Master_UUID: 16b0a150-c397-11ef-ba7e-c6314bb59e6f
             Master_Info_File: mysql.slave_master_info
                    SQL_Delay: 0
          SQL_Remaining_Delay: NULL
      Slave_SQL_Running_State: Replica has read all relay log; waiting for more updates
           Master_Retry_Count: 86400
                  Master_Bind: 
      Last_IO_Error_Timestamp: 
     Last_SQL_Error_Timestamp: 
               Master_SSL_Crl: 
           Master_SSL_Crlpath: 
           Retrieved_Gtid_Set: 
            Executed_Gtid_Set: 
                Auto_Position: 0
         Replicate_Rewrite_DB: 
                 Channel_Name: 
           Master_TLS_Version: 
       Master_public_key_path: 
        Get_master_public_key: 0
            Network_Namespace: 
1 row in set, 1 warning (0.00 sec)
  
现在都是Yes了，基本可以确定主从配置完成，对第二个从节点也是类似的操作

  
[一天之后]，我重新启动宿主机，发现 show slave status\G 报如下错误:
Last_IO_Error: Error connecting to source 'replica_user@10.244.0.93:3306'. This was attempt 10/86400, with a delay of 60 seconds between attempts. Message: Can't connect to MySQL server on '10.244.0.93:3306' (113)

果真，重启服务器之后，master 的 ip 发生了变化， slave 无法连接，所以这个 master_host 要配置为名称，经过我的测试，改为如下配置就可以:
mysql> CHANGE MASTER TO 
    ->     MASTER_HOST='mysql-cluster-0.mysql',
    ->     MASTER_USER='replica_user',
    ->     MASTER_PASSWORD='replica_password',
    ->     MASTER_LOG_FILE='mysql-bin.000003',
    ->     MASTER_LOG_POS=157;
  
配置为 mysql-cluster-0.mysql, 而不是 mysql-cluster-0.mysql.default.svc.cluster.local,
再次查看:
mysql> show slave status\G
*************************** 1. row ***************************
               Slave_IO_State: Waiting for source to send event
                  Master_Host: mysql-cluster-0.mysql
                  Master_User: replica_user
                  Master_Port: 3306
                Connect_Retry: 60
              Master_Log_File: mysql-bin.000003
          Read_Master_Log_Pos: 472
               Relay_Log_File: relay-log-bin.000002
                Relay_Log_Pos: 326
        Relay_Master_Log_File: mysql-bin.000003
             Slave_IO_Running: Yes
            Slave_SQL_Running: Yes
              Replicate_Do_DB: 
          Replicate_Ignore_DB: 
           Replicate_Do_Table: 
       Replicate_Ignore_Table: 
      Replicate_Wild_Do_Table: 
  Replicate_Wild_Ignore_Table: 
                   Last_Errno: 0
                   Last_Error: 
                 Skip_Counter: 0
          Exec_Master_Log_Pos: 472
              Relay_Log_Space: 534
              Until_Condition: None
               Until_Log_File: 
                Until_Log_Pos: 0
           Master_SSL_Allowed: No
           Master_SSL_CA_File: 
           Master_SSL_CA_Path: 
              Master_SSL_Cert: 
            Master_SSL_Cipher: 
               Master_SSL_Key: 
        Seconds_Behind_Master: 0
Master_SSL_Verify_Server_Cert: No
                Last_IO_Errno: 0
                Last_IO_Error: 
               Last_SQL_Errno: 0
               Last_SQL_Error: 
  Replicate_Ignore_Server_Ids: 
             Master_Server_Id: 1
                  Master_UUID: 16b0a150-c397-11ef-ba7e-c6314bb59e6f
             Master_Info_File: mysql.slave_master_info
                    SQL_Delay: 0
          SQL_Remaining_Delay: NULL
      Slave_SQL_Running_State: Replica has read all relay log; waiting for more updates
           Master_Retry_Count: 86400
                  Master_Bind: 
      Last_IO_Error_Timestamp: 
     Last_SQL_Error_Timestamp: 
               Master_SSL_Crl: 
           Master_SSL_Crlpath: 
           Retrieved_Gtid_Set: 
            Executed_Gtid_Set: 
                Auto_Position: 0
         Replicate_Rewrite_DB: 
                 Channel_Name: 
           Master_TLS_Version: 
       Master_public_key_path: 
        Get_master_public_key: 0
            Network_Namespace: 
1 row in set, 1 warning (0.00 sec)


  
数据验证:
主:
mysql> show databases;
+--------------------+
| Database           |
+--------------------+
| information_schema |
| mysql              |
| performance_schema |
| sys                |
| testdb             |
+--------------------+
5 rows in set (0.00 sec)
  
mysql> 
mysql> 
mysql> use testdb;
Database changed
mysql> 
mysql> show tables;
Empty set (0.00 sec)
  
mysql> 
mysql> 
mysql> CREATE TABLE users (
    ->     id INT PRIMARY KEY AUTO_INCREMENT,
    ->     name VARCHAR(50),
    ->     created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    -> );
Query OK, 0 rows affected (1.13 sec)
  
mysql> 
mysql> 
mysql> INSERT INTO users (name) VALUES ('test1'), ('test2');
Query OK, 2 rows affected (0.14 sec)
Records: 2  Duplicates: 0  Warnings: 0
  
mysql> SELECT * FROM users;
+----+-------+---------------------+
| id | name  | created_at          |
+----+-------+---------------------+
|  1 | test1 | 2024-12-28 14:13:19 |
|  2 | test2 | 2024-12-28 14:13:19 |
+----+-------+---------------------+
2 rows in set (0.00 sec)


  
从:
mysql> show databases;
+--------------------+
| Database           |
+--------------------+
| information_schema |
| mysql              |
| performance_schema |
| sys                |
| testdb             |
+--------------------+
5 rows in set (0.00 sec)
  
mysql> 
mysql> 
mysql> use testdb;
Reading table information for completion of table and column names
You can turn off this feature to get a quicker startup with -A
  
Database changed
mysql> 
mysql> 
mysql> show tables;
+------------------+
| Tables_in_testdb |
+------------------+
| users            |
+------------------+
1 row in set (0.00 sec)
  
mysql> 
mysql> 
mysql> select * from users;
+----+-------+---------------------+
| id | name  | created_at          |
+----+-------+---------------------+
|  1 | test1 | 2024-12-28 14:13:19 |
|  2 | test2 | 2024-12-28 14:13:19 |
+----+-------+---------------------+
2 rows in set (0.00 sec)

  
从库用户设置:
设置了read_only的slave节点:
mysql> SHOW VARIABLES LIKE 'read_only';
+---------------+-------+
| Variable_name | Value |
+---------------+-------+
| read_only     | ON    |
+---------------+-------+
1 row in set (0.00 sec)
  
mysql> SHOW VARIABLES LIKE 'super_read_only';
+-----------------+-------+
| Variable_name   | Value |
+-----------------+-------+
| super_read_only | OFF   |
+-----------------+-------+
1 row in set (0.00 sec)

  
发现可以删除数据:
mysql> select * from users;
+----+-------+---------------------+
| id | name  | created_at          |
+----+-------+---------------------+
|  1 | test1 | 2024-12-28 14:13:19 |
|  3 | test3 | 2024-12-28 14:17:24 |
|  4 | test4 | 2024-12-28 14:17:24 |
+----+-------+---------------------+
3 rows in set (0.00 sec)

  
mysql> delete from users where id =3;
Query OK, 1 row affected (0.09 sec)
  
mysql> 
mysql> 
mysql> select * from users;
+----+-------+---------------------+
| id | name  | created_at          |
+----+-------+---------------------+
|  1 | test1 | 2024-12-28 14:13:19 |
|  4 | test4 | 2024-12-28 14:17:24 |
+----+-------+---------------------+
2 rows in set (0.00 sec)

  
创建一个普通用户:
CREATE USER 'readonly_user1'@'%' IDENTIFIED BY 'password_read_only';
GRANT SELECT ON *.* TO 'readonly_user1'@'%';
FLUSH PRIVILEGES;
  
SHOW GRANTS FOR 'readonly_user1'@'%';
  
mysql> CREATE USER 'readonly_user1'@'%' IDENTIFIED BY 'password_read_only';
Query OK, 0 rows affected (0.09 sec)
  
mysql> GRANT SELECT ON *.* TO 'readonly_user1'@'%';
Query OK, 0 rows affected (0.07 sec)
  
mysql> FLUSH PRIVILEGES;
Query OK, 0 rows affected (0.02 sec)
  
mysql> SHOW GRANTS FOR 'readonly_user1'@'%';
+---------------------------------------------+
| Grants for readonly_user1@%                 |
+---------------------------------------------+
| GRANT SELECT ON *.* TO `readonly_user1`@`%` |
+---------------------------------------------+
1 row in set (0.00 sec)


  
使用这个普通用户登陆:
$ kubectl exec -it mysql-cluster-2 -- mysql -ureadonly_user1 -p --socket=/var/lib/mysql/mysql.sock
  
mysql> delete from users where id = 4;
ERROR 1290 (HY000): The MySQL server is running with the --read-only option so it cannot execute this statement
  
现在普通用户不能删除数据，符合需求. 对于普通用户 read_only 设置会生效，如果设置了ON, 那么这个用户只能取数据。但是对于 super user 比如 root 是不生效的，super user 是由 super_read_only 来决定的。


  
资源查看:
$ kubectl get statefulsets
NAME            READY   AGE
mysql-cluster   3/3     174m

  
$ kubectl get svc
NAME            TYPE        CLUSTER-IP      EXTERNAL-IP   PORT(S)    AGE
kubernetes      ClusterIP   10.96.0.1       <none>        443/TCP    18d
mysql           ClusterIP   None            <none>        3306/TCP   174m
mysql-cluster   ClusterIP   10.102.80.128   <none>        3306/TCP   174m

  
$ kubectl get pods
NAME              READY   STATUS    RESTARTS   AGE
mysql-cluster-0   1/1     Running   0          175m
mysql-cluster-1   1/1     Running   0          175m
mysql-cluster-2   1/1     Running   0          175m

  
linux命令补充:
vim批量删除操作:
批量部分删除
v 选行，然后 d
  
批量全部删除
:%d