Skip to content

File master_heartbeat.h

File List > cubrid > src > executables > master_heartbeat.h

Go to the documentation of this file

/*
 * Copyright 2008 Search Solution Corporation
 * Copyright 2016 CUBRID Corporation
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 */

/*
 * master_heartbeat.h - heartbeat module in cub_master
 */

#ifndef _MASTER_HEARTBEAT_H_
#define _MASTER_HEARTBEAT_H_

#ident "$Id$"

#include "heartbeat.h"
#include "log_lsa.hpp"
#include "master_util.h"
#include "porting.h"
#include "system_parameter.h"

#if defined (LINUX)
#include <netinet/in.h>
#elif defined (WINDOWS)
#include <winsock2.h>
#endif

#if defined(WINDOWS)
/* Provide a pid_t type on WINDOWS builds; pid_t is used in this header by
 * hb_deregister_by_pid (). */
typedef int pid_t;
#endif

/* ping result
 *
 * Result codes of a ping check. Each enumerator has a matching display label
 * macro (HB_PING_*_STR). The ping_result member of struct hb_ping_host_entry
 * is declared as int and presumably stores one of these values - confirm in
 * the implementation file. */
enum HB_PING_RESULT
{
  HB_PING_UNKNOWN = -1,
  HB_PING_SUCCESS = 0,
  HB_PING_USELESS_HOST = 1,
  HB_PING_SYS_ERR = 2,
  HB_PING_FAILURE = 3
};

/* Display labels, one per enum HB_PING_RESULT enumerator. Note that
 * HB_PING_USELESS_HOST is labelled "SKIPPED" and HB_PING_SYS_ERR is
 * labelled "ERROR". */
#define HB_PING_UNKNOWN_STR          "UNKNOWN"
#define HB_PING_SUCCESS_STR          "SUCCESS"
#define HB_PING_USELESS_HOST_STR     "SKIPPED"
#define HB_PING_SYS_ERR_STR          "ERROR"
#define HB_PING_FAILURE_STR          "FAILURE"
/* NOTE(review): 7 is the character count of the longest labels above and
 * leaves no room for a terminating NUL; this looks like a print width rather
 * than a buffer size - confirm at the use sites. */
#define HB_PING_STR_SIZE             (7)

/* heartbeat cluster jobs
 *
 * Identifiers of the cluster job types. Values are contiguous from 0, so
 * HB_CJOB_MAX (8) is the number of job types; presumably it sizes or bounds a
 * per-type table such as job_funcs in struct hb_job - confirm in the
 * implementation file. */
enum HB_CLUSTER_JOB
{
  HB_CJOB_INIT = 0,
  HB_CJOB_HEARTBEAT = 1,
  HB_CJOB_CALC_SCORE = 2,
  HB_CJOB_CHECK_PING = 3,
  HB_CJOB_FAILOVER = 4,
  HB_CJOB_FAILBACK = 5,
  HB_CJOB_CHECK_VALID_PING_SERVER = 6,
  HB_CJOB_DEMOTE = 7,
  HB_CJOB_MAX
};

/* heartbeat resource jobs
 *
 * Identifiers of the resource job types. Values are contiguous from 0, so
 * HB_RJOB_MAX (9) is the number of job types; presumably it sizes or bounds a
 * per-type table such as job_funcs in struct hb_job - confirm in the
 * implementation file. */
enum HB_RESOURCE_JOB
{
  HB_RJOB_PROC_START = 0,
  HB_RJOB_PROC_DEREG = 1,
  HB_RJOB_CONFIRM_START = 2,
  HB_RJOB_CONFIRM_DEREG = 3,
  HB_RJOB_CHANGE_MODE = 4,
  HB_RJOB_DEMOTE_START_SHUTDOWN = 5,
  HB_RJOB_DEMOTE_CONFIRM_SHUTDOWN = 6,
  HB_RJOB_CLEANUP_ALL = 7,
  HB_RJOB_CONFIRM_CLEANUP_ALL = 8,
  HB_RJOB_MAX
};

/*  heartbeat resource process state
 *  When this enum is changed, SERVER_STATE must be changed as well.
 *  broker.c : enum SERVER_STATE
 *
 *  HB_PSTATE_REGISTERED_AND_STANDBY is an alias of HB_PSTATE_REGISTERED
 *  (both are 5). HB_PSTATE_MAX follows the last explicit value and is 9. */
enum HB_PROC_STATE
{
  HB_PSTATE_UNKNOWN = 0,
  HB_PSTATE_DEAD = 1,
  HB_PSTATE_DEREGISTERED = 2,
  HB_PSTATE_STARTED = 3,
  HB_PSTATE_NOT_REGISTERED = 4,
  HB_PSTATE_REGISTERED = 5,
  HB_PSTATE_REGISTERED_AND_STANDBY = HB_PSTATE_REGISTERED,
  HB_PSTATE_REGISTERED_AND_TO_BE_STANDBY = 6,
  HB_PSTATE_REGISTERED_AND_ACTIVE = 7,
  HB_PSTATE_REGISTERED_AND_TO_BE_ACTIVE = 8,
  HB_PSTATE_MAX
};
/* Display labels for enum HB_PROC_STATE. HB_PSTATE_REGISTERED and its alias
 * HB_PSTATE_REGISTERED_AND_STANDBY share one numeric value but have two
 * distinct labels here; which label is printed is decided by the caller. */
#define HB_PSTATE_UNKNOWN_STR                   "unknown"
#define HB_PSTATE_DEAD_STR                      "dead"
#define HB_PSTATE_DEREGISTERED_STR              "deregistered"
#define HB_PSTATE_STARTED_STR                   "started"
#define HB_PSTATE_NOT_REGISTERED_STR            "not_registered"
#define HB_PSTATE_REGISTERED_STR                "registered"
#define HB_PSTATE_REGISTERED_AND_STANDBY_STR        "registered_and_standby"
#define HB_PSTATE_REGISTERED_AND_TO_BE_STANDBY_STR  "registered_and_to_be_standby"
#define HB_PSTATE_REGISTERED_AND_ACTIVE_STR         "registered_and_active"
#define HB_PSTATE_REGISTERED_AND_TO_BE_ACTIVE_STR   "registered_and_to_be_active"
/* The longest label above is 28 characters, so it fits in 32 bytes including
 * the terminating NUL. */
#define HB_PSTATE_STR_SZ                        (32)

/* Priority value for replica nodes. It has the same numeric value as
 * HB_NODE_SCORE_UNKNOWN below. */
#define HB_REPLICA_PRIORITY                     0x7FFF

/* heartbeat node score bitmask
 *
 * NOTE(review): struct hb_node_entry declares score as a (signed) short while
 * HB_NODE_SCORE_MASTER and HB_NODE_SCORE_TO_BE_MASTER exceed the positive
 * range of a 16-bit short - confirm how these masks are combined with score
 * in the implementation file. */
#define HB_NODE_SCORE_MASTER                    0x8000
#define HB_NODE_SCORE_TO_BE_MASTER              0xF000
#define HB_NODE_SCORE_SLAVE                     0x0000
#define HB_NODE_SCORE_UNKNOWN                   0x7FFF

/* Size and count limits of the heartbeat module.
 * NOTE(review): the units of HB_MAX_WAIT_FOR_NEW_MASTER and of the
 * HB_MAX_CHANGEMODE_DIFF_* thresholds are not visible in this header
 * (seconds vs. job iterations) - confirm at the use sites. */
#define HB_BUFFER_SZ                            (4096)
#define HB_MAX_NUM_NODES                        (8)
#define HB_MAX_NUM_RESOURCE_PROC                (16)
#define HB_MAX_PING_CHECK                       (3)
#define HB_MAX_WAIT_FOR_NEW_MASTER              (60)
#define HB_MAX_CHANGEMODE_DIFF_TO_TERM      (12)
#define HB_MAX_CHANGEMODE_DIFF_TO_KILL      (24)

/* various strings for er_set */
/* result labels */
#define HB_RESULT_SUCCESS_STR                   "Success"
#define HB_RESULT_FAILURE_STR                   "Failure"

/* command name labels */
#define HB_CMD_ACTIVATE_STR                     "activate"
#define HB_CMD_DEACTIVATE_STR                   "deactivate"
#define HB_CMD_DEREGISTER_STR                   "deregister"
#define HB_CMD_RELOAD_STR                       "reload"
#define HB_CMD_UTIL_START_STR                   "util process start"

/* host check result
 *
 * Numeric values are spelled out, matching the other enums of this header;
 * they are the same values the compiler assigns implicitly.
 * NOTE(review): presumably the result of hb_check_request_eligibility (),
 * which is declared to return int - confirm in the implementation file. */
enum HB_HOST_CHECK_RESULT
{
  HB_HC_ELIGIBLE_LOCAL = 0,
  HB_HC_ELIGIBLE_REMOTE = 1,
  HB_HC_UNAUTHORIZED = 2,
  HB_HC_FAILED = 3
};

/* reason for disabling the error log
 *
 * Numeric values are spelled out; they are the same values the compiler
 * assigns implicitly. HB_NOLOG_MAX is the largest valid reason (it equals
 * HB_NOLOG_REMOTE_STOP), not a count of reasons.
 * NOTE(review): presumably passed as the reason argument of
 * hb_disable_er_log (), which takes an int - confirm in the implementation
 * file. */
enum HB_NOLOG_REASON
{
  HB_NOLOG_DEMOTE_ON_DISK_FAIL = 0,
  HB_NOLOG_REMOTE_STOP = 1,
  HB_NOLOG_MAX = HB_NOLOG_REMOTE_STOP
};

/* heartbeat validation result
 *
 * HB_VALID_NO_ERROR is 0; every other value names a specific validation
 * failure. Each enumerator has a matching label macro (HB_VALID_*_STR).
 * The v_result member of struct hb_ui_node_entry is declared as int and
 * presumably stores one of these values - confirm in the implementation
 * file. */
enum HB_VALID_RESULT
{
  HB_VALID_NO_ERROR = 0,
  HB_VALID_UNIDENTIFIED_NODE = 1,
  HB_VALID_GROUP_NAME_MISMATCH = 2,
  HB_VALID_IP_ADDR_MISMATCH = 3,
  HB_VALID_CANNOT_RESOLVE_HOST = 4
};
/* Display labels, one per enum HB_VALID_RESULT enumerator. Note that the
 * label of HB_VALID_CANNOT_RESOLVE_HOST ends in "_host_name". */
#define HB_VALID_NO_ERROR_STR           "no_error"
#define HB_VALID_UNIDENTIFIED_NODE_STR      "unidentified_node"
#define HB_VALID_GROUP_NAME_MISMATCH_STR    "group_name_mismatch"
#define HB_VALID_IP_ADDR_MISMATCH_STR       "ip_addr_mismatch"
#define HB_VALID_CANNOT_RESOLVE_HOST_STR    "cannot_resolve_host_name"

/* time related macro */
/*
 * HB_GET_ELAPSED_TIME - time elapsed from start_time to end_time
 *   return: elapsed time in milliseconds, as a double
 *   end_time(in): lvalue expression with tv_sec / tv_usec members
 *   start_time(in): lvalue expression with tv_sec / tv_usec members
 *
 * Note: each argument is parenthesized so that expressions such as *ptr can
 *       be passed; each argument is evaluated twice, so avoid side effects.
 */
#define HB_GET_ELAPSED_TIME(end_time, start_time) \
            ((double)((end_time).tv_sec - (start_time).tv_sec) * 1000 + \
             ((end_time).tv_usec - (start_time).tv_usec)/1000.0)

/*
 * HB_IS_INITIALIZED_TIME - check whether a timeval still holds all zeros
 *   return: 1 if both tv_sec and tv_usec are 0, otherwise 0
 *   arg_time(in): lvalue expression with tv_sec / tv_usec members
 *
 * Note: despite the name, a result of 1 means that no time has been stored
 *       yet. The argument is parenthesized so that expressions such as *ptr
 *       can be passed; it is evaluated up to twice, so avoid side effects.
 */
#define HB_IS_INITIALIZED_TIME(arg_time) \
            (((arg_time).tv_sec == 0 && (arg_time).tv_usec == 0) ? 1 : 0)

/* 30 seconds, expressed in milliseconds */
#define HB_PROC_RECOVERY_DELAY_TIME     (30* 1000)  /* milli-second */

/* unidentified (ui) node timings, in milliseconds: 1 hour and 1 minute */
#define HB_UI_NODE_CLEANUP_TIME_IN_MSECS    (3600 * 1000)
#define HB_UI_NODE_CACHE_TIME_IN_MSECS      (60 * 1000)
/* presumably the buffer size of a dotted-quad IPv4 string
 * ("255.255.255.255" is 15 characters, plus NUL) - confirm at the use sites */
#define HB_IPV4_STR_LEN             (16)

/* heartbeat list
 *
 * Generic list link: a pointer to the next element and a pointer-to-pointer
 * named prev. The entry structs declared later in this header (node, ping
 * host, ui node, proc, job) start with the same two members, presumably so
 * that they can be handled through HB_LIST by shared list helpers - confirm
 * in the implementation file. */
typedef struct hb_list HB_LIST;
struct hb_list
{
  HB_LIST *next;
  HB_LIST **prev;
};


/* heartbeat node entries
 *
 * One entry per node; entries are chained through next/prev (same leading
 * layout as HB_LIST). */
typedef struct hb_node_entry HB_NODE_ENTRY;
struct hb_node_entry
{
  HB_NODE_ENTRY *next;
  HB_NODE_ENTRY **prev;

  char host_name[CUB_MAXHOSTNAMELEN];	/* node host name */
  unsigned short priority;	/* node priority; see HB_REPLICA_PRIORITY */
  HB_NODE_STATE_TYPE state;	/* node state (type declared outside this file) */
  short score;			/* node score; see HB_NODE_SCORE_* */
  short heartbeat_gap;		/* NOTE(review): exact meaning not visible here - confirm */

  struct timeval last_recv_hbtime;  /* last received heartbeat time */
};

/* heartbeat ping host entries
 *
 * One entry per ping host; entries are chained through next/prev (same
 * leading layout as HB_LIST). */
typedef struct hb_ping_host_entry HB_PING_HOST_ENTRY;
struct hb_ping_host_entry
{
  HB_PING_HOST_ENTRY *next;
  HB_PING_HOST_ENTRY **prev;

  char host_name[CUB_MAXHOSTNAMELEN];	/* ping host name */
  int port;         /* TCP ping only */
  int ping_result;	/* presumably an enum HB_PING_RESULT value - confirm */
};

/* heartbeat unidentified host entries
 *
 * One entry per unidentified (ui) node; entries are chained through next/prev
 * (same leading layout as HB_LIST). */
typedef struct hb_ui_node_entry HB_UI_NODE_ENTRY;
struct hb_ui_node_entry
{
  HB_UI_NODE_ENTRY *next;
  HB_UI_NODE_ENTRY **prev;

  char host_name[CUB_MAXHOSTNAMELEN];	/* host name of the node */
  char group_id[HB_MAX_GROUP_ID_LEN];	/* group id of the node */
  struct sockaddr_in saddr;	/* socket address of the node */
  struct timeval last_recv_time;	/* last receive time; see HB_UI_NODE_*_TIME_IN_MSECS */
  int v_result;			/* presumably an enum HB_VALID_RESULT value - confirm */
};

/* heartbeat cluster
 *
 * Cluster-wide heartbeat state; the global instance is hb_Cluster. */
typedef struct hb_cluster HB_CLUSTER;
struct hb_cluster
{
  pthread_mutex_t lock;		/* presumably guards the members below - confirm */

  SOCKET sfd;			/* socket of the cluster */

  HB_NODE_STATE_TYPE state;	/* node state (type declared outside this file) */
  char group_id[HB_MAX_GROUP_ID_LEN];
  char host_name[CUB_MAXHOSTNAMELEN];

  int num_nodes;		/* presumably the length of the nodes list - confirm */
  HB_NODE_ENTRY *nodes;		/* head of the node entry list */

  HB_NODE_ENTRY *myself;	/* presumably points into nodes (local node) - confirm */
  HB_NODE_ENTRY *master;	/* presumably points into nodes (master node) - confirm */

  bool shutdown;
  bool hide_to_demote;
  bool is_isolated;
  bool is_ping_check_enabled;

  HB_PING_HOST_ENTRY *ping_hosts;	/* head of the ping host entry list */
  int num_ping_hosts;		/* presumably the length of the ping_hosts list - confirm */
  int ping_timeout;     /* TCP ping only */

  HB_UI_NODE_ENTRY *ui_nodes;	/* head of the unidentified node entry list */
  int num_ui_nodes;		/* presumably the length of the ui_nodes list - confirm */
};

/* heartbeat process entries
 *
 * One entry per process; entries are chained through next/prev (same leading
 * layout as HB_LIST).
 * NOTE(review): unlike the other structs of this header there is no typedef
 * here, yet HB_PROC_ENTRY is used as a type name without the struct keyword;
 * this relies on C++ compilation or on a typedef in an included header -
 * confirm. */
struct HB_PROC_ENTRY
{
  HB_PROC_ENTRY *next;
  HB_PROC_ENTRY **prev;

  unsigned char state;      /* process state */
  unsigned char type;       /* single/master-slave */

  int sfd;			/* socket fd of the process */

  int pid;			/* process id */
  char exec_path[HB_MAX_SZ_PROC_EXEC_PATH];	/* executable path */
  char args[HB_MAX_SZ_PROC_ARGS];	/* process arguments */

  struct timeval frtime;    /* first registered time */
  struct timeval rtime;     /* registered time */
  struct timeval dtime;     /* deregistered time */
  struct timeval ktime;     /* shutdown time */
  struct timeval stime;     /* start time */

  unsigned short changemode_rid;	/* NOTE(review): exact meaning not visible here - confirm */
  unsigned short changemode_gap;	/* see HB_MAX_CHANGEMODE_DIFF_TO_TERM / _TO_KILL (presumably) */

  LOG_LSA prev_eof;		/* previous EOF LSA */
  LOG_LSA curr_eof;		/* current EOF LSA */
  bool is_curr_eof_received;	/* whether curr_eof has been received */

  CSS_CONN_ENTRY *conn;		/* connection entry of the process */

  bool being_shutdown;      /* whether the proc is being shut down */
  bool server_hang;		/* see hb_is_hang_process () (presumably) */
};

/* heartbeat resources
 *
 * Resource (process) side of the heartbeat state; the global instance is
 * hb_Resource. */
typedef struct hb_resource HB_RESOURCE;
struct hb_resource
{
  pthread_mutex_t lock;		/* presumably guards the members below - confirm */

  HB_NODE_STATE_TYPE state; /* mode/state */

  int num_procs;		/* presumably the length of the procs list - confirm */
  HB_PROC_ENTRY *procs;		/* head of the process entry list */

  bool shutdown;
};

/* heartbeat cluster job argument
 *
 * Cluster member of union hb_job_arg. */
typedef struct hb_cluster_job_arg HB_CLUSTER_JOB_ARG;
struct hb_cluster_job_arg
{
  unsigned int ping_check_count;	/* see HB_MAX_PING_CHECK (presumably its upper bound) */
  unsigned int retries;     /* job retries */
};

/* heartbeat resource job argument
 *
 * Resource member of union hb_job_arg. pid, sfd and args have the same types
 * as the like-named members of struct HB_PROC_ENTRY. */
typedef struct hb_resource_job_arg HB_RESOURCE_JOB_ARG;
struct hb_resource_job_arg
{
  int pid;          /* process id */
  int sfd;          /* socket fd */

  char args[HB_MAX_SZ_PROC_ARGS];   /* args */

  unsigned int retries;     /* job retries */
  unsigned int max_retries; /* job max retries */

  struct timeval ftime;     /* first job execution time */
  struct timeval ltime;     /* last job execution time */
};

/* heartbeat job argument
 *
 * Union of the cluster and the resource job arguments. The union carries no
 * discriminator; which member is valid must be known by the job function. */
typedef union hb_job_arg HB_JOB_ARG;
union hb_job_arg
{
  HB_CLUSTER_JOB_ARG cluster_job_arg;
  HB_RESOURCE_JOB_ARG resource_job_arg;
};

/* job function: takes a pointer to the job argument and returns nothing */
typedef void (*HB_JOB_FUNC) (HB_JOB_ARG *);

/* timer job queue entries
 *
 * One queued job; entries are chained through next/prev (same leading layout
 * as HB_LIST). */
typedef struct hb_job_entry HB_JOB_ENTRY;
struct hb_job_entry
{
  HB_JOB_ENTRY *next;
  HB_JOB_ENTRY **prev;

  unsigned int type;		/* presumably an HB_CLUSTER_JOB / HB_RESOURCE_JOB value - confirm */

  struct timeval expire;	/* expiration time of the job */

  HB_JOB_FUNC func;		/* job function */
  HB_JOB_ARG *arg;		/* argument handed to func; ownership not visible here */
};

/* timer job queue
 *
 * The global instances are cluster_Jobs and resource_Jobs. */
typedef struct hb_job HB_JOB;
struct hb_job
{
  pthread_mutex_t lock;		/* presumably guards the members below - confirm */

  unsigned short num_jobs;	/* presumably the length of the jobs list - confirm */
  HB_JOB_ENTRY *jobs;		/* head of the job entry list */

  HB_JOB_FUNC *job_funcs;	/* array of job functions; presumably indexed by job type - confirm */

  bool shutdown;
};

/* Global heartbeat state. These are declarations only; the definitions live
 * in another translation unit. */
extern HB_CLUSTER *hb_Cluster;
extern HB_RESOURCE *hb_Resource;
extern HB_JOB *cluster_Jobs;
extern HB_JOB *resource_Jobs;

extern bool hb_Deactivate_immediately;

/*
 * Public functions of the heartbeat module. Only the signatures are visible in
 * this header; the notes below are limited to what the signatures show, and
 * anything beyond that is marked as an assumption to be confirmed in the
 * implementation file.
 */

/* initialization and shutdown */
extern int hb_master_init (void);
extern void hb_resource_shutdown_and_cleanup (void);
extern void hb_cluster_shutdown_and_cleanup (void);

extern void hb_cleanup_conn_and_start_process (CSS_CONN_ENTRY * conn, SOCKET sfd);

/* Information strings: the result is handed back through the char ** output
 * argument. NOTE(review): whether the caller must free *str is not visible
 * here - confirm before use. */
extern void hb_get_node_info_string (char **str, bool verbose_yn);
extern void hb_get_process_info_string (char **str, bool verbose_yn);
extern void hb_get_ping_host_info_string (char **str);
extern void hb_get_tcp_ping_host_info_string (char **str);
extern void hb_get_admin_info_string (char **str);
extern void hb_kill_all_heartbeat_process (char **str);

/* deregistration, selected by process id or by argument string */
extern void hb_deregister_by_pid (pid_t pid);
extern void hb_deregister_by_args (char *args);

/* reconfiguration, activation and deactivation */
extern void hb_reconfig_heartbeat (char **str);
extern int hb_prepare_deactivate_heartbeat (void);
extern int hb_deactivate_heartbeat (void);
extern int hb_activate_heartbeat (void);

/* requests arriving on a process connection */
extern bool hb_is_registered_process (CSS_CONN_ENTRY * conn, char *args);
extern void hb_register_new_process (CSS_CONN_ENTRY * conn);
extern void hb_resource_receive_changemode (CSS_CONN_ENTRY * conn);
extern void hb_resource_receive_get_eof (CSS_CONN_ENTRY * conn);

/* NOTE(review): hb_check_request_eligibility returns int; presumably an
 * enum HB_HOST_CHECK_RESULT value - confirm. */
extern int hb_check_request_eligibility (SOCKET sd);
extern void hb_start_deactivate_server_info (void);
extern int hb_get_deactivating_server_count (void);
extern bool hb_is_deactivation_started (void);
extern bool hb_is_deactivation_ready (void);
extern void hb_finish_deactivate_server_info (void);

extern int hb_start_util_process (char *args);

/* NOTE(review): reason is an int; presumably an enum HB_NOLOG_REASON value -
 * confirm. msg_fmt is followed by variadic arguments. */
extern void hb_enable_er_log (void);
extern void hb_disable_er_log (int reason, const char *msg_fmt, ...);

/* Lookups by socket fd. NOTE(review): hb_return_proc_state_by_fd returns int;
 * presumably an enum HB_PROC_STATE value - confirm. */
extern int hb_return_proc_state_by_fd (int sfd);
extern bool hb_is_hang_process (int sfd);

#endif /* _MASTER_HEARTBEAT_H_ */