From 2c357793ae7136b5f8e9bc7e9f015906b55655a2 Mon Sep 17 00:00:00 2001 From: Mattias Andrée Date: Tue, 20 May 2014 02:53:21 +0200 Subject: misc, mostly mds-base re-exec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Mattias Andrée --- src/mds-base.c | 172 ++++++++++++++++++++++++++++++++++++++++++++++-- src/mds-base.h | 102 ++++++++++++++++++++++++++-- src/mds-server/reexec.c | 33 ++++++---- 3 files changed, 279 insertions(+), 28 deletions(-) (limited to 'src') diff --git a/src/mds-base.c b/src/mds-base.c index 782f526..1de8c68 100644 --- a/src/mds-base.c +++ b/src/mds-base.c @@ -28,21 +28,26 @@ #include #include #include +#include #include #include #include +#include -#define try(INSTRUCTION) if ((r = INSTRUCTION)) return r +#define try(INSTRUCTION) if ((r = INSTRUCTION)) goto fail int argc = 0; char** argv = NULL; int is_respawn = 0; int is_reexec = 0; +pthread_t master_thread; + +volatile sig_atomic_t terminating = 0; +volatile sig_atomic_t reexecing = 0; int socket_fd = -1; -pthread_t master_thread; @@ -116,6 +121,135 @@ static int connect_to_display(void) } +/** + * Unmarshal the server's saved state + * + * @return Non-zero on error + */ +static int base_unmarshal(void) +{ + pid_t pid = getpid(); + int reexec_fd, r; + char shm_path[NAME_MAX + 1]; + char* state_buf; + char* state_buf_; + + /* Acquire access to marshalled data. */ + xsnprintf(shm_path, SHM_PATH_PATTERN, (unsigned long int)pid); + reexec_fd = shm_open(shm_path, O_RDONLY, S_IRWXU); + fail_if (reexec_fd < 0); /* Critical. */ + + /* Read the state file. */ + fail_if ((state_buf = state_buf_ = full_read(reexec_fd)) == NULL); + + /* Release resources. */ + close(reexec_fd); + shm_unlink(shm_path); + + + /* Unmarshal state. */ + + /* Get the marshal protocol version. Not needed, there is only the one version right now. 
*/ + /* buf_get(state_buf_, int, 0, MDS_BASE_VARS_VERSION); */ + buf_next(state_buf_, int, 1); + + buf_get_next(state_buf_, int, socket_fd); + r = unmarshal_server(state_buf_); + + + /* Release resources. */ + free(state_buf); + + /* Recover after failure. */ + if (r) + fail_if (reexec_failure_recover()); + + return 0; + pfail: + perror(*argv); + return 1; +} + + +/** + * Marshal the server's state + * + * @param reexec_fd The file descriptor of the file into which the state shall be saved + * @return Non-zero on error + */ +static int base_marshal(int reexec_fd) +{ + size_t state_n; + char* state_buf; + char* state_buf_; + + /* Calculate the size of the state data when it is marshalled. */ + state_n = 2 * sizeof(int); + state_n += marshal_server_size(); + + /* Allocate a buffer for all data. */ + state_buf = state_buf_ = malloc(state_n); + fail_if (state_buf == NULL); + + + /* Marshal the state of the server. */ + + /* Tell the new version of the program what version of the program it is marshalling. */ + buf_set_next(state_buf_, int, MDS_BASE_VARS_VERSION); + + /* Store the state. */ + buf_set_next(state_buf_, int, socket_fd); + marshal_server(state_buf_); + + + /* Send the marshalled data into the file. */ + fail_if (full_write(reexec_fd, state_buf, state_n) < 0); + free(state_buf); + + return 0; + + pfail: + perror(*argv); + return 1; +} + + +/** + * Marshal and re-execute the server + * + * This function only returns on error, + * in which case the error will have been printed. + */ +static void perform_reexec(void) +{ + pid_t pid = getpid(); + int reexec_fd; + char shm_path[NAME_MAX + 1]; + + /* Marshal the state of the server. */ + xsnprintf(shm_path, SHM_PATH_PATTERN, (unsigned long int)pid); + reexec_fd = shm_open(shm_path, O_RDWR | O_CREAT | O_EXCL, S_IRWXU); + if (reexec_fd < 0) + { + perror(*argv); + return; + } + if (base_marshal(reexec_fd) < 0) + goto fail; + close(reexec_fd); + reexec_fd = -1; + + /* Re-exec the server. 
*/ + reexec_server(argc, argv, is_reexec); + perror(*argv); + + fail: + if (reexec_fd >= 0) + close(reexec_fd); + shm_unlink(shm_path); +} + + /** * Entry point of the server * @@ -125,8 +259,7 @@ static int connect_to_display(void) */ int main(int argc_, char** argv_) { - int r; - + int r = 1; argc = argc_; argv = argv_; @@ -152,19 +285,44 @@ int main(int argc_, char** argv_) trap_signals(); - /* Connect to the display. */ if (is_reexec == 0) - try (connect_to_display()); + { + if (server_characteristics.require_display) + /* Connect to the display. */ + try (connect_to_display()); + + /* Initialise the server. */ + try (initialise_server()); + } + else + { + /* Unmarshal the server's saved state. */ + try (base_unmarshal()); + } + + /* Run the server. */ + try (master_loop()); + + + /* Re-exec server if signal to re-exec. */ + if (reexecing) + { + perform_reexec(); + goto fail; + } close(socket_fd); return 0; + pfail: perror(*argv); + r = 1; + fail: if (socket_fd >= 0) close(socket_fd); - return 1; + return r; } diff --git a/src/mds-base.h b/src/mds-base.h index b4e4ad7..28d7432 100644 --- a/src/mds-base.h +++ b/src/mds-base.h @@ -20,6 +20,11 @@ #include +#include + + +#define MDS_BASE_VARS_VERSION 0 + /** @@ -30,9 +35,15 @@ typedef struct server_characteristics /** * Setting this to zero will cause the server to drop privileges as a security precaution */ - int require_privileges; + int require_privileges : 1; -} server_characteristics_t; + /** + * Setting this to non-zero will cause the server to connect to the display + */ + int require_display : 1; + +} __attribute__((packed)) server_characteristics_t; + /** @@ -67,17 +78,28 @@ extern int is_respawn; */ extern int is_reexec; +/** + * The thread that runs the master loop + */ +extern pthread_t master_thread; + /** - * The file descriptor of the socket - * that is connected to the server + * Whether the server has been signaled to terminate */ -extern int socket_fd; +extern volatile sig_atomic_t terminating; 
/** - * The thread that runs the master loop + * Whether the server has been signaled to re-exec */ -extern pthread_t master_thread; +extern volatile sig_atomic_t reexecing; + + +/** + * The file descriptor of the socket + * that is connected to the server + */ +extern int socket_fd; @@ -95,6 +117,8 @@ int trap_signals(void); * This function is called when a signal that * signals the server to re-exec has been received * + * When this function is invoked, it should set `reexecing` to a non-zero value + * * @param signo The signal that has been received */ extern void received_reexec(int signo); @@ -105,10 +129,74 @@ extern void received_reexec(int signo); * This function is called when a signal that * signals the server to re-exec has been received * + * When this function is invoked, it should set `terminating` to a non-zero value + * * @param signo The signal that has been received */ extern void received_terminate(int signo); +/** + * This function should be implemented by the actual server implementation + * + * This function should initialise the server, + * and it is not invoked after a re-exec. 
+ * + * @return Non-zero on error + */ +extern int initialise_server(void); + +/** + * This function should be implemented by the actual server implementation + * + * Unmarshal server implementation specific data and update the server's state accordingly + * + * @param state_buf The marshalled data that has not been read already + * @return Non-zero on error + */ +extern int unmarshal_server(char* state_buf); + +/** + * This function should be implemented by the actual server implementation + * + * Marshal server implementation specific data into a buffer + * + * @param state_buf The buffer for the marshalled data + * @return Non-zero on error + */ +extern int marshal_server(char* state_buf); + +/** + * This function should be implemented by the actual server implementation + * + * Calculate the number of bytes that will be stored by `marshal_server` + * + * On failure the program should `abort()` or exit by other means. + * However it should not be possible for this function to fail. + * + * @return The number of bytes that will be stored by `marshal_server` + */ +extern size_t marshal_server_size(void); + +/** + * This function should be implemented by the actual server implementation + * + * Attempt to recover from a re-exec failure that has been + * detected after the server successfully updated its execution image + * + * @return Non-zero on error + */ +extern int reexec_failure_recover(void); + + +/** + * This function should be implemented by the actual server implementation + * + * Perform the server's mission + * + * @return Non-zero on error + */ +extern int master_loop(void); + #endif diff --git a/src/mds-server/reexec.c b/src/mds-server/reexec.c index 0262288..2b41112 100644 --- a/src/mds-server/reexec.c +++ b/src/mds-server/reexec.c @@ -70,10 +70,9 @@ int marshal_server(int fd) state_n += sizeof(int) + sizeof(sig_atomic_t) + 2 * sizeof(uint64_t) + 2 * sizeof(size_t); state_n += list_elements * sizeof(size_t) + list_size + map_size; - /* Allocate a buffer 
for all data except the client list and the client map. */ + /* Allocate a buffer for all data. */ state_buf = state_buf_ = malloc(state_n); - if (state_buf == NULL) - goto fail; + fail_if (state_buf == NULL); /* Tell the new version of the program what version of the program it is marshalling. */ @@ -109,13 +108,12 @@ int marshal_server(int fd) fd_table_marshal(&client_map, state_buf_); /* Send the marshalled data into the file. */ - if (full_write(fd, state_buf, state_n) < 0) - goto fail; + fail_if (full_write(fd, state_buf, state_n) < 0); free(state_buf); return 0; - fail: + pfail: perror(*argv); free(state_buf); return -1; @@ -296,23 +294,27 @@ void perform_reexec(int reexec) int reexec_fd; char shm_path[NAME_MAX + 1]; ssize_t node; - + /* Join with all slaves threads. */ with_mutex (slave_mutex, while (running_slaves > 0) pthread_cond_wait(&slave_cond, &slave_mutex);); - + /* Release resources. */ pthread_mutex_destroy(&slave_mutex); pthread_cond_destroy(&slave_cond); pthread_mutex_destroy(&modify_mutex); pthread_cond_destroy(&modify_cond); hash_table_destroy(&modify_map, NULL, NULL); - + /* Marshal the state of the server. */ xsnprintf(shm_path, SHM_PATH_PATTERN, (unsigned long int)pid); reexec_fd = shm_open(shm_path, O_RDWR | O_CREAT | O_EXCL, S_IRWXU); - fail_if (reexec_fd < 0); + if (reexec_fd < 0) + { + perror(*argv); + return; + } fail_if (marshal_server(reexec_fd) < 0); close(reexec_fd); reexec_fd = -1; @@ -332,10 +334,8 @@ void perform_reexec(int reexec) pfail: perror(*argv); if (reexec_fd >= 0) - { - close(reexec_fd); - shm_unlink(shm_path); - } + close(reexec_fd); + shm_unlink(shm_path); } @@ -354,10 +354,15 @@ void complete_reexec(int socket_fd) xsnprintf(shm_path, SHM_PATH_PATTERN, (unsigned long int)pid); reexec_fd = shm_open(shm_path, O_RDONLY, S_IRWXU); fail_if (reexec_fd < 0); /* Critical. */ + /* Unmarshal state. */ r = unmarshal_server(reexec_fd); + + /* Close and unlink marshalled data. 
*/ close(reexec_fd); shm_unlink(shm_path); + + /* Recover after failure. */ if (r < 0) { /* Close all files (hopefully sockets) we do not know what they are. */ -- cgit v1.2.3-70-g09d2