aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/mds-base.c172
-rw-r--r--src/mds-base.h102
-rw-r--r--src/mds-server/reexec.c33
3 files changed, 279 insertions, 28 deletions
diff --git a/src/mds-base.c b/src/mds-base.c
index 782f526..1de8c68 100644
--- a/src/mds-base.c
+++ b/src/mds-base.c
@@ -28,21 +28,26 @@
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
+#include <sys/mman.h>
#include <unistd.h>
#include <signal.h>
#include <pthread.h>
+#include <fcntl.h>
-#define try(INSTRUCTION) if ((r = INSTRUCTION)) return r
+#define try(INSTRUCTION) if ((r = INSTRUCTION)) goto fail
int argc = 0;
char** argv = NULL;
int is_respawn = 0;
int is_reexec = 0;
+pthread_t master_thread;
+
+volatile sig_atomic_t terminating = 0;
+volatile sig_atomic_t reexecing = 0;
int socket_fd = -1;
-pthread_t master_thread;
@@ -117,6 +122,135 @@ static int connect_to_display(void)
/**
+ * Unmarshal the server's saved state
+ *
+ * Opens the shared memory object whose name is built from
+ * `SHM_PATH_PATTERN` and this process's PID, reads all of it,
+ * restores `socket_fd` and passes the remaining data to
+ * `unmarshal_server`. If `unmarshal_server` reports an error,
+ * `reexec_failure_recover` is invoked.
+ *
+ * On error a message is printed with `perror`, and the file
+ * descriptor and the read buffer are released if still held.
+ *
+ * @return  Non-zero on error
+ */
+static int base_unmarshal(void)
+{
+  pid_t pid = getpid();
+  int reexec_fd = -1, r;
+  char shm_path[NAME_MAX + 1];
+  char* state_buf = NULL;
+  char* state_buf_;
+
+  /* Acquire access to marshalled data. */
+  xsnprintf(shm_path, SHM_PATH_PATTERN, (unsigned long int)pid);
+  reexec_fd = shm_open(shm_path, O_RDONLY, S_IRWXU);
+  fail_if (reexec_fd < 0); /* Critical. */
+
+  /* Read the state file. */
+  fail_if ((state_buf = state_buf_ = full_read(reexec_fd)) == NULL);
+
+  /* Release resources. The descriptor is marked as closed so
+     that the failure path does not close it a second time. */
+  close(reexec_fd);
+  reexec_fd = -1;
+  shm_unlink(shm_path);
+
+
+  /* Unmarshal state. */
+
+  /* Get the marshal protocol version. Not needed, there is only the one version right now. */
+  /* buf_get(state_buf_, int, 0, MDS_BASE_VARS_VERSION); */
+  buf_next(state_buf_, int, 1);
+
+  buf_get_next(state_buf_, int, socket_fd);
+  r = unmarshal_server(state_buf_);
+
+
+  /* Release resources. The pointer is cleared so that the
+     failure path does not free the buffer a second time. */
+  free(state_buf);
+  state_buf = NULL;
+
+  /* Recover after failure. */
+  if (r)
+    {
+      fail_if (reexec_failure_recover());
+    }
+
+  return 0;
+ pfail:
+  /* Print first, the clean-up calls below may overwrite `errno`. */
+  perror(*argv);
+  /* The shared memory object is not unlinked on this path,
+     as in the original implementation. */
+  if (reexec_fd >= 0)
+    close(reexec_fd);
+  free(state_buf);
+  return 1;
+}
+
+
+/**
+ * Marshal the server's state
+ *
+ * The marshalled data consists of `MDS_BASE_VARS_VERSION`, `socket_fd`
+ * and then whatever `marshal_server` stores; `marshal_server_size`
+ * tells how many bytes the latter needs.
+ *
+ * On error a message is printed with `perror` and the
+ * marshal buffer is released.
+ *
+ * @param   reexec_fd  The file descriptor of the file into which the state shall be saved
+ * @return             Non-zero on error
+ */
+static int base_marshal(int reexec_fd)
+{
+  size_t state_n;
+  char* state_buf;
+  char* state_buf_;
+
+  /* Calculate the size of the state data when it is marshalled:
+     one `int` for the version, one `int` for `socket_fd`. */
+  state_n = 2 * sizeof(int);
+  state_n += marshal_server_size();
+
+  /* Allocate a buffer for all data. */
+  state_buf = state_buf_ = malloc(state_n);
+  fail_if (state_buf == NULL);
+
+
+  /* Marshal the state of the server. */
+
+  /* Tell the new version of the program what version of the program it is marshalling. */
+  buf_set_next(state_buf_, int, MDS_BASE_VARS_VERSION);
+
+  /* Store the state. `marshal_server` is declared to
+     return non-zero on error, so do not ignore that. */
+  buf_set_next(state_buf_, int, socket_fd);
+  fail_if (marshal_server(state_buf_));
+
+
+  /* Send the marshalled data into the file. */
+  fail_if (full_write(reexec_fd, state_buf, state_n) < 0);
+  free(state_buf);
+
+  return 0;
+
+ pfail:
+  /* Print first, `free` may overwrite `errno`. */
+  perror(*argv);
+  /* `state_buf` is NULL if the allocation failed, which `free` accepts. */
+  free(state_buf);
+  return 1;
+}
+
+
+/**
+ * Marshal and re-execute the server
+ *
+ * The state is marshalled into a newly created shared memory
+ * object whose name is built from `SHM_PATH_PATTERN` and this
+ * process's PID, after which `reexec_server` is called.
+ *
+ * This function only returns on error,
+ * in which case the error will have been printed.
+ */
+static void perform_reexec(void)
+{
+  pid_t pid = getpid();
+  int reexec_fd;
+  char shm_path[NAME_MAX + 1];
+
+  /* Marshal the state of the server. */
+  xsnprintf(shm_path, SHM_PATH_PATTERN, (unsigned long int)pid);
+  reexec_fd = shm_open(shm_path, O_RDWR | O_CREAT | O_EXCL, S_IRWXU);
+  if (reexec_fd < 0)
+    {
+      /* Nothing was created, so there is nothing to unlink. */
+      perror(*argv);
+      return;
+    }
+  /* `base_marshal` signals failure with a non-zero (not negative)
+     value, and has then already printed the error. */
+  if (base_marshal(reexec_fd))
+    goto fail;
+  close(reexec_fd);
+  reexec_fd = -1;
+
+  /* Re-exec the server. Getting past this call means it failed. */
+  reexec_server(argc, argv, is_reexec);
+  perror(*argv);
+
+ fail:
+  if (reexec_fd >= 0)
+    close(reexec_fd);
+  shm_unlink(shm_path);
+}
+
+
+/**
* Entry point of the server
*
* @param argc_ Number of elements in `argv_`
@@ -125,8 +259,7 @@ static int connect_to_display(void)
*/
int main(int argc_, char** argv_)
{
- int r;
-
+ int r = 1;
argc = argc_;
argv = argv_;
@@ -152,19 +285,44 @@ int main(int argc_, char** argv_)
trap_signals();
- /* Connect to the display. */
if (is_reexec == 0)
- try (connect_to_display());
+ {
+ if (server_characteristics.require_display)
+ /* Connect to the display. */
+ try (connect_to_display());
+
+ /* Initialise the server. */
+ try (initialise_server());
+ }
+ else
+ {
+ /* Unmarshal the server's saved state. */
+ try (base_unmarshal());
+ }
+
+ /* Run the server. */
+ try (master_loop());
+
+
+ /* Re-exec the server if it has been signalled to re-exec. */
+ if (reexecing)
+ {
+ perform_reexec();
+ goto fail;
+ }
close(socket_fd);
return 0;
+
pfail:
perror(*argv);
+ r = 1;
+ fail:
if (socket_fd >= 0)
close(socket_fd);
- return 1;
+ return r;
}
diff --git a/src/mds-base.h b/src/mds-base.h
index b4e4ad7..28d7432 100644
--- a/src/mds-base.h
+++ b/src/mds-base.h
@@ -20,6 +20,11 @@
#include <pthread.h>
+#include <signal.h>
+
+
+#define MDS_BASE_VARS_VERSION 0
+
/**
@@ -30,9 +35,15 @@ typedef struct server_characteristics
/**
* Setting this to zero will cause the server to drop privileges as a security precaution
*/
- int require_privileges;
+ int require_privileges : 1;
-} server_characteristics_t;
+ /**
+ * Setting this to non-zero will cause the server to connect to the display
+ */
+ int require_display : 1;
+
+} __attribute__((packed)) server_characteristics_t;
+
/**
@@ -67,17 +78,28 @@ extern int is_respawn;
*/
extern int is_reexec;
+/**
+ * The thread that runs the master loop
+ */
+extern pthread_t master_thread;
+
/**
- * The file descriptor of the socket
- * that is connected to the server
+ * Whether the server has been signaled to terminate
*/
-extern int socket_fd;
+extern volatile sig_atomic_t terminating;
/**
- * The thread that runs the master loop
+ * Whether the server has been signaled to re-exec
*/
-extern pthread_t master_thread;
+extern volatile sig_atomic_t reexecing;
+
+
+/**
+ * The file descriptor of the socket
+ * that is connected to the server
+ */
+extern int socket_fd;
@@ -95,6 +117,8 @@ int trap_signals(void);
* This function is called when a signal that
* signals the server to re-exec has been received
*
+ * When this function is invoked, it should set `reexecing` to a non-zero value
+ *
* @param signo The signal that has been received
*/
extern void received_reexec(int signo);
@@ -105,10 +129,74 @@ extern void received_reexec(int signo);
* This function is called when a signal that
* signals the server to terminate has been received
*
+ * When this function is invoked, it should set `terminating` to a non-zero value
+ *
* @param signo The signal that has been received
*/
extern void received_terminate(int signo);
+/**
+ * This function should be implemented by the actual server implementation
+ *
+ * This function should initialise the server,
+ * and is not invoked after a re-exec.
+ *
+ * @return Non-zero on error
+ */
+extern int initialise_server(void);
+
+/**
+ * This function should be implemented by the actual server implementation
+ *
+ * Unmarshal server implementation specific data and update the server's state accordingly
+ *
+ * @param state_buf The marshalled data that has not been read already
+ * @return Non-zero on error
+ */
+extern int unmarshal_server(char* state_buf);
+
+/**
+ * This function should be implemented by the actual server implementation
+ *
+ * Marshal server implementation specific data into a buffer
+ *
+ * @param state_buf The buffer for the marshalled data
+ * @return Non-zero on error
+ */
+extern int marshal_server(char* state_buf);
+
+/**
+ * This function should be implemented by the actual server implementation
+ *
+ * Calculate the number of bytes that will be stored by `marshal_server`
+ *
+ * On failure the program should `abort()` or exit by other means.
+ * However it should not be possible for this function to fail.
+ *
+ * @return The number of bytes that will be stored by `marshal_server`
+ */
+extern size_t marshal_server_size(void);
+
+/**
+ * This function should be implemented by the actual server implementation
+ *
+ * Attempt to recover from a re-exec failure that has been
+ * detected after the server successfully updated its execution image
+ *
+ * @return Non-zero on error
+ */
+extern int reexec_failure_recover(void);
+
+
+/**
+ * This function should be implemented by the actual server implementation
+ *
+ * Perform the server's mission
+ *
+ * @return Non-zero on error
+ */
+extern int master_loop(void);
+
#endif
diff --git a/src/mds-server/reexec.c b/src/mds-server/reexec.c
index 0262288..2b41112 100644
--- a/src/mds-server/reexec.c
+++ b/src/mds-server/reexec.c
@@ -70,10 +70,9 @@ int marshal_server(int fd)
state_n += sizeof(int) + sizeof(sig_atomic_t) + 2 * sizeof(uint64_t) + 2 * sizeof(size_t);
state_n += list_elements * sizeof(size_t) + list_size + map_size;
- /* Allocate a buffer for all data except the client list and the client map. */
+ /* Allocate a buffer for all data. */
state_buf = state_buf_ = malloc(state_n);
- if (state_buf == NULL)
- goto fail;
+ fail_if (state_buf == NULL);
/* Tell the new version of the program what version of the program it is marshalling. */
@@ -109,13 +108,12 @@ int marshal_server(int fd)
fd_table_marshal(&client_map, state_buf_);
/* Send the marshalled data into the file. */
- if (full_write(fd, state_buf, state_n) < 0)
- goto fail;
+ fail_if (full_write(fd, state_buf, state_n) < 0);
free(state_buf);
return 0;
- fail:
+ pfail:
perror(*argv);
free(state_buf);
return -1;
@@ -296,23 +294,27 @@ void perform_reexec(int reexec)
int reexec_fd;
char shm_path[NAME_MAX + 1];
ssize_t node;
-
+
/* Join with all slaves threads. */
with_mutex (slave_mutex,
while (running_slaves > 0)
pthread_cond_wait(&slave_cond, &slave_mutex););
-
+
/* Release resources. */
pthread_mutex_destroy(&slave_mutex);
pthread_cond_destroy(&slave_cond);
pthread_mutex_destroy(&modify_mutex);
pthread_cond_destroy(&modify_cond);
hash_table_destroy(&modify_map, NULL, NULL);
-
+
/* Marshal the state of the server. */
xsnprintf(shm_path, SHM_PATH_PATTERN, (unsigned long int)pid);
reexec_fd = shm_open(shm_path, O_RDWR | O_CREAT | O_EXCL, S_IRWXU);
- fail_if (reexec_fd < 0);
+ if (reexec_fd < 0)
+ {
+ perror(*argv);
+ return;
+ }
fail_if (marshal_server(reexec_fd) < 0);
close(reexec_fd);
reexec_fd = -1;
@@ -332,10 +334,8 @@ void perform_reexec(int reexec)
pfail:
perror(*argv);
if (reexec_fd >= 0)
- {
- close(reexec_fd);
- shm_unlink(shm_path);
- }
+ close(reexec_fd);
+ shm_unlink(shm_path);
}
@@ -354,10 +354,15 @@ void complete_reexec(int socket_fd)
xsnprintf(shm_path, SHM_PATH_PATTERN, (unsigned long int)pid);
reexec_fd = shm_open(shm_path, O_RDONLY, S_IRWXU);
fail_if (reexec_fd < 0); /* Critical. */
+
/* Unmarshal state. */
r = unmarshal_server(reexec_fd);
+
+ /* Close and unlink marshalled data. */
close(reexec_fd);
shm_unlink(shm_path);
+
+ /* Recover after failure. */
if (r < 0)
{
/* Close all files (hopefully sockets) we do not know what they are. */