contrib/package/freifunk-watchdog:

- poll /dev/watchdog if available
	- also monitor dropbear
This commit is contained in:
Jo-Philipp Wich 2009-05-12 22:16:24 +00:00
parent 153d287298
commit 89afc3a1d5
4 changed files with 96 additions and 16 deletions

View file

@ -8,7 +8,7 @@
include $(TOPDIR)/rules.mk include $(TOPDIR)/rules.mk
PKG_NAME:=freifunk-watchdog PKG_NAME:=freifunk-watchdog
PKG_RELEASE:=1 PKG_RELEASE:=2
PKG_BUILD_DIR := $(BUILD_DIR)/$(PKG_NAME) PKG_BUILD_DIR := $(BUILD_DIR)/$(PKG_NAME)
PKG_BUILD_DEPENDS := uci PKG_BUILD_DEPENDS := uci

View file

@ -10,6 +10,10 @@ boot()
echo "* * * * * $BIN running || /etc/init.d/freifunk-watchdog restart" >> /etc/crontabs/root echo "* * * * * $BIN running || /etc/init.d/freifunk-watchdog restart" >> /etc/crontabs/root
fi fi
if lsmod | grep -q softdog; then
mknod /dev/watchdog c 10 130
fi
start start
} }

View file

@ -131,6 +131,24 @@ static int find_process(const char *name)
return -1; return -1;
} }
/*
 * Get the 5 minute load average.
 *
 * Reads /proc/loadavg and returns its second whitespace separated field.
 * Returns 0.00 if the file can not be opened, read or parsed.
 */
static double find_loadavg(void)
{
	int fd;
	ssize_t len;
	char buffer[64];
	char *field;
	double load = 0.00;

	if( (fd = open("/proc/loadavg", O_RDONLY)) > -1 )
	{
		/* Leave room for the terminator, read() does not add one */
		len = read(fd, buffer, sizeof(buffer) - 1);
		close(fd);

		if( len > 0 )
		{
			buffer[len] = '\0';

			/*
			 * Skip the first (1 minute) field instead of relying on a
			 * fixed offset, its width grows once the load reaches 10.00
			 */
			field = buffer;

			while( *field == ' ' )
				field++;

			while( *field != '\0' && *field != ' ' )
				field++;

			/* strtod() skips the separating blank, yields 0 on failure */
			load = strtod(field, NULL);
		}
	}

	return load;
}
/* Check if given uci file was updated */ /* Check if given uci file was updated */
static int check_uci_update(const char *config, time_t *mtime) static int check_uci_update(const char *config, time_t *mtime)
{ {
@ -240,6 +258,8 @@ static wifi_tuple_t * load_wifi_uci(wifi_tuple_t *ifs, time_t *modtime)
static int do_daemon(void) static int do_daemon(void)
{ {
int iwfd; int iwfd;
int wdfd;
int wdtrigger = 1;
int channel; int channel;
char bssid[18]; char bssid[18];
@ -248,32 +268,44 @@ static int do_daemon(void)
int restart_wifi = 0; int restart_wifi = 0;
int restart_cron = 0; int restart_cron = 0;
int restart_sshd = 0;
int loadavg_panic = 0;
openlog(SYSLOG_IDENT, 0, LOG_DAEMON); openlog(SYSLOG_IDENT, 0, LOG_DAEMON);
//daemon(1, 1); //daemon(1, 1);
if( (iwfd = socket(AF_INET, SOCK_DGRAM, 0)) == -1 ) if( (iwfd = socket(AF_INET, SOCK_DGRAM, 0)) == -1 )
{ {
perror("Can not open wireless control socket"); syslog(LOG_ERR, "Can not open wireless control socket: %s",
strerror(errno));
return 1; return 1;
} }
if( (wdfd = open(WATCH_DEVICE, O_WRONLY)) > -1 )
{
syslog(LOG_INFO, "Opened %s - polling each %i seconds",
WATCH_DEVICE, INTERVAL);
}
while( 1 ) while( 1 )
{ {
if( (ifs = load_wifi_uci(ifs, &modtime)) == NULL ) /* Check average load */
{ if( find_loadavg() >= LOAD_TRESHOLD )
printf("Can not load wireless uci. File corrupt?\n"); loadavg_panic++;
return 1; else
} loadavg_panic = 0;
/* Check crond */ /* Check crond */
if( find_process("crond") < 0 ) if( find_process("crond") < 0 )
{
syslog(LOG_WARNING, "The crond process died, restarting");
restart_cron++; restart_cron++;
}
/* Check SSHd */
if( find_process("dropbear") < 0 )
restart_sshd++;
/* Check wireless interfaces */ /* Check wireless interfaces */
ifs = load_wifi_uci(ifs, &modtime);
for( curif = ifs; curif; curif = curif->next ) for( curif = ifs; curif; curif = curif->next )
{ {
/* Get current channel and bssid */ /* Get current channel and bssid */
@ -309,7 +341,7 @@ static int do_daemon(void)
if( restart_wifi >= HYSTERESIS ) if( restart_wifi >= HYSTERESIS )
{ {
restart_wifi = 0; restart_wifi = 0;
syslog(LOG_WARNING, "Restarting wireless"); syslog(LOG_WARNING, "Channel or BSSID mismatch on wireless interface, restarting");
EXEC(WIFI_ACTION); EXEC(WIFI_ACTION);
} }
@ -317,13 +349,44 @@ static int do_daemon(void)
if( restart_cron >= HYSTERESIS ) if( restart_cron >= HYSTERESIS )
{ {
restart_cron = 0; restart_cron = 0;
syslog(LOG_WARNING, "Restarting crond process"); syslog(LOG_WARNING, "The cron process died, restarting");
EXEC(CRON_ACTION); EXEC(CRON_ACTION);
} }
/* SSHd restart required? */
if( restart_sshd >= HYSTERESIS )
{
restart_sshd = 0;
syslog(LOG_WARNING, "The ssh process died, restarting");
EXEC(SSHD_ACTION);
}
/* Is there a load problem? */
if( loadavg_panic >= HYSTERESIS )
{
syslog(LOG_EMERG, "Critical system load level, triggering reset!");
/* Try watchdog, fall back to reboot */
if( wdfd > -1 )
ioctl(wdfd, WDIOC_SETTIMEOUT, &wdtrigger);
else
EXEC(LOAD_ACTION);
}
/* Reset watchdog timer */
if( wdfd > -1 )
write(wdfd, '\0', 1);
sleep(INTERVAL); sleep(INTERVAL);
} }
if( wdfd > -1 )
{
syslog(LOG_INFO, "Stopping watchdog timer");
write(wdfd, WATCH_SHUTDOWN, 1);
close(wdfd);
}
closelog(); closelog();
return 0; return 0;
} }

View file

@ -31,6 +31,8 @@
#include <sys/stat.h> #include <sys/stat.h>
#include <sys/ioctl.h> #include <sys/ioctl.h>
#include <sys/socket.h> #include <sys/socket.h>
#include <linux/types.h>
#include <linux/watchdog.h>
#include "ucix.h" #include "ucix.h"
#include "wireless.22.h" #include "wireless.22.h"
@ -51,6 +53,17 @@
/* Crond error action */ /* Crond error action */
#define CRON_ACTION "/etc/init.d/cron", "/etc/init.d/cron", "restart" #define CRON_ACTION "/etc/init.d/cron", "/etc/init.d/cron", "restart"
/*
 * SSHd error action: argument list handed to EXEC() once dropbear has been
 * found missing often enough.
 * NOTE(review): the path appears twice, presumably as executable path and
 * argv[0] - confirm against the EXEC() macro.
 */
#define SSHD_ACTION "/etc/init.d/dropbear", "/etc/init.d/dropbear", "restart"
/*
 * Watchdog device: opened write-only by the daemon and polled on each loop
 * iteration. WATCH_SHUTDOWN is the character sent to the device right before
 * it is closed.
 * NOTE(review): presumably the "magic close" character that disarms the
 * Linux watchdog - confirm against the driver in use.
 */
#define WATCH_DEVICE "/dev/watchdog"
#define WATCH_SHUTDOWN 'V'
/*
 * System load error action and threshold: a 5 minute load average at or
 * above LOAD_TRESHOLD counts towards a load panic; LOAD_ACTION is executed
 * via EXEC() when a panic is raised and no watchdog device is open.
 */
#define LOAD_TRESHOLD 5.00
#define LOAD_ACTION "/sbin/reboot"
/* Fallback binary name (passed by makefile) */ /* Fallback binary name (passed by makefile) */
#ifndef BINARY #ifndef BINARY
#define BINARY "ffwatchd" #define BINARY "ffwatchd"