#!/bin/bash
#
# diskdump	This starts, stops, and reloads the diskdump
#		and crashdump facility
#
# chkconfig: - 04 94
# description: Save dump file if previous system crashed and initialize diskdump module.
# config: /etc/sysconfig/diskdump
#
# $Id: diskdump.sh,v 1.81 2005/12/13 22:26:55 akira Exp $

# Pull in the distribution's init-script helper library.
. /etc/rc.d/init.d/functions

# Identity of this service. VERSION is given when "make install" is executed;
# SERVICE_NAME is the script's own file name without a ".sh" suffix.
VERSION="1.2.8"
SERVICE_NAME=$(basename $0 .sh)

# Configuration, kernel interface and mail template locations.
CONF_DISKDUMP="/etc/sysconfig/diskdump"
PROC_DISKDUMP="/proc/diskdump"
MAIL_TEMPLATE="/etc/diskdump/mail_template.us"

# External programs driven by this script.
DISKDUMPFMT="/usr/sbin/diskdumpfmt"
DISKDUMPCTL="/usr/sbin/diskdumpctl"
DISKDUMPMSG="/usr/sbin/diskdumpmsg"
SAVECORE="/usr/sbin/savecore"
SENDMAIL="/usr/sbin/sendmail"
MODINFO="/sbin/modinfo"
MODPROBE="/sbin/modprobe"
RMMOD="/sbin/rmmod"

# Facts about the running system:
#   SYSFSROOT - mount point (2nd field) of the first sysfs line in /proc/mounts
#   KERNEL    - first three characters of the kernel release, e.g. "2.6"
#   MODDIR    - kernel module tree of the running kernel
SYSFSROOT=$(grep -w -m1 sysfs /proc/mounts | cut -d' ' -f2)
KERNEL=$(uname -r | cut -c1-3)
MODDIR=/lib/modules/$(uname -r)/kernel/

# diskdump version
# Answered before any configuration is read, so it works even when no
# dump device has been configured.
case "$1" in
version)
	echo "$SERVICE_NAME version $VERSION" >&2
	exit 0 ;;
esac

# Source Dump Device
# Load the administrator's settings (DEVICE and the optional switches used
# further down) from the configuration file, if there is one.
if [ -f "$CONF_DISKDUMP" ]; then
	. "$CONF_DISKDUMP"
fi

# Nothing can be done without at least one dump device.  The expansion is
# quoted so the test is a real emptiness check even when DEVICE holds
# several words or is unset.  "swapsavecore" fails silently here.
if [ -z "$DEVICE" ]; then
	if [ "$1" != "swapsavecore" ]; then
		echo "Device not specified in $CONF_DISKDUMP" >&2
	fi
	exit 1
fi

# Deferred Savecore
# EXPIRATION is compared numerically against the age of a dump later on;
# a negative value is rejected up front.  As with the DEVICE check, the
# "swapsavecore" subcommand fails without printing anything.
if [[ $EXPIRATION -lt 0 ]]; then
	[ "$1" = "swapsavecore" ] ||
		echo "EXPIRATION(=$EXPIRATION) invalid in $CONF_DISKDUMP" >&2
	exit 1
fi

# Swap Partition Support and Message Complement
# Feature switches depend on the kernel series detected in $KERNEL:
#   2.4 - neither swap-partition dumps nor message salvaging
#   2.6 - swap-partition dumps; message salvaging unless the configuration
#         explicitly set SALVAGEMESSAGE=no
# Any other series is refused.
if [ "$KERNEL" = "2.4" ]; then
	SWAPSUPPORT="no"
	SALVAGEMESSAGE="no"
elif [ "$KERNEL" = "2.6" ]; then
	SWAPSUPPORT="yes"
	[ "$SALVAGEMESSAGE" = no ] || SALVAGEMESSAGE=yes
else
	echo "kernel version '$KERNEL' incorrect" >&2
	exit 1
fi

# Exit status of the script; subcommands overwrite it as needed.
RETVAL=0

# Send one message ($1) to syslog at priority "info", tagged with the
# service name.
logging() {
	logger -p info -t $SERVICE_NAME "$1"
}

# Register dump device $1 with the kernel through $DISKDUMPCTL.
#
# The outcome is reported to the caller in the global variable "ret":
#   0        the device is registered (either just now, or a registered
#            device with the same major:minor type was already present)
#   non-zero exit status of $DISKDUMPCTL; its output is sent to syslog
# The captured $DISKDUMPCTL output is left in the global "errmsg".
start_device() {
	local dev=$1
	local dev_type=$(get_device_type $dev)
	local reg

	for reg in $(get_registered_device); do
		if [ "$dev_type" = "$(get_device_type $reg)" ]; then
			# Already registered: report success explicitly so the
			# caller does not act on a stale value of "ret".
			ret=0
			return
		fi
	done

	errmsg=$($DISKDUMPCTL $dev 2>&1)
	ret=$?
	if [ $ret -ne 0 ]; then
		logging "$errmsg"
	fi
}

# Unregister dump device $1 ("$DISKDUMPCTL -u").  On failure the tool's
# combined stdout/stderr is written to syslog.  The captured output is
# left in the global "errmsg".
stop_device() {
	local target=$1

	if ! errmsg=$($DISKDUMPCTL -u $target 2>&1); then
		logging "$errmsg"
	fi
}

# Print the device type of node $1 as "<major>:<minor>" (stat's %t and %T
# fields).
get_device_type() {
	stat -c '%t:%T' $1
}

# Print the dump devices currently listed in $PROC_DISKDUMP (first field
# of every non-comment line).  On a 2.6 kernel each name is prefixed with
# "/dev/" and the names are printed space-separated on one line.
# Prints nothing when no device is listed.
get_registered_device() {
	local devices=""
	local tmp=""
	local dev

	devices=$(grep -v '^#' $PROC_DISKDUMP | cut -d\  -f1)
	if [ -z "$devices" ]; then
		return
	fi

	if [ "$KERNEL" = "2.6" ]; then
		for dev in $devices; do
			# Append, so that every registered device is reported and
			# not only the last one.
			tmp="$tmp/dev/$dev "
		done
		devices=$tmp
	fi

	echo "$devices"
}

# Load kernel module $1 with $MODPROBE.
#
# When a directory $2 (relative to $MODDIR) is given, the module is only
# loaded if its object file exists there ("$1.o" on 2.4, "$1.ko" on 2.6);
# otherwise the function returns silently.
# If $MODPROBE fails, its output is written to syslog.  The captured
# output is left in the global "errmsg".
load_module() {
	local mod=$1
	local dir=$2

	if [ "$dir" ]; then
		case "$KERNEL" in
		"2.4")	if [ ! -f "$MODDIR/$dir/$mod.o" ]; then
				return
			fi ;;
		"2.6")	if [ ! -f "$MODDIR/$dir/$mod.ko" ]; then
				return
			fi ;;
		esac
	fi

	# Capture stderr as well: that is where the error text ends up, and
	# without it the logged message would be empty.
	errmsg=$($MODPROBE $mod 2>&1)
	if [ $? -ne 0 ]; then
		logging "$errmsg"
	fi
}

# Report the outcome of one phase via the success/failure/warning status
# helpers, then end the line on stderr.
#   $1  label passed on to the helper
#   $2  number of devices that succeeded
#   $3  number of devices that failed
# No failures -> success; failures but no successes -> failure;
# a mix of both -> warning.
show_result() {
	local label=$1
	local ok_count=$2
	local fail_count=$3
	local reporter=warning

	if [ $fail_count -eq 0 ]; then
		reporter=success
	elif [ $ok_count -eq 0 ]; then
		reporter=failure
	fi

	$reporter $label
	echo >&2
}

# Print the given words in reverse order, each followed by one space.
reverse() {
	local reversed=""

	# $* is left unquoted on purpose: arguments are re-split into words.
	for item in $*; do
		reversed="$item $reversed"
	done

	printf '%s\n' "$reversed"
}

# Print one field of the "$DISKDUMPFMT -cv" report for a device.
#   $1  device to inspect
#   $2  text selecting the report line(s) (grep pattern)
# For each matching line, everything after the first ':' is printed with
# all spaces removed.
get_dump_info()
{
	local device=$1
	local field="$2"

	$DISKDUMPFMT -cv $device 2>&1 \
		| grep "$field" \
		| cut -d: -f2- \
		| tr -d ' '
}

# Email the administrator about a dump device that was left untouched.
#   $1  dump device
#   $2  reason, substituted for %CAUSE% in the template
#
# The message is built from $MAIL_TEMPLATE (lines starting with '#' are
# dropped) by replacing the %MAILTO%, %FROM%, %CURRENT%, %NODENAME%,
# %RELEASE%, %MACHINE%, %DEVICE%, %CAUSE%, %DAYS% and %DUMPED_DATE%
# placeholders, and is handed to "$SENDMAIL -t".
# Nothing is sent (and the reason is logged) when $SENDMAIL or the
# template is missing or MAILTO is empty.  An empty FROM defaults to
# root@<node name from the dump header> and is stored in the global FROM.
# NOTE(review): substituted values containing '|' or '&' would confuse
# the sed expressions below.
notify() {
	local dev=$1
	local cause=$2
	local node_name=$(get_dump_info $dev "node name")
	local machine=$(get_dump_info $dev "machine")
	local release=$(get_dump_info $dev "release")
	local dumped_date=$(get_dump_info $dev "dumped date")
	local passed_days=$(get_dump_info $dev "passed days")
	local content

	if [ ! -e $SENDMAIL ]; then
		logging "unable to email ($SENDMAIL not exist)"
		return
	fi

	if [ ! -e $MAIL_TEMPLATE ]; then
		logging "unable to email ($MAIL_TEMPLATE not exist)"
		return
	fi

	if [ -z "$MAILTO" ]; then
		logging "MAILTO not specified in $CONF_DISKDUMP"
		return
	fi

	if [ -z "$FROM" ]; then
		FROM="root@$node_name"
	fi

	content=$(grep -v '^#' $MAIL_TEMPLATE \
	| sed	-e "s|%MAILTO%|$MAILTO|g" \
		-e "s|%FROM%|$FROM|g" \
		-e "s|%CURRENT%|$(date +'%D %T')|g" \
		-e "s|%NODENAME%|$node_name|g" \
		-e "s|%RELEASE%|$release|g" \
		-e "s|%MACHINE%|$machine|g" \
		-e "s|%DEVICE%|$dev|g" \
		-e "s|%CAUSE%|$cause|g" \
		-e "s|%DAYS%|$passed_days|g" \
		-e "s|%DUMPED_DATE%|$dumped_date|g")

	# Deliver in the background so the caller is not held up by the
	# mailer.  The result is logged from inside the background job: the
	# status of a command started with '&' is always 0, so it cannot be
	# examined by the foreground shell.
	(
		if printf '%s\n' "$content" | $SENDMAIL -t; then
			logging "emailed from $FROM to $MAILTO"
		else
			logging "failed to email (an error occurred)"
		fi
	) &
}

# Run $DISKDUMPMSG on every vmcore under /var/crash/127.0.0.1-*/ that is
# not among the paths given as arguments (the vmcores that existed before
# the dumps were saved).  A failing $DISKDUMPMSG run is logged.
complement_messages()
{
	local old_vmcores=$*
	local vmcore

	for vmcore in /var/crash/127.0.0.1-*/vmcore; do
		# An unmatched glob is left as the literal pattern; skip it
		# instead of handing a non-existent path to $DISKDUMPMSG.
		[ -e "$vmcore" ] || continue

		# Compare as whole, literal words so the path is never
		# interpreted as a pattern.
		case " $old_vmcores " in
		*" $vmcore "*)	continue ;;
		esac

		# new vmcore
		$DISKDUMPMSG $vmcore
		if [ $? -ne 0 ]; then
			logging "$DISKDUMPMSG failed"
		fi
	done
}

# Bring the diskdump facility up.
#   $1  optional "-f": skip the "already running" check and temporarily
#       treat PRESERVEDUMP and SKIPSAVECORE as "no" (the flag is also
#       passed on to $SAVECORE)
#
# Phases:
#   1. load the dump driver modules
#   2. classify every device in $DEVICE by the exit status of
#      "$DISKDUMPFMT -c"
#   3. handle devices whose savecore is being skipped (SKIPSAVECORE)
#   4. save pending dumps with $SAVECORE
#   5. (re)format the devices that need it
#   6. register all usable devices with the kernel
# Sets RETVAL to 0 if at least one device was activated; otherwise calls
# stop and sets RETVAL to 1.
start() {
	local opt=$1
	local good_devices
	local bad_devices
	local savecore_devices
	local reformat_devices
	local skipped_devices
	# Integer counters, so "x=x+1" below is evaluated arithmetically.
	local -i good_count=0
	local -i bad_count=0

	if [ "$opt" = "-f" ]; then
		# Disable PRESERVEDUMP and SKIPSAVECORE temporarily
		PRESERVEDUMP="no"
		SKIPSAVECORE="no"
	else
		# Check if diskdump is already running
		status 2> /dev/null
		if [ $? -eq 0 ]; then
			echo "$SERVICE_NAME is already running" >&2
			return
		fi
	fi

	load_module scsi_dump
	load_module block_dump drivers/block
#	load_module ide_dump drivers/ide

	# Sort the configured devices into work lists according to what
	# "$DISKDUMPFMT -c" reports for each of them.
	for dev in $(echo $DEVICE | tr ':' ' '); do
		$DISKDUMPFMT -c $dev 2> /dev/null
		ret=$?
		case $ret in
		0) good_devices="$good_devices $dev" ;;
		1) if [ "$SKIPSAVECORE" = "yes" ]; then
			skipped_devices="$skipped_devices $dev"
		   else
			savecore_devices="$savecore_devices $dev"
		   fi ;;
		3) reformat_devices="$reformat_devices $dev" ;;
		5) if [ "$SWAPSUPPORT" = "yes" ]; then
			good_devices="$good_devices $dev"
		   else
			bad_devices="$bad_devices $dev"
			logging "$dev is a swap device"
		   fi ;;
		6) if [ "$PRESERVEDUMP" = "yes" ]; then
			bad_devices="$bad_devices $dev"
			logging "$dev is a preserved dump device"
			notify $dev "PRESERVEDUMP"
		   else
			good_devices="$good_devices $dev"
		   fi ;;
		*) bad_devices="$bad_devices $dev" ;;
					# delay error message later
		esac
	done

	# Devices whose dump is not being saved: keep them untouched (and
	# notify) until the dump is at least EXPIRATION days old, then
	# reformat.  EXPIRATION=0 means the dump never expires.
	if [ ! -z "$skipped_devices" ]; then
		for dev in $skipped_devices; do
			if [[ $EXPIRATION -eq 0 ]]; then
				logging "$dev has no expiration"
				bad_devices="$bad_devices $dev"
				notify $dev "SKIPSAVECORE"
				continue
			fi

			passed_days=$(get_dump_info $dev "passed days")
			logging "$passed_days days passed since dumped to $dev"
			if [[ $passed_days -ge $EXPIRATION ]]; then
				reformat_devices="$reformat_devices $dev"
				logging "$dev expired"
			else
				bad_devices="$bad_devices $dev"
				logging "skipped \"savecore $dev\""
				notify $dev "SKIPSAVECORE"
			fi
		done
	fi

	if [ ! -z "$savecore_devices" ]; then
		echo "Saving panic dump: " >&2

		# Remember which vmcores exist already, so that only the new
		# ones are post-processed afterwards.
		old_vmcores=$(shopt -s nullglob; echo /var/crash/127.0.0.1-*/vmcore)

		for dev in $savecore_devices; do
			$SAVECORE -p $opt $dev
			if [ $? -eq 0 ]; then
				good_count=good_count+1
				reformat_devices="$reformat_devices $dev"
				continue
			fi

			# savecore failed: keep the dump on the device if asked
			# to, otherwise reformat it anyway.
			bad_count=bad_count+1
			if [ "$PRESERVEDUMP" = "yes" ]; then
				bad_devices="$bad_devices $dev"
				logging "$dev is a preserved dump device"
				notify $dev "PRESERVEDUMP"
			else
				reformat_devices="$reformat_devices $dev"
			fi
		done

		if [ "$SALVAGEMESSAGE" = yes ]; then
			complement_messages $old_vmcores
		fi

		rm -f /var/crash/127.0.0.1-*/vmcore-uncompressedrawdata

		show_result "saving" $good_count $bad_count
	fi

	good_count=0; bad_count=0

	if [ ! -z "$reformat_devices" ]; then
		echo "Formatting dump device: " >&2

		load_module diskdump
		for dev in $reformat_devices; do
			$DISKDUMPFMT -p $dev
			if [ $? -eq 0 ]; then
				good_devices="$good_devices $dev"
				good_count=good_count+1
			else
				bad_count=bad_count+1
			fi
		done

		show_result "formatting" $good_count $bad_count
	fi

	# bad_count is deliberately carried over: devices that failed to
	# format count as failures of the activation phase too.
	good_count=0

	echo -n "Starting diskdump: " >&2
	for dev in $bad_devices; do
		logging "$dev is not suited for dump device"
		bad_count=bad_count+1
	done
	for dev in $(reverse $good_devices); do
		# start_device reports through the global "ret".  Reset it
		# first so that a status left over from the classification
		# loop above is never mistaken for this device's result.
		ret=0
		start_device $dev
		if [ $ret -eq 0 ]; then
			good_count=good_count+1
		else
			bad_count=bad_count+1
		fi
	done

	show_result "activating" $good_count $bad_count

	if [ $good_count -eq 0 ]; then
		stop
		RETVAL=1
	else
		RETVAL=0
	fi
}

# Remove the dump-related kernel modules that /proc/modules lists as
# loaded, in this order: scsi_dump, ide-dump, block_dump, diskdump.
unload_module() {
	local mod

	for mod in scsi_dump ide-dump block_dump diskdump; do
		# The trailing space anchors the match to the whole module name.
		if grep "^$mod " /proc/modules > /dev/null; then
			$RMMOD $mod
		fi
	done
}

# Unregister every dump device and unload the dump modules.
#   2.4: devices are taken from the non-comment lines of $PROC_DISKDUMP
#   2.6: every file named "dump" below $SYSFSROOT is read; each line is a
#        device name that is unregistered as /dev/<name>
# Any other kernel series aborts the script with status 1.
stop() {
	case "$KERNEL" in
	"2.4")	if [ -f $PROC_DISKDUMP ]; then
			grep -v '^#' $PROC_DISKDUMP | while read dev sector; do
				stop_device $dev
			done
		fi ;;
	"2.6")	# Without a sysfs mount point there is nothing to scan.  An
		# empty path must not reach find, which would then walk the
		# current directory instead.
		if [ -n "$SYSFSROOT" ]; then
			find "$SYSFSROOT" -name dump | while read f; do
				# Read the attribute file on stdin while keeping
				# the find pipe parked on fd 3.
				exec 3<&0 <"$f"
				while read dev; do
					stop_device "/dev/$dev"
				done
				exec 0<&3 3<&-
			done
		fi ;;
	*)	echo "kernel version '$KERNEL' incorrect" >&2
		exit 1 ;;
	esac
	unload_module
}

# For every configured device on which "$DISKDUMPFMT -c" exits with
# status 6, save the dump with "$SAVECORE -D -p" and store that command's
# exit status in RETVAL.
swapsavecore() {
	for dev in ${DEVICE//:/ }; do
		$DISKDUMPFMT -c $dev 2> /dev/null
		case $? in
		6)	echo -e $"Saving panic dump from swap partition:\r" >&2
			$SAVECORE -D -p $dev
			RETVAL=$?
			echo -ne "\r" >&2 ;;
		esac
	done
}

# Format every configured dump device with "$DISKDUMPFMT <opts>".
#   $1  option string handed to $DISKDUMPFMT: -p, -fp or -ap
#       (-fp additionally asks for confirmation per device)
# Devices for which "$DISKDUMPFMT -c" exits with 5, 7 or 8 are skipped,
# and a skip counts as success.  RETVAL becomes 0 if at least one device
# was formatted or skipped that way, 1 otherwise.
# Unless called with -ap, the diskdump module is loaded for the duration
# of the run when $PROC_DISKDUMP does not exist yet.
format() {
	local opts=$1
	local result=1
	local loaded_here=0
	local skip_reason

	if [ $opts != "-ap" ] && [ ! -f $PROC_DISKDUMP ]; then
		$MODPROBE diskdump
		loaded_here=1
	fi

	echo "Formatting dump device: " >&2
	for dev in ${DEVICE//:/ }; do
		skip_reason=""
		$DISKDUMPFMT -c $dev 2> /dev/null
		case $? in
		5)	skip_reason="swap device" ;;
		7)	skip_reason="mounted device" ;;
		8)	skip_reason="swap mounted" ;;
		esac
		if [ -n "$skip_reason" ]; then
			echo "$dev: skipped ($skip_reason)" >&2
			result=0
			continue
		fi

		# Initial format: anything but an explicit yes leaves the
		# device alone.
		if [ "$opts" = "-fp" ]; then
			echo -n "Do you want to format $dev (yes/NO)? " >&2
			read ans
			case $ans in
			y|Y|[yY][eE][sS])
				;;
			*)
				echo "$dev: skipped" >&2
				continue
				;;
			esac
		fi

		$DISKDUMPFMT $opts $dev && result=0
	done

	# Only unload the module if this call loaded it.
	if [ $loaded_here -eq 1 ]; then
		$RMMOD diskdump
	fi

	if [ $result -ne 0 ]; then
		echo "Each format processing failed" >&2
	fi

	RETVAL=$result
}

# Show the verbose "$DISKDUMPFMT -cv" report for every configured device.
# If any device yields an exit status other than 0, 5, 7 or 8, a summary
# complaint is printed on stderr afterwards.
devicestatus() {
	local unformatted=0

	for dev in ${DEVICE//:/ }; do
		$DISKDUMPFMT -cv $dev
		ret=$?
		case $ret in
		0|5|7|8)
			;;
		*)
			unformatted=$((unformatted + 1))
			;;
		esac
	done

	if [ $unformatted -gt 0 ]; then
		echo "Each device has not been formatted" >&2
	fi
}

# Report on stderr whether diskdump is active and which optional
# behaviours are switched on.
# Returns 0 when $PROC_DISKDUMP exists and contains at least one
# non-comment line, 3 otherwise.  SALVAGEMESSAGE is only reported on 2.6
# kernels.
status() {
	local flag

	if [ ! -f $PROC_DISKDUMP ] || ! grep -v '^#' $PROC_DISKDUMP > /dev/null; then
		echo "$SERVICE_NAME not enabled" >&2
		return 3
	fi

	echo "$SERVICE_NAME enabled" >&2
	for flag in PRESERVEDUMP SKIPSAVECORE SALVAGEMESSAGE; do
		if [ "$flag" = "SALVAGEMESSAGE" ] && [ "$KERNEL" != "2.6" ]; then
			break
		fi
		# ${!flag} reads the variable whose name is stored in $flag.
		if [ "${!flag}" = "yes" ]; then
			echo "$flag enabled" >&2
		else
			echo "$flag not enabled" >&2
		fi
	done
	return 0
}

# Stop the facility, then start it again without options.
restart() {
	stop; start
}

# Dispatch on the requested subcommand.  Handlers leave their result in
# RETVAL, which becomes the exit status of the script; an unknown
# subcommand prints the usage text and exits with status 1.
case "$1" in
start)
	start
	;;
stop)
	stop
	;;
swapsavecore)
	swapsavecore
	;;
format)
	format -p
	;;
initialformat)
	format -fp
	;;
regularformat)
	format -ap
	;;
devicestatus)
	devicestatus
	;;
status)
	status
	RETVAL=$?
	;;
restart|reload)
	restart
	;;
enabledevice)
	start -f
	;;
*)
	echo "\
Usage: service $SERVICE_NAME {subcommand}
Subcommands:
	start		start $SERVICE_NAME
	stop		stop $SERVICE_NAME
	format		do quick format all dump devices
	initialformat	do initial format all dump devices
	regularformat	do full format all dump devices
	status		show if $SERVICE_NAME is activated
	restart|reload	stop and start $SERVICE_NAME
	enabledevice	enable all dump devices and restart $SERVICE_NAME
	devicestatus	show status of all dump devices
	version		show $SERVICE_NAME version information\
" >&2
	exit 1
	;;
esac

exit $RETVAL
