= Script: Bash: AIX: Daily Check Script
**Summary**: A daily script that runs a set of routine health checks on AIX servers. \\
**Date**: 27 December 2010 \\
**Refactor**: 21 February 2025: Checked links and formatting. \\
{{tag>aix bash}}
#!/bin/bash
########################################################################################################################
# Author : Sjoerd Hooft
# Date Initial Version: 27 Dec 2010
# Comments: sjoerd_@_warmetal_nl
#
# Description:
# This is a sample script to perform the daily checks on AIX servers.
#
# Recommendations:
# The script is designed for a 120 column terminal.
# The running user must be able to do a passwordless sudo to root.
#
# Changes:
# Please comment on your changes to the script (your name and email address, line number, description):
########################################################################################################################
# Script Variables
# "$0" is quoted so that BASEDIR and WHATAMI stay correct when the script
# lives in a path that contains whitespace.
HOSTNAME_SHORT=$(hostname -s)
# 0 = interactive run, anything else = unattended run (see scriptContinue)
AUTOMATIC=0
BASEDIR=$(dirname "$0")
LOGFILE="$BASEDIR/dc.log"
WHATAMI=$(basename "$0")
DATE=$(date +%Y%m%d)
TOMAIL=sjoerd_@_warmetal_nl
# Terminal sequences used to emphasise headings in the output
BOLD=$(tput bold)
BOLDOFF=$(tput sgr0)
# Directories
APPDIR="/var/log/APP"
WASDIR="/opt/WAS_Profiles/AppSrv/logs"
FILE3DIR="/var/data/FILE3"
FILE1DIR="/var/data/FILE1/log"
JMSDIR="/var/data/app/jms_errors"
TOMCATDIR="/var/log/app"
# Oracle Variables
ORACLE_HOME="/opt/oracle/product/10.2"
ORACLE_BASE="/opt/oracle"
ORACLE_SID_DB1=db1
ORACLE_SID_DB2=db2
export ORACLE_HOME ORACLE_BASE
# Pause the script so the operator can evaluate the output of the previous
# check. Waits for ENTER and clears the screen afterwards.
# Does nothing unless AUTOMATIC is "0" (interactive run).
scriptContinue () {
  case "$AUTOMATIC" in
    0)
      echo "Press ENTER to continue"
      read CONTINUE
      clear
      ;;
  esac
}
# Print a heading, the output of errpt (the AIX error report) and a
# trailing blank line.
checkErrors () {
  printf '%s\n' "$BOLD Listing the Error Logging Facility: $BOLDOFF"
  errpt
  printf '\n'
}
# Announce the action, then run "errclear 0" through sudo to clear the
# AIX internal errors.
clearErrors () {
  printf '%s\n' "Clearing Errors"
  sudo errclear 0
}
# Show the detailed error report (errpt -a) to the operator, paged
# through less.
viewErrors () {
  printf '%s\n' "Viewing Errors"
  errpt -a |
    less
}
# Function that removes all (non-hidden) files from the directory that holds
# the JMS/MQ errors, after giving the operator a chance to abort.
# Globals: JMSDIR (read)
# Returns: 1 when JMSDIR is empty or unset, 0 when there is nothing to
#          remove, otherwise the exit status of the rm command
removeJms () {
  local -a jmsFiles
  local rmStatus

  # An empty JMSDIR would turn the glob below into "/*" for a privileged rm.
  if [ -z "$JMSDIR" ]; then
    echo "JMSDIR is not set, refusing to remove anything" >&2
    return 1
  fi

  echo "Are you sure you want to remove the JMS error files from $JMSDIR? "
  echo "If you hesitate, press CTRL+C to exit the script. "
  scriptContinue

  jmsFiles=("$JMSDIR"/*)
  # A glob without matches stays literal, so test whether the first entry exists.
  if [ ! -e "${jmsFiles[0]}" ] && [ ! -L "${jmsFiles[0]}" ]; then
    echo "No files found in $JMSDIR"
    echo
    return 0
  fi

  echo "Removing these files: "
  printf '%s\n' "${jmsFiles[@]}"
  # "--" keeps file names that start with a dash from being read as options.
  sudo rm -- "${jmsFiles[@]}"
  rmStatus=$?
  echo
  echo "Done"
  echo
  return "$rmStatus"
}
# Function that checks the last 4 logfiles of 3 application servers.
# Two nested loops suffice because the files are named alike:
#   $APPDIR/<application>-<logfile>
# Lines containing ERROR are shown, known errors are skipped and only the
# last 10 remaining lines per logfile are printed.
# Globals: APPDIR, BOLD, BOLDOFF (read)
checkLog-abs () {
  local application logfile
  echo "$BOLD Checking abs-logs in $APPDIR $BOLDOFF "
  echo "Note: we check the last 4 logfiles and skip any known error, and limit the amount of lines to 10."
  for application in appserver1 appserver2 appserver3; do
    for logfile in app.log.4 app.log.3 app.log.2 app.log; do
      echo "Checking $BOLD $application-$logfile $BOLDOFF "
      # The logs live in APPDIR, the directory announced in the heading.
      grep ERROR "$APPDIR/$application-$logfile" \
        | grep -v \
            -e 'LDAP: error code 32 - No Such Object' \
            -e 'doRefreshProposalsResponse didn.d send the email caught - ignoring' \
            -e 'Error getting active tan: No TAN available for user' \
            -e 'CORBA OBJECT_NOT_EXIST' \
        | tail -10
      echo
    done
    # scriptContinue clears the screen itself in interactive mode; there is no
    # extra clear here, so an automatic run puts no control codes in the log.
    scriptContinue
  done
  echo
}
# Function to check the SystemOut.log of the websphere servers
# server1..server4 below $WASDIR/<server>_Server.
# Lines containing "error" (any case) are shown, known errors are skipped
# and only the last 10 remaining lines per logfile are printed.
# Globals: WASDIR, BOLD, BOLDOFF (read)
checkLog-was () {
  local server
  echo "$BOLD Checking websphere logs in $WASDIR $BOLDOFF "
  for server in server1 server2 server3 server4; do
    echo "Checking $BOLD ${server}_Server/SystemOut.log $BOLDOFF "
    # The path is quoted so a WASDIR containing whitespace still works.
    grep -i error "$WASDIR/${server}_Server/SystemOut.log" \
      | grep -v \
          -e 'oracle.jdbc.driver.DatabaseError.throwSqlException' \
          -e 'The Network Adapter could not establish the connectionDSRA0010E' \
          -e 'Error creating XA Connection and Resource com.ibm.ws.exception.WsException: DSRA8100E' \
          -e 'Error creating XA Connection and Resource java.security.PrivilegedActionException:' \
      | tail -10
    echo
    scriptContinue
  done
  echo
}
# Function to check whether files have been processed.
# Processed files get a different extension, so the first listing shows the
# entries of $FILE3DIR whose name ends in .txt and the second one shows every
# entry that contains ".txt" anywhere in its line.
# Both listings are sorted oldest first (ls -ltr) and limited to 10 lines.
# Globals: FILE3DIR, HOSTNAME_SHORT, BOLD, BOLDOFF (read)
checkFiles-host3 () {
  echo "$BOLD Checking the process on $HOSTNAME_SHORT $BOLDOFF "
  echo "There should be no files ending on .txt older than one hour:"
  echo "Last 10 files ending on .txt in $FILE3DIR:"
  # The directory is quoted so a path containing whitespace still works.
  ls -ltr "$FILE3DIR" | grep '\.txt$' | tail -10
  echo
  echo "$BOLD Checking the process on $HOSTNAME_SHORT $BOLDOFF "
  echo "There should be recent (last 24 hours) files:"
  echo "Last 10 files in $FILE3DIR:"
  ls -ltr "$FILE3DIR" | grep '\.txt' | tail -10
  echo
  scriptContinue
}
# Function that checks whether JMS (MQ) error files exist in $JMSDIR.
# In interactive mode the operator can, after examining the sizes in the
# listing, choose to delete them through the menu.
# "continue" works only in this menu structure because this is the last check
# for this host.
# Globals: JMSDIR, AUTOMATIC, HOSTNAME_SHORT, BOLD, BOLDOFF (read)
checkFiles-host1 () {
  local -a jmsFiles
  echo "$BOLD Checking MQ process error files on $HOSTNAME_SHORT $BOLDOFF "
  echo "Checking for jms (MQ) errors in $JMSDIR, there should be no files in this directory:"
  ls -ltr "$JMSDIR"

  if [ "$AUTOMATIC" != "0" ]; then
    echo "AUTOMATIC mode is on. If there are any files run the script manually on $HOSTNAME_SHORT "
    return
  fi

  # Detect files with a glob instead of counting "ls -l" lines; a glob
  # without matches stays literal, hence the existence test on the first entry.
  jmsFiles=("$JMSDIR"/*)
  if [ -e "${jmsFiles[0]}" ] || [ -L "${jmsFiles[0]}" ]; then
    echo
    echo "${BOLD}There are files in this directory!$BOLDOFF If all files are really small ( < 100 bytes ) you can delete them. "
    echo " Would you like to do that right now?"
    echo
    echo "remove - remove all files in $JMSDIR"
    echo "continue - continue with dailycheck"
    echo
    menuChoice
  fi
  scriptContinue
}
# Check the Oracle alert log in the bdump directory of $ORACLE_SID for errors.
# The log is read through sudo and fed to a sed program; per the operator
# message it is meant to keep each ORA- line with the two lines before and
# after it. tail then keeps the last 10 lines of that result.
# Sets the global ORALOGDIR; reads ORACLE_SID, BOLD and BOLDOFF.
checkLog-ora () {
  # sed program, handed to sed exactly as written here
  local contextFilter='
1{$!N;$d;}
$!N;/ORA-/!D
$!N;$d;N;p
g;$!N;$d;N;D
'
  ORALOGDIR="/var/log/oracle/10.2/${ORACLE_SID}/bdump"
  printf '%s\n' \
    "$BOLD Checking the Oracle logfile $ORALOGDIR/alert_$ORACLE_SID.log $BOLDOFF " \
    "The last 10 ORA- messages are displayed, including the 2 lines before and the two lines after "
  sudo cat $ORALOGDIR/alert_$ORACLE_SID.log |
    sed -e "$contextFilter" |
    tail -10
  printf '\n'
  scriptContinue
}
# Function to check the tomcat application server logs for errors.
# For the "application" and "framework" logs it selects the files below
# $TOMCATDIR/app that match the name and -mtime -3, and prints every line
# containing ERROR.
# NOTE(review): the message speaks of four days while find uses -mtime -3;
# confirm which is intended.
# Globals: TOMCATDIR, HOSTNAME_SHORT, BOLD, BOLDOFF (read)
checkLog-tomcat () {
  local prefix
  echo "$BOLD Checking the tomcat application server logs on $HOSTNAME_SHORT $BOLDOFF "
  echo "$BOLD Checking Tomcat logfiles: $BOLDOFF"
  for prefix in application framework; do
    echo "Checking the last four days of $TOMCATDIR/$prefix.log files"
    # The file names written by -print go through the same grep, so a name
    # is shown only when it contains ERROR itself.
    find "$TOMCATDIR/app/." -type f -name "$prefix*" -mtime -3 -print -exec cat {} \; \
      | grep ERROR
    echo
  done
  scriptContinue
}
# Show the extended menu for handling AIX system errors: the menu header,
# the current error listing and the two error-specific menu options.
actionErrors () {
  menuStart
  checkErrors
  printf '%s\n' \
    "Note: The system clears all hardware errors automatically after 90 days, and all other errors after 30 days." \
    "" \
    "clearerrors - clear all errors now" \
    "viewerrors - review errors in less"
}
# Run the checks that belong to the host the script is running on.
# The short hostname selects the set of checks; for the database hosts the
# matching ORACLE_SID is exported first. Unknown hosts only get the screen
# cleared.
hostSpecific () {
  clear
  case "$HOSTNAME_SHORT" in
    host1)
      checkLog-abs
      checkLog-was
      checkFiles-host1
      ;;
    host2)
      export ORACLE_SID=$ORACLE_SID_DB2
      checkLog-ora
      ;;
    host3)
      export ORACLE_SID=$ORACLE_SID_DB1
      checkLog-ora
      checkLog-tomcat
      checkFiles-host3
      ;;
  esac
}
# Clear the screen and print the two-line menu banner followed by a blank
# line.
menuStart () {
  clear
  printf '%s\n' \
    "########################################################################################################################" \
    "################################################### Daily Check Menu ###################################################" \
    ""
}
# Print the default menu options for the operator, framed by blank lines.
# The "auto" entry mentions the mail recipient and the script name.
menuEnd () {
  printf '%s\n' \
    "" \
    "errors - take further actions regarding errors" \
    "host - start host specific checks" \
    "auto - restarts the script and runs it automatically, after which the logfile is mailed to $TOMAIL " \
    " - this also works from the commandline: $WHATAMI auto " \
    "" \
    "exit - exit" \
    ""
}
# Read a menu option from the operator and act on it.
# This one function serves every menu in the script, so it knows all options.
# An empty answer means "exit"; after "errors" or an unknown answer it asks
# again.
menuChoice () {
  echo "Enter menu choice: [exit]"
  read MENUCHOICE
  # An empty answer selects the default shown in the prompt
  MENUCHOICE=${MENUCHOICE:-exit}
  case "$MENUCHOICE" in
    errors)
      actionErrors
      menuChoice
      ;;
    host)        hostSpecific ;;
    clearerrors) clearErrors ;;
    viewerrors)  viewErrors ;;
    remove)      removeJms ;;
    continue)    echo ;;
    auto)
      # Start the script again in automatic mode, then leave this instance
      $BASEDIR/$WHATAMI auto
      exit
      ;;
    exit)
      exit
      ;;
    *)
      echo "Wrong Input"
      menuChoice
      ;;
  esac
}
# Function to mail the log when the script has run automatically.
# The logfile is sent as the message body to $TOMAIL; the subject names the
# script, the host and the date.
# Globals: LOGFILE, WHATAMI, HOSTNAME_SHORT, DATE, TOMAIL (read)
# Returns: the exit status of mail; non-zero without calling mail when the
#          logfile cannot be opened
mailLog () {
  # Quoted so that a logfile path or address with special characters is
  # passed as a single word.
  mail -s "Report $WHATAMI on $HOSTNAME_SHORT of $DATE" "$TOMAIL" < "$LOGFILE"
}
# Determine whether the script should run automatically.
# When the first argument is "auto": switch to automatic mode, drop the bold
# text markers so the logfile stays readable, send all output to the logfile
# instead of the screen, run the checks, mail the logfile and exit.
if [ "$1" == "auto" ]; then
  AUTOMATIC=1
  BOLD=
  BOLDOFF=
  # Quoted: an unquoted target with whitespace is an ambiguous redirect.
  # Stop when the logfile cannot be opened, otherwise an old log would be
  # mailed.
  exec > "$LOGFILE" 2>&1 || exit 1
  checkErrors
  hostSpecific
  mailLog
  exit
fi
# Actual script:
# Endless loop; it only ends when a menu choice exits the script.
# Each round shows the menu header, lists the errors, shows the menu options
# and asks the operator what to do.
while true; do
  menuStart
  checkErrors
  menuEnd
  menuChoice
done
//This wiki has been made possible by://