#!/bin/bash
# Global variables
UPS='ups.domain.com'
FS='fs.domain.com'
FS_PORT='2'
PASSWORD='password'
MGR="/opt/bin/mgr"
MS_PORT='42'
CONSUMER_MS='msa1.domain.com msa2.domain.com msa3.domain.com'
BUSINESS_MS='msb1.domain.com msb2.domain.com msb3.domain.com'
count=0
echo "UID:EMAIL:ALIASES:COS:LAST_LOGIN:CREATED:DOMAIN:ENABLED:ANTISPAM:QUOTA_LIMIT:QUOTA_USED:FORWARDING_WITH_COPY\n" > report.txt
for server in $CONSUMER_MS; do
for uid in `$MGR -s $server -p $MS_PORT -w $PASSWORD USER mbox.domain.com LIST | awk '{ print $3 }'`; do
# Query MS
MS_OUT=`$MGR -s $server -p $MS_PORT -w $PASSWORD USER mbox.domain.com SHOW $uid | awk '{ print $2 " " $3 }'`
used=` awk '/DISKUSAGE/ { print $2 }' <<< "$MS_OUT"`
limit=`awk '/BASEQUOTA/ { print $2 }' <<< "$MS_OUT"`
# Query FS
FS_OUT=`$MGR -s $FS -p $FS_PORT -w $PASSWORD USER SHOW $uid mbox.domain.com`
mailbox=` awk '/MAILPATH/ {split($0,a,"="); print a[2]}' <<< "$FS_OUT"`
forwarding=`awk '/PSFORWARDASATTACHMENT/ {split($0,a,"="); print a[2]}' <<< "$FS_OUT"`
email=` awk '/PSACCOUNTEMAILADDRESS/ {split($0,a,"="); print a[2]}' <<< "$FS_OUT"`
mailhost=` awk '/MAILHOST/ {split($0,a,"="); print a[2]}' <<< "$FS_OUT"`
cos=` awk '/MAILCLASS/ {split($0,a,"="); print a[2]}' <<< "$FS_OUT"`
enabled=` awk '/ENABLEDFLAG/ {split($0,a,"="); print a[2]}' <<< "$FS_OUT"`
last_login=`awk '/LASTSUCCESSFULLOGON/ {split($0,a,"="); print a[2]}' <<< "$FS_OUT"`
# Query mailbox
if test -n "$mailhost" && -n "$mailbox"
then
echo "created=`ssh $mailhost -i mailbox_info.pub grep $mailbox UID | awk '{ print $2}'`"
fi
echo "$uid:$email:$aliases:$cos:$last_login:$created:$domain:$enabled:$antispam:$limit:$used:$forwarding" >> report.txt
echo $((++count))
done
done
for server in $BUSINESS_MS; do
echo $server
done
echo 'Script completed.'
Refactorings
No refactoring yet !
Ants
June 3, 2009, June 03, 2009 09:10, permalink
On a quick scan it looks like MS_OUT is being performed twice, and FS_OUT is being performed seven times. Options are
(1) to perform these only once each and store the output to a temp file and then do the multiple calls to awk; or
(2) to perform these only once each and call awk with an awk script that extracts all the information in one pass.
Zmyrgel
June 3, 2009, June 03, 2009 09:58, permalink
Umm, in which part it gets executed several times. MS_OUT's command is run once and output stored to variable that is passed to awk's.
I tested it with small shell script and if it would execute TEMP two times it would output different files but it prints two identical files so that is not the case.
One thing I've so far done is to replace those simple field selection awk's with cut.
TEMP=`mktemp`
`awk '/tmp/ { print }' <<< "$TEMP"`
`awk '/tmp/ { print }' <<< "$TEMP"`
Ants
June 3, 2009, June 03, 2009 22:17, permalink
Sorry, my mistake. Dealing with different scripting languages, I get confused by behaviors.
So is most of the 50 hours spent just waiting for the responses from the calls to 'mgr', rather than processing the output?
Zmyrgel
June 4, 2009, June 04, 2009 08:23, permalink
I'd say most of the time is waiting the mgr rather than processing. But looking at the awk lines I'd guess it causes some overhead as it invokes awk multiple times to parse same input. Refactoring it so that it would be single awk instance to process all the input could yield some benefit.
Currently figuring out if some server would contain all the user info so I could get it with single server call.
Ants
June 7, 2009, June 07, 2009 04:18, permalink
Instead of waiting sequentially to collect the information, do things in parallel. Essentially transplant lines 17-38 into another shell script, but ensure each of their output files is unique. Then use the Bourne shell wait command to wait for all the scripts to finish running. Once done, cat all the unique files together.
My Bourne shell scripting is really rusty so you'll have to forgive any errors.
#
# queryDetails.sh
#
# Global variables
server=$1
uid=$2
UPS='ups.domain.com'
FS='fs.domain.com'
FS_PORT='2'
PASSWORD='password'
MGR="/opt/bin/mgr"
MS_PORT='42'
CONSUMER_MS='msa1.domain.com msa2.domain.com msa3.domain.com'
BUSINESS_MS='msb1.domain.com msb2.domain.com msb3.domain.com'
count=0
# Query MS
echo "UID:EMAIL:ALIASES:COS:LAST_LOGIN:CREATED:DOMAIN:ENABLED:ANTISPAM:QUOTA_LIMIT:QUOTA_USED:FORWARDING_WITH_COPY\n" > report.txt
MS_OUT=`$MGR -s $server -p $MS_PORT -w $PASSWORD USER mbox.domain.com SHOW $uid | awk '{ print $2 " " $3 }'`
used=` awk '/DISKUSAGE/ { print $2 }' <<< "$MS_OUT"`
limit=`awk '/BASEQUOTA/ { print $2 }' <<< "$MS_OUT"`
# Query FS
FS_OUT=`$MGR -s $FS -p $FS_PORT -w $PASSWORD USER SHOW $uid mbox.domain.com`
mailbox=` awk '/MAILPATH/ {split($0,a,"="); print a[2]}' <<< "$FS_OUT"`
forwarding=`awk '/PSFORWARDASATTACHMENT/ {split($0,a,"="); print a[2]}' <<< "$FS_OUT"`
email=` awk '/PSACCOUNTEMAILADDRESS/ {split($0,a,"="); print a[2]}' <<< "$FS_OUT"`
mailhost=` awk '/MAILHOST/ {split($0,a,"="); print a[2]}' <<< "$FS_OUT"`
cos=` awk '/MAILCLASS/ {split($0,a,"="); print a[2]}' <<< "$FS_OUT"`
enabled=` awk '/ENABLEDFLAG/ {split($0,a,"="); print a[2]}' <<< "$FS_OUT"`
last_login=`awk '/LASTSUCCESSFULLOGON/ {split($0,a,"="); print a[2]}' <<< "$FS_OUT"`
# Query mailbox
if test -n "$mailhost" && -n "$mailbox"
then
echo "created=`ssh $mailhost -i mailbox_info.pub grep $mailbox UID | awk '{ print $2}'`"
fi
echo "$uid:$email:$aliases:$cos:$last_login:$created:$domain:$enabled:$antispam:$limit:$used:$forwarding"
#
# getAllMailBoxes.sh
#
#!/bin/bash
# Global variables
UPS='ups.domain.com'
FS='fs.domain.com'
FS_PORT='2'
PASSWORD='password'
MGR="/opt/bin/mgr"
MS_PORT='42'
CONSUMER_MS='msa1.domain.com msa2.domain.com msa3.domain.com'
BUSINESS_MS='msb1.domain.com msb2.domain.com msb3.domain.com'
count=0
# delete any old partial reports
rm $tmp\report_*.txt
echo "UID:EMAIL:ALIASES:COS:LAST_LOGIN:CREATED:DOMAIN:ENABLED:ANTISPAM:QUOTA_LIMIT:QUOTA_USED:FORWARDING_WITH_COPY\n" > report.txt
for server in $CONSUMER_MS; do
for uid in `$MGR -s $server -p $MS_PORT -w $PASSWORD USER mbox.domain.com LIST | awk '{ print $3 }'`; do
ssh queryDetails.sh $server $uid > $tmp\report_$count.txt &
echo $((++count))
done
done
# wait for all the queryDetails to finish running
wait
# append all the reports to master report file
cat $tmp\report_*.txt >> report.txt
for server in $BUSINESS_MS; do
echo $server
done
echo 'Script completed.'
draconesca
July 24, 2009, July 24, 2009 01:49, permalink
home intergovernmental system open royal national recent open
I'm trying to collect customer statistics with a shell script.Problem is that currently its runtime is about 50h which is too much. As I'm pretty new to scripting so my solution works but obviously it could be better.
My main concern currently is if the refactoring the if statements to use single awk instance would yield some speed gain.
After that I should continue to find some way to reduce those network calls and check if they could be replaced with ldap queries or something as I guess those would be faster to execute.