#!/bin/bash
# Quick-n-dirty, convert .gz to .bz2 for better compression and recoverability
#
# Example:
#
# find /mnt/bkps -name \*.gz > ~/rezipp-files.txt && rezip
# ( Will sort by smallest filesize and automatically skip <50MB .gz files )
# Files:
# Uses ~/rezipp-files.txt as input
# Creates ~/rezip.log, ~/rezip-files-sorted.txt
#
# Depends on external programs:
# bzip2, cut, du, grep, gzip, mount, sort, tee
# The PROPER way to kill this script while it is running is to press
# Ctrl-Z and then run ' kill %jobnumber ' -- Example:
#
# ^Z
#[1]+ Stopped rezip
# kill %1
# [1]+ Terminated rezip
#
# If you DON'T do it that way, trust me - wacky things can happen.
# i.e. it will skip to the next file, and gzip/bzip2 will still be
# running in the BG. Don't use ^C.
# WARNING: If .gz files that are listed in rezipp-files.txt get deleted,
# you MUST re-do the "find" before re-running.
# Otherwise, unexpected results may occur.
# Time this run started
# Timestamp taken when this run begins
estime=$(date)
# =================
# User-defined vars
# =================
# Mount point of the external drive that receives the converted files.
# It needs an entry in /etc/fstab, and the invoking user must be allowed
# to read and write there. Sample /etc/fstab entry:
#
# /dev/sdb8 /mnt/wdvast ext3 defaults,noauto,noatime,rw 0 0
#
extdrv="/mnt/wdvast"
mount "$extdrv"
# Size limit in KB; anything smaller is dropped from the to-process list
skipsize=50000
# 1 = leave an already existing .bz2 alone, so that a re-run does not
# throw away work that was finished earlier
protwork=1
# Input list, run log, and the size-sorted working list
infile=~/rezipp-files.txt
logfile=~/rezip.log
sortfile=~/rezip-files-sorted.txt
# ========================
# End of User-defined vars
# ========================
# Def function(s)
#######################################
# Convert one .gz file into a .bz2 on the external drive, recreating the
# source directory structure below $extdrv.
# Globals:
#   extdrv    (read) root of the destination tree
#   logfile   (read) receives stderr of gzip and bzip2
#   protwork  (read) 1 = never overwrite an existing .bz2
#   pfile, tr (read) progress counters, used for the log line only
# Arguments:
#   $1 - path of the .gz file. It has to be absolute, because the working
#        directory is changed before the file is read.
# Returns:
#   0 if the file was converted or skipped, 1 if the conversion failed
# Side effects:
#   leaves the shell's working directory inside the destination directory
#######################################
function rezipp () {
  local f2rz=$1
  local f2rzns srcdir srcdir2 desdir stime ntime
  local status=0

  # Strip extension and keep basename
  f2rzns=$(basename -- "$f2rz" .gz)
  # "dirname" is the opposite of basename
  srcdir=$(dirname -- "$f2rz")
  # Strip leading slash so we can recreate dir structure on external drv
  srcdir2=${srcdir#/}
  desdir="$extdrv/$srcdir2"

  logecho "--- Srcdir: $srcdir"
  logecho "--- Desdir: $desdir"
  logecho "o Converting file $pfile/$tr: $f2rzns"

  # Stop here if the destination cannot be entered; otherwise the .bz2
  # would be written into whatever directory we happen to be in.
  if ! mkdir -pv -- "$desdir" || ! cd -- "$desdir"; then
    logecho "!!! Cannot use destination dir - skipping"
    return 1
  fi

  stime=$(date)
  if [ "$protwork" -eq 1 ] && [ -e "$f2rzns.bz2" ]; then
    logecho ' ! Destination .bz2 exists - skipping'
    return 0
  fi

  # The Main Idea (TM)
  # pipefail makes a gzip failure visible (plain $? only reports bzip2);
  # the subshell keeps that option from leaking into the rest of the script.
  # Output goes to a .part file first, so an aborted or failed run never
  # leaves something that looks like a finished .bz2 and gets "protected".
  if ( set -o pipefail
       time gzip -cd -- "$f2rz" 2>>"$logfile" \
         | bzip2 > "$f2rzns.bz2.part" 2>>"$logfile" ); then
    mv -f -- "$f2rzns.bz2.part" "$f2rzns.bz2" || status=1
  else
    status=1
  fi
  if [ "$status" -ne 0 ]; then
    rm -f -- "$f2rzns.bz2.part"
    logecho "!!! Job failed."
  fi

  ntime=$(date)
  logecho "+ Start time: $stime"
  logecho "+ Finishtime: $ntime"
  logecho '============='
  return "$status"
}
#######################################
# Echo something to current console AND append it to $logfile.
# Can also handle piped input ( cmd |logecho ).
# Globals:
#   logfile (read) file the text is appended to
# Arguments:
#   the message; several arguments are joined with single spaces.
#   When the joined message is empty, the text is taken from stdin instead.
# Outputs:
#   the text, verbatim, on stdout and in $logfile. Glob characters such
#   as '*' and runs of spaces are written exactly as given.
#######################################
function logecho () {
  local args="$*"
  local line
  if [ -z "$args" ]; then
    # Copy stdin line by line until EOF, or until nothing arrives for
    # 2 seconds. The "|| [ -n ... ]" part keeps a last line that has no
    # trailing newline.
    while IFS= read -e -r -t 2 line || [ -n "$line" ]; do
      printf '%s\n' "$line" | tee -a "$logfile"
    done
  else
    printf '%s\n' "$args" | tee -a "$logfile"
  fi
}
#=======================================
# Main code
#=======================================
# Supply the "EOF" sentinel line if the list does not have one yet.
# -x matches whole lines only, so a path that merely contains the letters
# "EOF" is not mistaken for the sentinel.
grep -qx -- 'EOF' "$infile" || echo "EOF" >> "$infile"
logecho "----- $0 Run started at: $estime"
# ThisRecord; index of the next free slot in riptrack (entries kept so far + 1)
tr=1
thisline=0
echo 'o Reading array...'
# Walk through the find results. read (with the default IFS) trims
# surrounding whitespace; -r keeps backslashes in file names intact.
while read -r elemt; do
  thisline=$((thisline + 1))
  if [[ $elemt == *'#'* ]]; then
    # Skip comments: any line that contains a '#'
    echo "Found a comment in line $thisline. Skipping."
  elif [[ -z ${elemt// /} ]]; then
    # Nothing left once the spaces are removed
    echo "Blank line at $thisline. Skipping."
  elif [[ $elemt == "EOF" ]]; then
    echo "EOF found in line $thisline."
    break
  else
    # These are the ones we want to keep
    riptrack[tr]=$elemt
    tr=$((tr + 1))
  fi
done < "$infile"
# tr pointed one past the last stored entry; turn it into the entry count
tr=$((tr - 1))
logecho "--- $tr Files in original Find results."
# Sort by smallest filesize: write one "SIZE_KB PATH" line per file,
# then sort the list numerically.
: > "$sortfile"
# (Blank the file)
logecho '--- Sorting files by size...'
for i in "${riptrack[@]}"; do
  # du prints "SIZE<TAB>PATH"; only the size is taken from it.
  # -k pins the unit to KB (the unit of skipsize), -s yields a single line.
  if duout=$(du -sk -- "$i") \
      && read -r sizekb _ <<< "$duout" \
      && [[ $sizekb =~ ^[0-9]+$ ]]; then
    printf '%s %s\n' "$sizekb" "$i" >> "$sortfile"
  else
    # e.g. the file was deleted after the list was made; leaving it out
    # avoids a blank line in the sorted list
    logecho " ! Cannot get size, skipping: $i"
  fi
done
# Overwrite in-situ
sort -n -o "$sortfile" -- "$sortfile"
echo "EOF" >> "$sortfile"
logecho "--- Discarding files that are less than ${skipsize}KB..."
tr=1
drpfls=0
thisline=0
# Have to destroy array so we can re-use it
riptrack=()
# Every line of the sorted list is "SIZE_KB PATH". read puts the size into
# field1 and the whole remainder of the line (spaces included) into field2.
while read -r field1 field2; do
  thisline=$((thisline + 1))
  if [[ $field1 == "EOF" && -z $field2 ]]; then
    echo "EOF found in line $thisline."
    break
  fi
  # Ignore blank or malformed lines instead of queueing an empty file name
  if ! [[ $field1 =~ ^[0-9]+$ && -n $field2 ]]; then
    continue
  fi
  if [ "$field1" -lt "$skipsize" ]; then
    logecho " ! Dropped $field2"
    drpfls=$((drpfls + 1))
  else
    riptrack[tr]=$field2
    tr=$((tr + 1))
  fi
done < "$sortfile"
# tr pointed one past the last stored entry; turn it into the entry count
tr=$((tr - 1))
logecho "--- Total files dropped: $drpfls"
logecho "--- About to process: $tr files. You might want to take a break. :)"
# Do it! :)
# Convert every remaining file; pfile counts how many were handed to rezipp.
pfile=0
for i in "${riptrack[@]}"; do
  pfile=$((pfile + 1))
  rezipp "$i"
done
# Time this run finished (the closing log line reports this, not the
# start time)
entime=$(date)
logecho "------- $0 Run finished at: $entime -- $pfile files processed."
# Must stay: the text after this line is the license, not shell code.
exit
Copyright (C) 2005 and beyond David J Bechtel
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
The GNU Copyleft