gnashley |
07-16-2008 03:33 AM |
I don't understand why you wouldn't want to use wc, or why it's any better to use tail and sort, but I worked out code in pure BASH for all three commands and 10 more. They are all available here:
http://distro.ibiblio.org/pub/linux/...ects/BashTrix/
Here's the code for wc:
Code:
#!/bin/bash
# copyright 2007 Gilbert Ashley <amigo@ibiblio.org>
# BashTrix wc is an implementation of the 'wc' command
# written in pure shell. Most wc options are supported.
# NOTE: the script needs bash itself; it uses [[ ]], (( )) and
# ${var:offset:length} substring expansion.
# Version string printed by show_version.
VERSION=0.2
# show_usage prints the help text on standard output and then exits
# the script with the status of the last echo.
# The program name shown is the basename of $0; ${0##*/} strips the
# longest prefix ending in '/', so it works for relative and absolute
# invocation paths alike.
show_usage() {
  local prog=${0##*/}
  echo
  echo "Usage: $prog [OPTION]... [FILE]..."
  echo "$prog -[m|w|l|L] FILE"
  echo " or: (cat|echo) | $prog [OPTION]... "
  echo "Print newline, word, and byte counts for each FILE"
  echo "and a total line if more than one FILE is specified."
  # echo "With no FILE, or when FILE is - read standard input." #conflicts
  echo " -m, --chars print the character counts"
  echo " -l, --lines print the newline counts"
  echo " -L, --max-line-length print the length of the longest line"
  echo " -w, --words print the word counts"
  echo " --help display this help and exit"
  echo " --version output version information and exit"
  exit
}
# show_version prints the program name (basename of $0), the value of
# the global VERSION and the copyright notice, then exits the script.
show_version() {
  # ${0##*/} keeps only the part after the last '/', so an absolute
  # invocation path still yields the bare program name.
  echo "${0##*/} (BashTrix) $VERSION"
  echo "Copyright 2007 Gilbert Ashley <amigo@ibiblio.org>"
  echo "This is free software written in pure POSIX shell."
  exit
}
# Smallest number of command-line arguments the program accepts
MINARGS=1
# An empty first argument, '-h' or '--help' prints the usage text;
# '--version' prints the version banner.
if [[ -z $1 || $1 == "-h" || $1 == "--help" ]] ; then
  show_usage
elif [[ $1 == "--version" ]] ; then
  show_version
fi
# Remember how many arguments were supplied
ARGC=$#
# Complain and show the usage text when too few arguments were given
if (( ARGC < MINARGS )) ; then
  echo "Too few arguments given (Minimum:$MINARGS)"
  echo
  show_usage
fi
# Parse the command line: option words set the COUNT_* / MAX_LINE_LENGTH
# flags, every other word is kept as an input file name.
# The positional parameters are rebuilt from the collected file names
# after the loop. Calling 'shift' while iterating over "$@" always drops
# the FIRST parameter, which discards a file name whenever an option
# follows it (e.g. "wc file -l").
# The long spellings advertised by show_usage are accepted as well.
FILES=()
for WORD in "$@" ; do
  case $WORD in
    -m|--chars) COUNT_CHARS=1 ;;
    -w|--words) COUNT_WORDS=1 ;;
    -l|--lines) COUNT_LINES=1 ;;
    -L|--max-line-length) MAX_LINE_LENGTH=1 ;;
    --help) show_usage ;;
    --version) show_version ;;
    # -) READ_STDIN=1 ;;
    -*) echo "Unrecognized argument" ; show_usage ;;
    *) FILES+=("$WORD") ;;
  esac
done
# Leave only the file names in "$@" (none at all selects piped input)
set -- "${FILES[@]}"
# function _freq counts the number of non-overlapping matches
# of PATTERN in PARSESTRING, stores the result in the global FREQ
# and also prints it on standard output.
# example usage: _freq "$PATTERN" "$PARSESTRING"
# Arguments:
#   $1 - pattern to look for, taken literally (glob characters are not
#        special); when omitted the global PATTERN is used
#   $2 - string to search; as before, a non-empty global PARSESTRING
#        takes precedence over $2 and is emptied by the call
# An empty pattern yields 0.
function _freq() {
  # Take the pattern from this call's argument so an earlier call
  # cannot leak its pattern into a later one.
  local pattern=${1-$PATTERN}
  local haystack=${PARSESTRING:-$2}
  # Existing callers hand the string over in PARSESTRING; it is consumed.
  PARSESTRING=""
  FREQ=0
  if [[ -n $pattern ]] ; then
    # Quoting the pattern makes the match literal
    while [[ $haystack == *"$pattern"* ]] ; do
      FREQ=$(( FREQ + 1 ))
      # drop everything up to and including the first match
      haystack=${haystack#*"$pattern"}
    done
  fi
  echo $FREQ
}
# uniform_white is a filter: it reads lines from standard input and
# writes each one with leading/trailing blanks removed and every run of
# spaces and tabs collapsed to a single space.
# IFS is set locally, so the result does not depend on the caller's IFS
# (the main script runs with an empty IFS).
# 'read -r' keeps backslashes and printf avoids glob expansion and
# echo's option parsing, so the text itself is passed through verbatim.
function uniform_white() {
  local IFS=$' \t'
  local -a words
  # the second condition also emits a final line that lacks a newline
  while read -r -a words || (( ${#words[@]} > 0 )) ; do
    # "${words[*]}" joins the fields with the first IFS character (space)
    printf '%s\n' "${words[*]}"
  done
}
# function _line_word_count counts the words in a line.
# Arguments: the text of the line. All arguments are considered, so the
#            line may also arrive already split by an unquoted expansion
#            in the caller.
# Result:    the global LINE_WORD_COUNT holds the number of words,
#            0 for an empty or blank line.
# A word is a maximal run of characters other than space, tab or newline.
function _line_word_count() {
  # Local IFS: splitting here is independent of the caller's IFS
  local IFS=$' \t\n'
  local -a words
  # "$*" joins the arguments with a space; read -a splits on IFS
  # whitespace and ignores leading, trailing and repeated blanks.
  # -d '' reads to end of input, so read returns non-zero by design.
  read -r -d '' -a words <<< "$*" || true
  LINE_WORD_COUNT=${#words[@]}
}
# function _line_char_count counts the characters in a line UNUSED
# Arguments: $1 - the text of the line, without its newline
# Result:    the global LINE_CHAR_COUNT holds the length of $1 plus one
#            for the end-of-line character.
# The length comes from ${#...} in a single step, the same way the main
# loop computes it, and no other global variable is touched.
function _line_char_count() {
  local text=$1
  LINE_CHAR_COUNT=$(( ${#text} + 1 ))
}
# _count_stream tallies one input stream read from standard input.
# It resets and fills the globals FILE_LINE_COUNT, FILE_WORD_COUNT,
# FILE_CHAR_COUNT and FILE_LONGEST_LINE. Lines are always counted;
# words, characters and the longest line only when COUNT_WORDS,
# COUNT_CHARS or MAX_LINE_LENGTH is set.
_count_stream() {
  local LINE EOL
  FILE_LINE_COUNT=0
  FILE_WORD_COUNT=0
  FILE_CHAR_COUNT=0
  FILE_LONGEST_LINE=0
  while : ; do
    EOL=1
    # IFS= and -r keep leading blanks and backslashes, so the lengths
    # below reflect the text exactly as stored.
    if ! IFS= read -r LINE ; then
      # End of input: a non-empty LINE is a last line with no newline
      [[ -n $LINE ]] || break
      EOL=0
    fi
    # only newline-terminated lines add to the line count
    FILE_LINE_COUNT=$(( FILE_LINE_COUNT + EOL ))
    # count the words in this line
    if [[ $COUNT_WORDS ]] ; then
      # quoted: the text must not be glob-expanded
      _line_word_count "$LINE"
      FILE_WORD_COUNT=$(( FILE_WORD_COUNT + LINE_WORD_COUNT ))
    fi
    # count the characters in this line, plus its newline if present
    if [[ $COUNT_CHARS ]] ; then
      FILE_CHAR_COUNT=$(( FILE_CHAR_COUNT + ${#LINE} + EOL ))
    fi
    # track the length of the longest line
    if [[ $MAX_LINE_LENGTH && ${#LINE} -gt $FILE_LONGEST_LINE ]] ; then
      FILE_LONGEST_LINE=${#LINE}
    fi
    (( EOL )) || break
  done
}

if [[ $# -gt 0 ]] ; then
  # One or more input files: print one result line per file
  # ("lines words chars longest name") and a "total" line when more
  # than one file was given.
  FILE_COUNT=0
  TOTAL_LINE_COUNT=0
  TOTAL_WORD_COUNT=0
  TOTAL_CHAR_COUNT=0
  TOTAL_LONGEST_LINE=0
  for FILE_NAME in "$@" ; do
    # count number of input files
    FILE_COUNT=$(( FILE_COUNT + 1 ))
    if [ ! -r "$FILE_NAME" ] ; then
      echo "Cannot find file $FILE_NAME" 1>&2
      exit 1
    fi
    _count_stream < "$FILE_NAME"
    TOTAL_LINE_COUNT=$(( TOTAL_LINE_COUNT + FILE_LINE_COUNT ))
    TOTAL_WORD_COUNT=$(( TOTAL_WORD_COUNT + FILE_WORD_COUNT ))
    TOTAL_CHAR_COUNT=$(( TOTAL_CHAR_COUNT + FILE_CHAR_COUNT ))
    if [[ $FILE_LONGEST_LINE -gt $TOTAL_LONGEST_LINE ]] ; then
      TOTAL_LONGEST_LINE=$FILE_LONGEST_LINE
    fi
    # printf emits the file name verbatim (no globbing, no blank folding)
    printf '%s %s %s %s %s\n' "$FILE_LINE_COUNT" "$FILE_WORD_COUNT" \
      "$FILE_CHAR_COUNT" "$FILE_LONGEST_LINE" "$FILE_NAME"
  done
  # detect multiple input files so the totals can be shown
  if [[ $FILE_COUNT -gt 1 ]] ; then
    printf '%s %s %s %s total\n' "$TOTAL_LINE_COUNT" "$TOTAL_WORD_COUNT" \
      "$TOTAL_CHAR_COUNT" "$TOTAL_LONGEST_LINE"
  fi
else
  # accept piped-in input only if "$@" is empty ( $#=0 )
  # elif [[ $READ_STDIN ]] would enforce the POSIX
  # piped input is presumed to be separated into lines already
  _count_stream
  # the single result line is labelled "total" and uses padded columns
  printf "%s %-2s %-2s %-2s %s\n" "$FILE_LINE_COUNT" "$FILE_WORD_COUNT" \
    "$FILE_CHAR_COUNT" "$FILE_LONGEST_LINE" total
fi
exit
All 13 programs - cat, dirname, head, sort, tr, wc, basename, cut, grep, rev, tail, uniq and which - are written for easy reading (hopefully) and so are much longer than is really needed to do the job. Still, most only emulate the basic options for each command. I found the basename, dirname and cat commands pretty much done by others and wrote the rest from scratch.
It made a really interesting project to implement a few of these and some of the commands do some things the originals are not capable of. cut and sort were very tricky. Some day I'll revisit that corner of my system and write a few more.
|