#!/usr/bin/perl
# by Harry Mangalam, mangalam@home.com.
# mod 4.21.00 adding check for FLAT mode, modecount =1
# mod 11.24.99 adding Mode, Mode count, Median to output.
# This is FREEWARE, and is worth exactly that!
#
# perloid to take stdin of as many #s as are in the stream, whether in one line
# or in many lines (only have to be separated by whitespace), calculate some
# basic stats, then spit to stdout so that the output can be
# grep'ped in the std unixy way..
#
# usage: stats < file.of.numbers
#    or
#        cmd1 | cmd2 |cmd3 | stats
#
# eg, to calculate a summary of the bytes used in the current directory:
#
# 3 % ls -l | cut -c31-42 |stats
#
# Sum = 158401735              158.4 MB total
# N = 503                      in 503 files
# Mean = 314913.986083499      average size of file is 315 KB
# Median = 10204               median size is 10 KB
# Mode (#) = 1024 (33)         mode is 1024 due to 33 directories
# Min = 0                      at least 1 empty file
# Max = 27135470               got a whomper of a file at 27 MB
# Variance = 2903105341782.66  huge variance
# Std Dev = 1703850.15238508   etc
# SEM = 75970.9233614217
# Skew = 12.1873963279124
# Std Skew = 111.588464543135
#
# To use from nedit, put it in your /usr/local/bin and add the following
# to your .nedit file under shell commands:
#
#    stats:::IW:\n\
#       stats\n
#
#
# Feel free to add whatever additional calculations you want, but if you do and
# you think they might be of general use, let me know so I can add them to the
# original.
# Bug reports, suggestions back to the author

use strict;
use warnings;
use Scalar::Util qw(looks_like_number);

# parse_numbers(@lines)
#
# Split each line on whitespace and return the tokens that look like numbers,
# in input order.  Tokens that are not numeric are skipped with a warning on
# STDERR so that stray text in the stream is not silently counted as 0.
sub parse_numbers {
    my @lines = @_;

    my @numbers;
    for my $line (@lines) {
        # split ' ' is the awk-style split: leading whitespace is ignored.
        for my $token (split ' ', $line) {
            if (looks_like_number($token)) {
                push @numbers, $token;
            }
            else {
                warn "stats: skipping non-numeric token '$token'\n";
            }
        }
    }
    return @numbers;
}

# compute_stats(@data)
#
# Return a hash reference describing @data with the keys
#   sum, n, mean, median, mode, mode_count, min, max,
#   variance, sd, sem, skew, std_skew
# A value that cannot be computed is undef:
#   - mean, median, min, max      need at least 1 value
#   - variance, sd, sem           need at least 2 values (N-1 denominator)
#   - skew, std_skew              need sd > 0 and N > 3
#   - mode                        is undef when no value occurs more than once
# When several values tie for the highest count, the smallest one is the mode.
sub compute_stats {
    my @data = @_;

    my $n     = scalar @data;
    my %stats = (
        sum        => 0,
        n          => $n,
        mean       => undef,
        median     => undef,
        mode       => undef,
        mode_count => 0,
        min        => undef,
        max        => undef,
        variance   => undef,
        sd         => undef,
        sem        => undef,
        skew       => undef,
        std_skew   => undef,
    );
    return \%stats if $n == 0;

    my @sorted = sort { $a <=> $b } @data;
    @stats{qw(min max)} = @sorted[ 0, -1 ];

    # Sum in input order.
    my $sum = 0;
    $sum += $_ for @data;
    my $mean = $sum / $n;
    @stats{qw(sum mean)} = ( $sum, $mean );

    # Median: middle element for odd N, mean of the two middle ones for even N.
    my $middle = int( $n / 2 );
    $stats{median} =
        $n % 2
      ? $sorted[$middle]
      : ( $sorted[ $middle - 1 ] + $sorted[$middle] ) / 2;

    # Mode: scan the runs of equal values in the sorted data.  The comparison
    # is numeric, so "1" and "1.0" belong to the same run.  Checking after
    # every element means the final run is considered too.
    my ( $run_value, $run_length ) = ( undef, 0 );
    my ( $mode,      $mode_count ) = ( undef, 0 );
    for my $value (@sorted) {
        if ( $run_length && $value == $run_value ) {
            $run_length++;
        }
        else {
            ( $run_value, $run_length ) = ( $value, 1 );
        }
        # Strictly greater: on a tie the earlier (smaller) value stays.
        if ( $run_length > $mode_count ) {
            ( $mode, $mode_count ) = ( $run_value, $run_length );
        }
    }
    if ( $mode_count > 1 ) {
        @stats{qw(mode mode_count)} = ( $mode, $mode_count );
    }
    else {
        $stats{mode_count} = $mode_count;    # distribution is flat
    }

    # Second and third central moment sums, for variance and skewness.
    my ( $sum_sq, $sum_cube ) = ( 0, 0 );
    for my $value (@data) {
        my $diff = $value - $mean;
        $sum_sq   += $diff**2;
        $sum_cube += $diff**3;
    }

    # Sample variance uses N-1, so it is undefined for a single value.
    if ( $n > 1 ) {
        $stats{variance} = $sum_sq / ( $n - 1 );
        $stats{sd}       = sqrt( $stats{variance} );
        $stats{sem}      = $stats{sd} / sqrt($n);
    }

    if ( defined $stats{sd} && $stats{sd} > 0 && $n > 3 ) {
        $stats{skew} =
          ( $n * $sum_cube ) / ( ( $n - 1 ) * ( $n - 2 ) * $stats{sd}**3 );
        $stats{std_skew} = $stats{skew} / sqrt( 6 / $n );
    }

    return \%stats;
}

# format_report($stats)
#
# Render the hash reference produced by compute_stats() as the text report:
# one "Label = value" pair per line so the output can be grep'ped.  Values
# that could not be computed are shown as "NA".
sub format_report {
    my ($stats) = @_;

    my $show = sub { defined $_[0] ? $_[0] : 'NA' };

    my ( $mode, $mode_note ) =
      defined $stats->{mode}
      ? ( $stats->{mode}, $stats->{mode_count} )
      : ( 'FLAT', 'No # was represented more than once' );

    my $report = join '',
      "\nSum = ",      $stats->{sum},
      "\nN = ",        $stats->{n},
      "\nMean = ",     $show->( $stats->{mean} ),
      "\nMedian = ",   $show->( $stats->{median} ),
      "\nMode (#) = ", $mode, " ($mode_note)",
      "\nMin = ",      $show->( $stats->{min} ),
      "\nMax = ",      $show->( $stats->{max} ),
      "\nVariance = ", $show->( $stats->{variance} ),
      "\nStd Dev = ",  $show->( $stats->{sd} ),
      "\nSEM = ",      $show->( $stats->{sem} ),
      "\n";

    if ( defined $stats->{skew} ) {
        $report .= join '',
          "Skew = ",       $stats->{skew},
          "\nStd Skew = ", $stats->{std_skew},
          "\n";
    }
    else {
        $report .=
          "Std Dev = 0 or N <=3; Skipping Skewness, Std Skewness cal'n.\n";
    }
    return $report;
}

# run()
#
# Read every line from the files named on the command line (or from STDIN when
# there are none), then print the statistics report to STDOUT.  Returns 0.
sub run {
    my @numbers;
    while ( my $line = <> ) {
        push @numbers, parse_numbers($line);
    }
    print format_report( compute_stats(@numbers) );
    return 0;
}

# Only act as a program when executed directly; when the file is loaded with
# require/do (e.g. by a test) just the subs above are defined.
run() unless caller;

1;