// compile with: gcc -o histo histo.c -lm
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define LINEBUFLEN 1024

/* Allocates a zero-initialised array of `count` elements of `size` bytes,
 * or terminates the program with an error message if that fails. */
static void *alloc_zeroed(size_t count, size_t size)
{
    void *mem = calloc(count, size);

    if (!mem) {
        fprintf(stderr, "histo: Out of memory.\n");
        exit(-1);
    }
    return mem;
}

/* Returns the RMS deviation given the mean and the mean of squares.
 * Rounding can push the difference marginally below zero (e.g. when all
 * values are equal); that case is clamped to 0 instead of yielding NaN.
 * A NaN input still propagates to the result. */
static double rms_deviation(double mean, double sqrmean)
{
    double variance = sqrmean - mean * mean;

    return variance < 0.0 ? 0.0 : sqrt(variance);
}

/*
 * histo [-d] <col> <left> <right> <step>
 *
 * Reads whitespace-separated columns from stdin, bins column <col> (0-based)
 * into equally wide bins covering [<left>, <right>] and prints one line per
 * bin to stdout: midpoint, count, mean and RMS deviation of the values in
 * that bin. With -d the differences of successive values are binned instead.
 * Summary information goes to stderr.
 *
 * Exit status: 0 on success and after printing the usage text, -1 on
 * invalid arguments or allocation failure.
 */
int main(int argc, char **argv)
{
    char linebuf[LINEBUFLEN];
    double *lower, *binsum, *binsqrsum;
    int *histo;
    char *format;
    double left, right, step, ratio, value, previous, histval, sum, sqrsum;
    long colarg;
    int col, nbins, bin, i, line, nvalues, outofrange, diffhisto;
    size_t pos;

    if (argc != 5 && (argc != 6 || strcmp(argv[1], "-d"))) {
        fprintf(stderr, "usage: histo [-d] <col> <left> <right> <step>\n"
            "Reads numerical data from stdin and outputs a histogram of column "
            "<col> (0-based) on stdout. Values between <left> and "
            "<right> contribute to the histogram; the bin width is <step>. If the -d option is given, a histogram of successive "
            "differences of input values is created.\n"
            "The histogram is printed to stdout and general information (number of "
            "values read, values out of range, mean and RMS deviation) is printed "
            "to stderr. Each line of the histogram output contains the following "
            "numbers separated by tabs: The bin midpoint, the number of values in "
            "this bin, the mean and the RMS deviation of all values in this bin. "
            "All bins except the rightmost include their left boundary, but not "
            "their right boundary; the rightmost bin includes both boundaries.\n");
        exit(0);
    }
    diffhisto = argc == 6;

    /* The positional arguments are always the last four. */
    colarg = strtol(argv[argc - 4], NULL, 0);
    if (colarg < 0) {
        fprintf(stderr, "histo: Column must be >= 0. Type histo --help for usage information.\n");
        exit(-1);
    }
    if (colarg > (INT_MAX - 4) / 3) {
        /* The scan format needs 3 characters per skipped column. */
        fprintf(stderr, "histo: Column index too large.\n");
        exit(-1);
    }
    col = (int)colarg;

    left = strtod(argv[argc - 3], NULL);
    right = strtod(argv[argc - 2], NULL);
    step = strtod(argv[argc - 1], NULL);

    /* Round the requested number of bins to the nearest integer. The ratio
     * is validated first because converting a NaN, infinite or too large
     * double to int is undefined behaviour (e.g. for a step of 0). */
    ratio = (right - left) / step + 0.5;
    if (!isfinite(ratio) || ratio < 0.0 || ratio >= (double)INT_MAX)
        nbins = -1;
    else
        nbins = (int)floor(ratio);
    if (nbins == 0)
        nbins = 1;
    /* The true bin width is adjusted so that the bins exactly tile the range. */
    step = (right - left) / nbins;
    if (nbins <= 0 || !isfinite(step) || step < 0.0) {
        fprintf(stderr, "histo: Obtained illegal values for number of bins and/or true bin size. Type histo --help for usage information.\n");
        exit(-1);
    }

    histo = alloc_zeroed((size_t)nbins, sizeof *histo);
    lower = alloc_zeroed((size_t)nbins, sizeof *lower);
    binsum = alloc_zeroed((size_t)nbins, sizeof *binsum);
    binsqrsum = alloc_zeroed((size_t)nbins, sizeof *binsqrsum);

    /* lower[b] is the left boundary of bin b. */
    lower[0] = left;
    for (bin = 1; bin < nbins; ++bin)
        lower[bin] = left + (right - left) * (double)bin / (double)nbins;

    /* Build the scan format: one "%*s" per column to skip, then "%lg". */
    format = malloc(3 * (size_t)col + 4);
    if (!format) {
        fprintf(stderr, "histo: Out of memory.\n");
        exit(-1);
    }
    pos = 0;
    for (i = 0; i < col; ++i) {
        memcpy(format + pos, "%*s", 3);
        pos += 3;
    }
    strcpy(format + pos, "%lg");

    line = 0;
    nvalues = 0;
    outofrange = 0;
    previous = 0;
    sum = sqrsum = 0;
    while (fgets(linebuf, LINEBUFLEN, stdin)) {
        ++line;
        if (!strchr(linebuf, '\n')) {
            /* Over-long line (or last line without newline): discard the
             * remainder so it is not mistaken for a new input line. */
            int ch;

            while ((ch = getchar()) != EOF && ch != '\n')
                ;
        }
        if (sscanf(linebuf, format, &value) < 1) {
            fprintf(stderr, "histo: Could not read value in line %d.\n", line);
            continue;
        }
        if (!isfinite(value)) {
            fprintf(stderr, "histo: Infinite value in line %d.\n", line);
            continue;
        }
        ++nvalues;
        histval = diffhisto ? value - previous : value;
        previous = value;
        /* The first value has no predecessor to form a difference with. */
        if (diffhisto && nvalues == 1)
            continue;

        /* The totals include values that fall outside the histogram range. */
        sum += histval;
        sqrsum += histval * histval;
        if (histval < left || histval > right) {
            ++outofrange;
            continue;
        }

        /* Find the first bin whose right boundary exceeds the value; values
         * not below any inner boundary belong to the rightmost bin, which
         * therefore also includes the value `right` itself. */
        for (bin = 0; bin < nbins - 1; ++bin)
            if (histval < lower[bin + 1])
                break;
        ++histo[bin];
        binsum[bin] += histval;
        binsqrsum[bin] += histval * histval;
    }

    if (nvalues > 0) {
        for (bin = 0; bin < nbins; ++bin) {
            if (histo[bin] > 0) {
                binsum[bin] /= histo[bin];
                binsqrsum[bin] /= histo[bin];
            }
            /* Empty bins and bins whose sums overflowed report 0 for both. */
            if (histo[bin] == 0 || !isfinite(binsum[bin]) || !isfinite(binsqrsum[bin]))
                binsum[bin] = binsqrsum[bin] = 0;
            printf("%g\t%d\t%g\t%g\n", lower[bin] + step / 2, histo[bin],
                   binsum[bin], rms_deviation(binsum[bin], binsqrsum[bin]));
        }

        if (diffhisto) {
            /* n values yield n-1 differences. */
            --nvalues;
            fprintf(stderr, "histo: Read %d valid values, formed %d differences, %d out of range given.\n", nvalues + 1, nvalues, outofrange);
        } else {
            fprintf(stderr, "histo: Read %d valid values, %d out of range given.\n", nvalues, outofrange);
        }

        /* With -d and a single input value there is nothing to average. */
        if (nvalues > 0) {
            sum /= nvalues;
            sqrsum /= nvalues;
            fprintf(stderr, "histo: Total mean %g, RMS deviation %g\n", sum, rms_deviation(sum, sqrsum));
        }
    }

    free(format);
    free(histo);
    free(lower);
    free(binsum);
    free(binsqrsum);
    return 0;
}