/*
 * osmtaggrep - filter an OSM XML stream by tag.
 *
 * Reads XML from stdin in FILEBUFSZ-sized chunks and prints every node, way
 * or relation that carries at least one <tag> child matching one of the
 * KEY or KEY=VALUE command-line arguments.  Keys and values are compared by
 * 32-bit hash, not by string, so false positives are possible.
 */
#include <ctype.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Input buffer; one extra byte so the data can always be NUL-terminated. */
#define FILEBUFSZ 1048576
static char fbuf[FILEBUFSZ+1];

/* Hashes of the keys/values to search for.  A value hash of 0 means
 * "any value".  Only the first ntags slots are in use. */
#define MAXTAGS 100
uint32_t tagkeys[MAXTAGS], tagvals[MAXTAGS];
static int ntags = 0;

/* Byte counters: total bytes read from stdin / total bytes shifted by memmove. */
uint64_t readtotal = 0, movetotal = 0;

void readargs(int argc, char **argv);
int readentry(size_t *readoff, uint64_t *id);
void skipquoted(char **c);
uint64_t getnum(char **c);
int greptag(int type, const char *key, const char *value);
uint32_t hash(const char *str);

/*
 * Entry point.  Returns 1 after printing usage, 2 if nothing could be read
 * from stdin, 0 otherwise.
 */
int main(int argc, char **argv)
{
    uint64_t id;
    size_t readlen, readoff;

    if (argc == 1 || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
        fprintf(stderr,
            "usage: osmtaggrep KEY[=VALUE] ...\n"
            "Reads a decompressed OSM XML file from stdin and outputs only nodes, ways and\n"
            "relations that have any of the given tag(s) set (to the given value(s) if\n"
            "present). Comparison of keys and values is by way of a hash function so there\n"
            "is a small chance of false positives.\n");
        return 1;
    }

    readargs(argc, argv);

    readlen = fread(fbuf, 1, FILEBUFSZ, stdin);
    if (readlen == 0) {
        fprintf(stderr, "Could not read from stdin.\n");
        return 2;
    }
    readoff = 0;
    readtotal = readlen;
    movetotal = 0;

    while (readlen) {
        /* On entry readoff is the number of leftover bytes carried over from
         * the previous chunk and readlen the number of freshly read bytes. */
        readlen += readoff;
        fbuf[readlen] = 0;
        readoff = 0;

        /* Consume complete entries until the parser runs out of data. */
        while (readentry(&readoff, &id) != -2)
            ;

        if (readoff == 0) {
            /* Not a single entry could be completed in this buffer. */
            if (readlen == FILEBUFSZ)
                fprintf(stderr, "Error: node too large for buffer, at offset %llu.\n",
                        (unsigned long long)(readtotal - readlen));
            break;
        }

        /* Move the unfinished tail to the front and refill behind it. */
        memmove(fbuf, fbuf + readoff, readlen - readoff);
        movetotal += readlen - readoff;
        readoff = readlen - readoff;
        readlen = fread(fbuf + readoff, 1, FILEBUFSZ - readoff, stdin);
        readtotal += readlen;
    }
    return 0;
}

/*
 * Generate the list of hashes of tag keys and values to grep for.
 *
 * Every argument is either KEY or KEY=VALUE; a KEY=VALUE argument is split in
 * place at the first '=' (argv is modified).  Exits with status 1 if more than
 * MAXTAGS arguments are given.
 */
void readargs(int argc, char **argv)
{
    int argind;

    ntags = 0;
    for (argind = 1; argind < argc; ++argind) {
        char *eq;

        if (ntags == MAXTAGS) {
            fprintf(stderr, "Too many arguments (max. %u allowed).\n", (unsigned)MAXTAGS);
            exit(1);
        }
        eq = strchr(argv[argind], '=');
        if (eq) {
            *eq = 0;
            tagvals[ntags] = hash(eq + 1);
        } else {
            tagvals[ntags] = 0;     /* match any value */
        }
        tagkeys[ntags] = hash(argv[argind]);
        ++ntags;
    }
}

/*
 * Parse the next XML element starting at fbuf + *readoff.
 *
 * For a node/way/relation with content, all children up to the first closing
 * tag are scanned; if any <tag> child matches the grep list the whole element
 * is printed to stdout followed by a newline.
 *
 * On success *readoff is advanced past the element.  When the buffer ends
 * before the element is complete, *readoff is left at the element's '<' (or
 * untouched if no '<' was found) so the caller can refill and retry.
 * *id receives the value of the element's " id=" attribute, or 0 if none.
 *
 * Returns -2 at end of buffer, -1 for any other element, 0 for node,
 * 1 for way, 2 for relation.
 *
 * NOTE: unquoted attribute values are only handled on a best-effort basis;
 * an unquoted key terminated by the single space before " v=" hides that
 * value attribute from the scanner.
 */
int readentry(size_t *readoff, uint64_t *id)
{
    char *read = fbuf + *readoff;
    int type;
    int print = 0;

    /* Locate the start of the next element. */
    while (*read && *read != '<')
        ++read;
    if (!*read)
        return -2;
    *readoff = (size_t)(read - fbuf);
    ++read;

    /* Classify; leave read on the space that follows the element name. */
    if (!strncmp(read, "node ", 5)) {
        type = 0;
        read += 4;
    } else if (!strncmp(read, "way ", 4)) {
        type = 1;
        read += 3;
    } else if (!strncmp(read, "relation ", 9)) {
        type = 2;
        read += 8;
    } else {
        type = -1;
    }

    /* Scan the opening tag until " id=", "/>" or ">", skipping quoted text. */
    while (*read && *read != '>'
           && (*read != ' ' || strncmp(read + 1, "id=", 3))
           && (*read != '/' || read[1] != '>')) {
        if (*read == '"' || *read == '\'')
            skipquoted(&read);
        else
            ++read;
    }
    if (!*read)
        return -2;

    if (*read == ' ') {
        /* must be " id=" */
        read += 4;
        *id = getnum(&read);
        while (*read && *read != '>' && (*read != '/' || read[1] != '>')) {
            if (*read == '"' || *read == '\'')
                skipquoted(&read);
            else
                ++read;
        }
        /* Without this check an unrecognised element cut off by the end of
         * the buffer would move *readoff past the terminating NUL. */
        if (!*read)
            return -2;
    } else {
        *id = 0;
    }

    if (*read == '/') {
        /* XML tag without content - no OSM tags */
        read += 2;
        *readoff = (size_t)(read - fbuf);
        return type;
    }
    if (type < 0) {
        ++read;
        *readoff = (size_t)(read - fbuf);
        return type;
    }

    /* *read must now be '>', so we may have sub-tags such as <tag .../> */
    for (;;) {
        char *tagkey, *tagval;
        char *keyend, *valend;      /* where a temporary NUL was written, or NULL */
        char keyendc, valendc;      /* the characters those NULs replaced */

        while (*read && *read != '<')
            ++read;
        if (!*read)
            return -2;

        if (read[1] == '/') {
            /* closing tag (</node> etc.): the entry is complete */
            read += 2;
            while (*read && *read != '>')
                ++read;
            if (!*read)
                return -2;
            ++read;
            if (print) {
                /* Temporarily terminate the entry so it can be printed as a string. */
                char saved = *read;
                *read = 0;
                printf("%s\n", fbuf + *readoff);
                *read = saved;
            }
            *readoff = (size_t)(read - fbuf);
            return type;
        }
        ++read;

        if (strncmp(read, "tag ", 4)) {
            /* Some other child element: skip to its closing '>'. */
            while (*read && *read != '>')
                ++read;
            continue;
        }

        /* <tag ...>: pick out the k= and v= attributes. */
        read += 3;
        tagkey = tagval = NULL;
        keyend = valend = NULL;
        keyendc = valendc = 0;

        while (*read && *read != '>') {
            if (*read == ' ' && (read[1] == 'k' || read[1] == 'v') && read[2] == '=') {
                char attr = read[1];
                char quote, endc;
                char *start;

                read += 3;
                quote = (*read == '"' || *read == '\'') ? *read++ : 0;
                start = read;
                if (quote) {
                    while (*read && *read != quote)
                        ++read;
                } else {
                    while (*read && isalnum((unsigned char)*read))
                        ++read;
                }

                /* A repeated attribute replaces the earlier one; put back the
                 * byte the earlier one overwrote so the buffer stays intact. */
                if (attr == 'k') {
                    if (keyend)
                        *keyend = keyendc;
                    keyend = NULL;
                    tagkey = start;
                } else {
                    if (valend)
                        *valend = valendc;
                    valend = NULL;
                    tagval = start;
                }

                if (!*read)
                    break;          /* buffer ended inside the attribute */

                /* NUL-terminate the attribute text in place, remembering the
                 * overwritten character so it can be restored afterwards. */
                endc = *read;
                if (attr == 'k') {
                    keyend = read;
                    keyendc = endc;
                } else {
                    valend = read;
                    valendc = endc;
                }
                *read++ = 0;
                if (endc == '>')
                    break;          /* unquoted value ran straight into the tag's '>' */
            } else if (*read == '"' || *read == '\'') {
                skipquoted(&read);
            } else {
                ++read;
            }
        }

        if (!*read) {
            /* Incomplete tag: undo our edits and ask for more data. */
            if (keyend)
                *keyend = keyendc;
            if (valend)
                *valend = valendc;
            return -2;
        }

        if (greptag(type, tagkey, tagval))
            print = 1;
        if (keyend)
            *keyend = keyendc;
        if (valend)
            *valend = valendc;
        /* read is now on the tag's '>' or just past it; the search for the
         * next '<' at the top of the loop continues from here. */
    }
}

/*
 * Advance *c past a quoted string.
 *
 * If *c points at a single or double quote, *c is moved just past the
 * matching closing quote, or to the terminating NUL if there is none.
 * Otherwise *c is advanced by one character unless it is already at the NUL.
 */
void skipquoted(char **c)
{
    char *read = *c;
    char quote;

    if (*read != '"' && *read != '\'') {
        if (*read)
            ++*c;
        return;
    }
    quote = *read++;
    while (*read && *read != quote)
        ++read;
    if (*read)
        ++read;
    *c = read;
}

/*
 * Parse a decimal number at *c, optionally wrapped in single or double
 * quotes, and advance *c past it (including a closing quote if present).
 * Returns the parsed value as converted by strtoull.
 */
uint64_t getnum(char **c)
{
    char *read = *c;
    uint64_t num;

    if (*read == '"' || *read == '\'')
        ++read;
    num = strtoull(read, &read, 10);
    if (*read == '"' || *read == '\'')
        ++read;
    *c = read;
    return num;
}

/*
 * Check if an OSM tag is on the grep list.
 *
 * key and value are NUL-terminated strings; either may be NULL.  Returns 1 if
 * the hash of key equals a listed key hash and that entry either has no value
 * restriction (value hash 0) or its value hash equals the hash of value;
 * returns 0 otherwise, including when key is NULL.  type is not used.
 */
int greptag(int type, const char *key, const char *value)
{
    uint32_t keyhash, valhash;
    int tagind;

    (void)type;
    if (!key)
        return 0;

    keyhash = hash(key);
    valhash = value ? hash(value) : 0;
    for (tagind = 0; tagind < ntags; ++tagind) {
        if (tagkeys[tagind] == keyhash
            && (!tagvals[tagind] || tagvals[tagind] == valhash))
            return 1;
    }
    return 0;
}

/*
 * One-at-a-time hash by Bob Jenkins.
 *
 * Hashes the NUL-terminated string str; the empty string hashes to 0.
 * Characters are added as plain char, so the result for bytes >= 0x80
 * depends on the signedness of char.
 */
uint32_t hash(const char *str)
{
    uint32_t h = 0;

    while (*str) {
        h += *str++;
        h += (h << 10);
        h ^= (h >> 6);
    }
    h += (h << 3);
    h ^= (h >> 11);
    h += (h << 15);
    return h;
}