#!/usr/bin/perl -w

use strict;
use HTML::PullParser;
use HTML::Tagset;

# Consume the leading command-line arguments that look like filters and are
# not names of existing files.  Accepted forms:
#   tag                     delete every <tag> element
#   tag@attribute           ... only if the attribute is present
#   tag@attribute=string    ... only if the attribute value equals string
#   tag@attribute~regex     ... only if the attribute value matches regex
# Each filter is stored as [ tag, attribute, string, regex ]; the parts that
# were not given are left undefined/absent.
my @filters;
while( @ARGV && ! -e $ARGV[0] && $ARGV[0] =~ /^(\w+)(?:\@(\w+)(?:([~=])(.*))?)?$/ ) {
    # Copy the captures right away so no later match can clobber them.
    my( $tag, $attr, $op, $value )= ( $1, $2, $3, $4 );

    # HTML::Parser reports tag and attribute names in lower case (unless
    # case_sensitive is enabled, which this script never does), so normalise
    # the filter the same way; otherwise e.g. 'IMG@SRC' could never match.
    my @filter= ( lc($tag), defined($attr) ? lc($attr) : undef );

    if( defined $op ) {
        if( $op eq "=" ) {
            $filter[2]= $value;
        }
        else {
            # '~': the remainder of the argument is a user-supplied regex.
            $filter[3]= qr/$value/;
        }
    }
    push @filters, \@filter;
    shift @ARGV;
}

# After the filters have been shifted off, exactly one argument (the input
# file) must remain.  The usage text goes to STDERR with a non-zero exit
# status so a bad invocation whose output is redirected is not mistaken for
# a successful run.
if( @ARGV != 1 ) {
    print STDERR 'usage: xpathdelhtml.pl <tag>[@<attribute>[=<string>|~<regex>]] [...] input.html', "\n";
    exit 1;
}

# Example: strip the photos off a 4paddlers.com river guide page with the filter  'img@src~/[^w\d]'

# Whole input document, read in one go.
my $data;

{
    # Slurp mode: with $/ undefined a single read returns the entire input.
    # Localised so the setting ends with this block.
    local $/;

    if( defined($ARGV[0]) ) {
        # Three-argument open on a lexical handle; the low-precedence 'or'
        # makes the die apply to open() itself.  (The former
        # 'open IN, $ARGV[0] || die ...' bound the '||' to the file name, so
        # a failed open was never reported.)
        open( my $in, '<', $ARGV[0] )
            or die "cannot open source file $ARGV[0]: $!\n";
        $data= <$in>;
        close $in;
    }
    else {
        # NOTE(review): unreachable while the usage check requires exactly
        # one remaining argument; kept as the STDIN fallback.
        $data= <STDIN>;
    }
}

# Token layouts requested from the pull parser; element 0 is a type marker:
#   text:   [ "v", original text ]
#   start:  [ "s", original text, tag name, attribute hash ref ]
#   end:    [ "e", original text ]
# NOTE(review): no argspec is given for comments, declarations or processing
# instructions, so presumably those never show up as tokens (and hence never
# reach the output) -- confirm against the HTML::PullParser documentation.
my %argspec= (
    start => '"s", text, tagname, attr',
    end   => '"e", text',
    text  => '"v", text',
);

my $p= HTML::PullParser->new( doc => \$data, %argspec );

# Copy the token stream to STDOUT, leaving out every element that matches one
# of @filters together with everything nested inside it.
#
# $depth      number of currently open non-empty elements: incremented on
#             each start tag that is not in %HTML::Tagset::emptyElement and
#             decremented on each end tag.  This assumes every such element
#             has an explicit end tag; markup relying on omitted end tags
#             will throw the count off.
# $blockdepth depth of the element currently being deleted, undef otherwise.
my $depth= 0;
my $blockdepth;

token:
while( defined(my $tok= $p->get_token()) )
{
    # Evaluated before the counters change, so the end tag that closes the
    # deleted element is still suppressed.
    my $block= defined($blockdepth) && $depth >= $blockdepth;

    if( $tok->[0] eq "e" && $depth > 0 ) {
        # Leaving the deleted element: stop blocking after this token.
        undef $blockdepth if defined($blockdepth) && $depth == $blockdepth;
        --$depth;
    }
    elsif( $tok->[0] eq "s" ) {
        my $emptytag= $HTML::Tagset::emptyElement{$tok->[2]};
        ++$depth unless $emptytag;

        # Already inside a deleted element: only keep the depth up to date.
        # Matching filters again here would overwrite $blockdepth with the
        # deeper level, and the nested end tag would then end the blocking
        # too early, leaking the rest of the outer element into the output.
        next token if $block;

        for my $f (@filters) {
            my( $tag, $attr, $string, $regex )= @$f;
            next unless $tok->[2] eq $tag;
            if( defined $attr ) {
                my $value= $tok->[3]->{$attr};
                next unless defined $value;
                next if defined($string) && $value ne $string;
                next if defined($regex) && $value !~ $regex;
            }
            # An empty element has no content or end tag to suppress, so
            # dropping its start tag is all that is needed.
            $blockdepth= $depth unless $emptytag;
            next token;
        }
    }
    print $tok->[1] unless $block;
}



