X-Git-Url: http://gb7djk.dxcluster.net/gitweb/gitweb.cgi?a=blobdiff_plain;f=perl%2FDXDupe.pm;fp=perl%2FDXDupe.pm;h=2ab0ca8cee3c81c15e677dafa8d855511186bb10;hb=44bab9382ffb0bf12639af84729d1c42ac4d9ae2;hp=0000000000000000000000000000000000000000;hpb=af0e6bdd59291e0fb872660f261ad720288068b5;p=spider.git

diff --git a/perl/DXDupe.pm b/perl/DXDupe.pm
new file mode 100644
index 00000000..2ab0ca8c
--- /dev/null
+++ b/perl/DXDupe.pm
@@ -0,0 +1,84 @@
+#
+# class to handle all dupes in the system
+#
+# each dupe entry goes into a tied hash file
+#
+# the only thing this class really does is provide a
+# mechanism for storing and checking dups
+#
+
+package DXDupe;
+
+use DXDebug;
+use DXUtil;
+use DXVars;
+
+use vars qw{$lasttime $dbm %d $default $fn};
+
+$default = 48*24*60*60;
+$lasttime = 0;
+$fn = "$main::data/dupefile";
+
+sub init
+{
+	$dbm = tie (%d, 'DB_File', $fn) or confess "can't open dupe file: $fn ($!)";
+}
+
+sub finish
+{
+	undef $dbm;
+	untie %d;
+}
+
+sub check
+{
+	my ($s, $t) = @_;
+	return 1 if exists $d{$s};
+	$t = $main::systime + $default unless $t;
+	$d{$s} = $t;
+	return 0;
+}
+
+sub del
+{
+	my $s = shift;
+	delete $d{$s};
+}
+
+sub process
+{
+	# once an hour
+	if ($main::systime - $lasttime >= 3600) {
+		while (($k, $v) = each %d) {
+			delete $d{$k} if $main::systime >= $v;
+		}
+		$lasttime = $main::systime;
+	}
+}
+
+sub get
+{
+	my $start = shift;
+	my @out;
+	while (($k, $v) = each %d) {
+		push @out, $k, $v if !$start || $k =~ /^$start/;
+	}
+	return @out;
+}
+
+sub listdups
+{
+	my $let = shift;
+	my $dupage = shift;
+	my $regex = shift;
+
+	$regex =~ s/[\^\$\@\%]//g;
+	$regex = "^$let" . $regex;
+	my @out;
+	for (sort { $d{$a} <=> $d{$b} } grep { m{$regex}i } keys %d) {
+		my ($dum, $key) = unpack "a1a*", $_;
+		push @out, "$key = " . cldatetime($d{$_} - $dupage);
+	}
+	return @out;
+}
+1;
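
For context, a minimal sketch of how a caller might use this module inside the DXSpider environment (it assumes the usual globals $main::data and $main::systime exist). The key layout ("X" plus callsign and frequency), the 24-hour lifetime and the handle_spot() wrapper are illustrative assumptions for this sketch, not part of DXDupe itself; the module only provides init/finish, check, del, process, get and listdups.

# Hypothetical caller - illustrative only, not part of spider.git
use strict;
use warnings;

use DB_File;	# DXDupe ties %d to DB_File but does not load it itself,
		# so the surrounding code is assumed to have done so
use DXDupe;

DXDupe::init();		# tie the dupe hash to $main::data/dupefile

sub handle_spot
{
	my ($call, $freq) = @_;

	# assumed key layout: a single type letter followed by the data
	my $key = "X$call|$freq";

	# check() returns 1 if the key is already recorded (a dupe);
	# otherwise it stores the key with the given expiry time and returns 0
	return if DXDupe::check($key, $main::systime + 24*60*60);

	# ... not a dupe: carry on processing the spot here ...
}

# called from the main loop; only does real work once an hour,
# deleting keys whose expiry time has passed
DXDupe::process();

# untie the hash at shutdown
DXDupe::finish();

Note that listdups() strips the first character of each key with unpack "a1a*" and anchors its match with "^$let", so keys are expected to begin with a single type letter (the "X" above) identifying which subsystem stored them.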