From: minima <minima>
Date: Wed, 23 Aug 2000 13:59:16 +0000 (+0000)
Subject: added DXDupe for persistent dupes (and to allow dup checking for other
X-Git-Tag: R_1_44~21
X-Git-Url: http://gb7djk.dxcluster.net/gitweb/gitweb.cgi?a=commitdiff_plain;h=44bab9382ffb0bf12639af84729d1c42ac4d9ae2;p=spider.git

added DXDupe for persistent dupes (and to allow dup checking for other
things as well - as required)
---

diff --git a/Changes b/Changes
index 80f46a0e..c3797703 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,6 @@
+23Aug00=======================================================================
+1. Added persistent dupe file so that all dupes are stored here (including
+announces) - announces are now kept for 5 days (as default).
 20Aug00=======================================================================
 1. Added system Alias for set/nodxgrid => unset/dxgrid
 2. Add full individual checking for all PC protocol fields in all messages
diff --git a/perl/AnnTalk.pm b/perl/AnnTalk.pm
index 911c2724..987b885d 100644
--- a/perl/AnnTalk.pm
+++ b/perl/AnnTalk.pm
@@ -12,48 +12,28 @@ use strict;
 
 use DXUtil;
 use DXDebug;
+use DXDupe;
 
 use vars qw(%dup $duplth $dupage);
 
-%dup = ();						# the duplicates hash
 $duplth = 60;					# the length of text to use in the deduping
-$dupage = 24*3600;               # the length of time to hold spot dups
+$dupage = 5*24*3600;			# the length of time to hold announce dups
 
 # enter the spot for dup checking and return true if it is already a dup
 sub dup
 {
 	my ($call, $to, $text) = @_; 
-	my $d = $main::systime;
 
 	chomp $text;
 	unpad($text);
 	$text = substr($text, 0, $duplth) if length $text > $duplth; 
-	my $dupkey = "$to|$text";
-	return 1 if exists $dup{$dupkey};
-	$dup{$dupkey} = $d;         # in seconds (to the nearest minute)
-	return 0; 
-}
-
-# called every hour and cleans out the dup cache
-sub process
-{
-	my $cutoff = $main::systime - $dupage;
-	while (my ($key, $val) = each %dup) {
-		delete $dup{$key} if $val < $cutoff;
-	}
+	my $dupkey = "A$to|$text";
+	return DXDupe::check($dupkey, $main::systime + $dupage);
 }
 
 sub listdups
 {
-	my $regex = shift;
-	$regex = '.*' unless $regex;
-	$regex =~ s/[\$\@\%]//g;
-	my @out;
-	for (sort { $dup{$a} <=> $dup{$b} } grep { m{$regex}i } keys %dup) {
-		my $val = $dup{$_};
-		push @out, "$_ = " . cldatetime($val);
-	}
-	return @out;
+	return DXDupe::listdups('A', $dupage, @_);
 }
 
 
diff --git a/perl/DXDupe.pm b/perl/DXDupe.pm
new file mode 100644
index 00000000..2ab0ca8c
--- /dev/null
+++ b/perl/DXDupe.pm
@@ -0,0 +1,84 @@
+#
+# class to handle all dupes in the system
+#
+# each dupe entry goes into a tied hash file 
+#
+# the only thing this class really does is provide a
+# mechanism for storing and checking dups
+#
+
+package DXDupe;
+
+use DXDebug;
+use DXUtil;
+use DXVars;
+
+use vars qw{$lasttime $dbm %d $default $fn};
+
+$default = 48*24*60*60;
+$lasttime = 0;
+$fn = "$main::data/dupefile";
+
+sub init
+{
+	$dbm = tie (%d, 'DB_File', $fn) or confess "can't open dupe file: $fn ($!)";
+}
+
+sub finish
+{
+	undef $dbm;
+	untie %d;
+}
+
+sub check
+{
+	my ($s, $t) = @_;
+	return 1 if exists $d{$s};
+	$t = $main::systime + $default unless $t;
+	$d{$s} = $t;
+	return 0;
+}
+
+sub del
+{
+	my $s = shift;
+	delete $d{$s};
+}
+
+sub process
+{
+	# once an hour: collect the expired keys first and delete them afterwards,
+	# as deleting from the tied hash while each() walks it may upset the iterator
+	if ($main::systime - $lasttime >=  3600) {
+		my @del = grep { $main::systime >= $d{$_} } keys %d;
+		delete $d{$_} for @del;
+		$lasttime = $main::systime;
+	}
+}
+
+sub get
+{
+	my $start = shift;
+	my @out;
+	while (($k, $v) = each %d) {
+		push @out, $k, $v if !$start || $k =~ /^$start/; 
+	}
+	return @out;
+}
+
+sub listdups
+{
+	my $let = shift;
+	my $dupage = shift;
+	my $regex = shift;
+
+	$regex =~ s/[\^\$\@\%]//g;
+	$regex = "^$let" . $regex;
+	my @out;
+	for (sort { $d{$a} <=> $d{$b} } grep { m{$regex}i } keys %d) {
+		my ($dum, $key) = unpack "a1a*", $_;
+		push @out, "$key = " . cldatetime($d{$_} - $dupage);
+	}
+	return @out;
+}
+1;
diff --git a/perl/DXProt.pm b/perl/DXProt.pm
index 09708332..e7f3c7c0 100644
--- a/perl/DXProt.pm
+++ b/perl/DXProt.pm
@@ -175,16 +175,16 @@ sub init
 	$me->{state} = "indifferent";
 	do "$main::data/hop_table.pl" if -e "$main::data/hop_table.pl";
 	confess $@ if $@;
-	#  $me->{sort} = 'M';    # M for me
+	$me->{sort} = 'S';    # S for spider
 
 	# now prime the spot and wwv  duplicates file with data
-    my @today = Julian::unixtoj(time);
-	for (Spot::readfile(@today), Spot::readfile(Julian::sub(@today, 1))) {
-		Spot::dup(@{$_}[0..3]);
-	}
-	for (Geomag::readfile(time)) {
-		Geomag::dup(@{$_}[1..5]);
-	}
+#    my @today = Julian::unixtoj(time);
+#	for (Spot::readfile(@today), Spot::readfile(Julian::sub(@today, 1))) {
+#		Spot::dup(@{$_}[0..3]);
+#	}
+#	for (Geomag::readfile(time)) {
+#		Geomag::dup(@{$_}[1..5]);
+#	}
 
 	# load the baddx file
 	do "$baddxfn" if -e "$baddxfn";
@@ -1027,9 +1027,9 @@ sub process
 	my $val;
 	my $cutoff;
 	if ($main::systime - 3600 > $last_hour) {
-		Spot::process;
-		Geomag::process;
-		AnnTalk::process;
+#		Spot::process;
+#		Geomag::process;
+#		AnnTalk::process;
 		$last_hour = $main::systime;
 	}
 }
diff --git a/perl/Geomag.pm b/perl/Geomag.pm
index 05aefeff..037dcc50 100644
--- a/perl/Geomag.pm
+++ b/perl/Geomag.pm
@@ -16,11 +16,12 @@ use DXLog;
 use Julian;
 use IO::File;
 use DXDebug;
+use DXDupe;
 
 use strict;
 use vars qw($date $sfi $k $a $r $forecast @allowed @denied $fp $node $from 
             $dirprefix $param
-            %dup $duplth $dupage);
+            $duplth $dupage);
 
 $fp = 0;						# the DXLog fcb
 $date = 0;						# the unix time of the WWV (notional)
@@ -33,7 +34,6 @@ $node = "";						# originating node
 $from = "";						# who this came from
 @allowed = ();					# if present only these callsigns are regarded as valid WWV updators
 @denied = ();					# if present ignore any wwv from these callsigns
-%dup = ();						# the spot duplicates hash
 $duplth = 20;					# the length of text to use in the deduping
 $dupage = 12*3600;				# the length of time to hold spot dups
 
@@ -252,34 +252,13 @@ sub dup
 	# dump if too old
 	return 2 if $d < $main::systime - $dupage;
  
-#	chomp $text;
-#	$text = substr($text, 0, $duplth) if length $text > $duplth; 
-	my $dupkey = "$d|$sfi|$k|$a";
-	return 1 if exists $dup{$dupkey};
-	$dup{$dupkey} = $d;         # in seconds (to the nearest minute)
-	return 0; 
-}
-
-# called every hour and cleans out the dup cache
-sub process
-{
-	my $cutoff = $main::systime - $dupage;
-	while (my ($key, $val) = each %dup) {
-		delete $dup{$key} if $val < $cutoff;
-	}
+	my $dupkey = "W$d|$sfi|$k|$a";
+	return DXDupe::check($dupkey, $main::systime+$dupage);
 }
 
 sub listdups
 {
-	my $regex = shift;
-	$regex = '.*' unless $regex;
-	$regex =~ s/[\$\@\%]//g;
-	my @out;
-	for (sort { $dup{$a} <=> $dup{$b} } grep { m{$regex}i } keys %dup) {
-		my $val = $dup{$_};
-		push @out, "$_ = " . cldatetime($val);
-	}
-	return @out;
+	return DXDupe::listdups('W', $dupage, @_);
 }
 1;
 __END__;
diff --git a/perl/Spot.pm b/perl/Spot.pm
index 1e7de69a..e7a619e0 100644
--- a/perl/Spot.pm
+++ b/perl/Spot.pm
@@ -15,16 +15,16 @@ use DXUtil;
 use DXLog;
 use Julian;
 use Prefix;
+use DXDupe;
 
 use strict;
-use vars qw($fp $maxspots $defaultspots $maxdays $dirprefix %dup $duplth $dupage);
+use vars qw($fp $maxspots $defaultspots $maxdays $dirprefix $duplth $dupage);
 
 $fp = undef;
 $maxspots = 50;					# maximum spots to return
 $defaultspots = 10;				# normal number of spots to return
 $maxdays = 35;					# normal maximum no of days to go back
 $dirprefix = "spots";
-%dup = ();						# the spot duplicates hash
 $duplth = 20;					# the length of text to use in the deduping
 $dupage = 3*3600;               # the length of time to hold spot dups
 
@@ -215,32 +215,13 @@ sub dup
 	chomp $text;
 	$text = substr($text, 0, $duplth) if length $text > $duplth; 
 	unpad($text);
-	my $dupkey = "$freq|$call|$d|$text";
-	return 1 if exists $dup{$dupkey};
-	$dup{$dupkey} = $d;         # in seconds (to the nearest minute)
-	return 0; 
-}
-
-# called every hour and cleans out the dup cache
-sub process
-{
-	my $cutoff = $main::systime - $dupage;
-	while (my ($key, $val) = each %dup) {
-		delete $dup{$key} if $val < $cutoff;
-	}
+	my $dupkey = "X$freq|$call|$d|$text";
+	return DXDupe::check($dupkey, $main::systime+$dupage);
 }
 
 sub listdups
 {
-	my $regex = shift;
-	$regex = '.*' unless $regex;
-	$regex =~ s/[\$\@\%]//g;
-	my @out;
-	for (sort { $dup{$a} <=> $dup{$b} } grep { m{$regex}i } keys %dup) {
-		my $val = $dup{$_};
-		push @out, "$_ = " . cldatetime($val);
-	}
-	return @out;
+	return DXDupe::listdups('X', $dupage, @_);
 }
 1;
 
diff --git a/perl/WCY.pm b/perl/WCY.pm
index 20b6a184..f3202dd4 100644
--- a/perl/WCY.pm
+++ b/perl/WCY.pm
@@ -20,7 +20,7 @@ use Data::Dumper;
 use strict;
 use vars qw($date $sfi $k $expk $a $r $sa $gmf $au  @allowed @denied $fp $node $from 
             $dirprefix $param
-            %dup $duplth $dupage);
+            $duplth $dupage);
 
 $fp = 0;						# the DXLog fcb
 $date = 0;						# the unix time of the WWV (notional)
@@ -35,7 +35,6 @@ $node = "";						# originating node
 $from = "";						# who this came from
 @allowed = ();					# if present only these callsigns are regarded as valid WWV updators
 @denied = ();					# if present ignore any wwv from these callsigns
-%dup = ();						# the spot duplicates hash
 $duplth = 20;					# the length of text to use in the deduping
 $dupage = 12*3600;				# the length of time to hold spot dups
 
@@ -227,34 +226,13 @@ sub dup
 	# dump if too old
 	return 2 if $d < $main::systime - $dupage;
  
-#	chomp $text;
-#	$text = substr($text, 0, $duplth) if length $text > $duplth; 
-	my $dupkey = "$d|$sfi|$k|$a|$r";
-	return 1 if exists $dup{$dupkey};
-	$dup{$dupkey} = $d;         # in seconds (to the nearest minute)
-	return 0; 
-}
-
-# called every hour and cleans out the dup cache
-sub process
-{
-	my $cutoff = $main::systime - $dupage;
-	while (my ($key, $val) = each %dup) {
-		delete $dup{$key} if $val < $cutoff;
-	}
+	my $dupkey = "C$d|$sfi|$k|$a|$r";
+	return DXDupe::check($dupkey, $main::systime+$dupage);
 }
 
 sub listdups
 {
-	my $regex = shift;
-	$regex = '.*' unless $regex;
-	$regex =~ s/[\$\@\%]//g;
-	my @out;
-	for (sort { $dup{$a} <=> $dup{$b} } grep { m{$regex}i } keys %dup) {
-		my $val = $dup{$_};
-		push @out, "$_ = " . cldatetime($val);
-	}
-	return @out;
+	return DXDupe::listdups('C', $dupage, @_);
 }
 1;
 __END__;
diff --git a/perl/cluster.pl b/perl/cluster.pl
index ec1030e8..dfae3278 100755
--- a/perl/cluster.pl
+++ b/perl/cluster.pl
@@ -61,6 +61,7 @@ use Filter;
 use DXDb;
 use AnnTalk;
 use WCY;
+use DXDupe;
 
 use Data::Dumper;
 use Fcntl ':flock'; 
@@ -219,6 +220,7 @@ sub cease
 	Msg->event_loop(1, 0.05);
 	Msg->event_loop(1, 0.05);
 	DXUser::finish();
+	DXDupe::finish();
 
 	# close all databases
 	DXDb::closeall;
@@ -340,6 +342,9 @@ for (keys %SIG) {
 	}
 }
 
+# start dupe system
+DXDupe::init();
+
 # read in system messages
 DXM->init();
 
@@ -410,6 +415,8 @@ for (;;) {
 		DXMsg::process();
 		DXDb::process();
 		DXUser::process();
+		DXDupe::process();
+		
 		eval { 
 			Local::process();       # do any localised processing
 		};