add a literal dupe check for the comments in spots

[spider.git] / perl / Spot.pm
diff --git a/perl/Spot.pm b/perl/Spot.pm

index a7b2f76e6661943c2ba98736eb6af6736217e79c..f331d105b540ae1cd2e4b414c631ffcfbf97eca4 100644 (file)
--- a/perl/Spot.pm
+++ b/perl/Spot.pm
@@ -36,7 +36,7 @@ $defaultspots = 10;                           # normal number of spots to return
  $maxdays = 100;                                # normal maximum no of days to go back
  $dirprefix = "spots";
  $duplth = 20;                                  # the length of text to use in the deduping
-$dupage = 3*3600;               # the length of time to hold spot dups
+$dupage = 1*3600;               # the length of time to hold spot dups
  $maxcalllth = 12;                               # the max length of call to take into account for dupes
  $filterdef = bless ([
                           # tag, sort, field, priv, special parser 
@@ -319,7 +319,7 @@ sub readfile($)
  # enter the spot for dup checking and return true if it is already a dup
  sub dup
  {
-       my ($freq, $call, $d, $text, $by) = @_; 
+       my ($freq, $call, $d, $text, $by, $cty) = @_; 
  
         # dump if too old
         return 2 if $d < $main::systime - $dupage;
@@ -328,37 +328,33 @@ sub dup
         $d = int ($d / 60);
         $d *= 60;
  
+       # remove SSID or area (a trailing -digits or /digits) from $by
+       $by =~ s|[-/]\d+$||;
+       
         $freq = sprintf "%.1f", $freq;       # normalise frequency
         $call = substr($call, 0, $maxcalllth) if length $call > $maxcalllth;
  
         chomp $text;
         $text =~ s/\%([0-9A-F][0-9A-F])/chr(hex($1))/eg;
         $text = uc unpad($text);
-       $text = substr($text, 0, $duplth) if length $text > $duplth; 
+       if ($cty && $text && length $text <= 4) {
+               unless ($text =~ /^C?Q/ || $text =~ /^\d+$/) {
+                       my @try = Prefix::cty_data($text);
+                       $text = "" if $cty == $try[0];
+               }
+       }
+       my $otext = $text;
         $text = pack("C*", map {$_ & 127} unpack("C*", $text));
-       my $ldupkey = "X$freq|$call|$by|";
+       $text =~ s/[^\w]//g;
+       $text = substr($text, 0, $duplth) if length $text > $duplth; 
+       my $ldupkey = "X$freq|$call|$by|$text";
         my $t = DXDupe::find($ldupkey);
-       if ($t && $t - $main::systime > 0) {
-               my ($prefix) = $text = /\b(\w{1,4})$/;
-               if ($prefix) {
-                       my @ans = Prefix::extract($prefix);
-                       if (@ans) {
-
-                               # if we find a prefix then chop it off
-                               # the end of the string and then look for
-                               # a spot with that text. If we find it then
-                               # it has be sucked from an AR-C node and is
-                               # a dupe.
-                               my $txt = $text;
-                               $txt =~ s/\b\w{1,4}$//;
-                               $txt =~ s/[^A-Z0-9]//g;
-                               $t = DXDupe::find($ldupkey . $txt);
-                               return 1 if $t && $t - $main::systime > 0;
-                       }
-               } 
-       }
-       $text =~ s/[^A-Z0-9]//g;
-       $ldupkey .= $text;
+       return 1 if $t && $t - $main::systime > 0;
+       DXDupe::add($ldupkey, $main::systime+$dupage);
+       $otext = substr($otext, 0, $duplth) if length $otext > $duplth; 
+       $ldupkey = "X$freq|$call|$by|$otext";
+       $t = DXDupe::find($ldupkey);
+       return 1 if $t && $t - $main::systime > 0;
         DXDupe::add($ldupkey, $main::systime+$dupage);
  #      my $sdupkey = "X$freq|$call|$by";
  #      $t = DXDupe::find($sdupkey);