9723229abdf51b4867350cbb3b715b4f62c9d509
[spider.git] / perl / QRZ.pm
1 #!/usr/bin/perl -w
2
3 package QRZ;
4
5 use HTML::Parser;
6 use Data::Dumper;
7
8 @ISA = qw( HTML::Parser );
9
10 use strict;
11
12 sub new
13 {
14     my $pkg = shift;
15         my $self = SUPER::new $pkg;
16         $self->{list} = [];
17         $self->{state} = 'pre';
18     $self->{sort} = undef;
19         $self->{debug} = 0;
20     $self->{call} = uc shift;
21         return $self;
22 }
23
24 sub start
25 {
26         my ($self, $tag, $attr, $attrseq, $origtext) = @_;
27         if ($self->{debug}) {
28                 print "$self->{state} $tag";
29         if ($attr) {
30                         my $dd = new Data::Dumper([$attr], [qw(attr)]);
31                         $dd->Terse(1);
32                         $dd->Indent(0);
33                         $dd->Quotekeys(0);
34                         print " ", $dd->Dumpxs;
35                 }
36                 print "\n";
37         }
38         if ($self->{state} eq 'addr') {
39                 if ($tag eq 'br') {
40                         $self->{addr} .= ", " if length $self->{addr} > $self->{laddr};
41                         $self->{laddr} = length $self->{addr};
42                 } elsif ($tag eq 'p') {
43             push @{$self->{list}}, $self->{addr} ? "$self->{call}|addr|$self->{addr}" : "$self->{call}|addr|unknown";
44                         $self->state('semail');
45                 }
46         } elsif ($self->{state} eq 'email') {
47                 if ($tag eq 'a') {
48                         my $email = $attr->{href};
49                         if ($email) {
50                                 return if $email =~ m{/uedit.html};
51                                 $email =~ s/mailto://i;
52                                 push @{$self->{list}}, "$self->{call}|email|$email";
53                         }
54                 } elsif ($tag eq 'br' || $tag eq 'p') {
55                         $self->state('post');
56                 }
57         }
58 }
59
60 sub text
61 {
62         my ($self, $text) = @_;
63         $text =~ s/^[\s\r\n]+//g;
64         $text =~ s/[\s\r\n]+$//g;
65     print "$self->{state} text $text\n" if $self->{debug};      
66         if (length $text) {
67                 if ($self->{state} eq 'pre' && $text =~ /$self->{call}/i ) {
68                         $self->state('addr');
69                         $self->{addr} = "";
70                         $self->{laddr} = 0;
71                 } elsif ($self->{state} eq 'addr') {
72                         $text =~ s/\ //gi;
73                         $self->{addr} .= $text;
74                 } elsif ($self->{state} eq 'semail' && $text =~ /Email/i ) {
75                         $self->state('email');
76                 }
77         }
78 }
79
80 sub state
81 {
82         my $self = shift;
83         $self->{state} = shift if @_;
84         return $self->{state};
85 }
86
87 sub end
88 {
89         my ($self, $tag, $origtext) = @_;
90     print "$self->{state} /$tag\n" if $self->{debug};
91 }
92
93 sub debug
94 {
95         my ($self, $val) = @_;
96         $self->{debug} = $val;
97 }
98
99 sub answer
100 {
101         my $self = shift;
102         return @{$self->{list}};
103 }
104
105 1;
106