mirror of
https://github.com/imapsync/imapsync.git
synced 2025-06-05 12:17:16 +02:00
559 lines
17 KiB
Perl
Executable file
559 lines
17 KiB
Perl
Executable file
use warnings;
|
|
use strict;
|
|
|
|
package Mail::IMAPClient::BodyStructure;
|
|
our $VERSION = '0.0.4';
|
|
|
|
use Mail::IMAPClient::BodyStructure::Parse;
|
|
|
|
# my has file scope, not limited to package!
|
|
my $parser = Mail::IMAPClient::BodyStructure::Parse->new
|
|
or die "Cannot parse rules: $@\n"
|
|
. "Try remaking Mail::IMAPClient::BodyStructure::Parse.\n";
|
|
|
|
sub new
|
|
{ my $class = shift;
|
|
my $bodystructure = shift;
|
|
|
|
my $self = $parser->start($bodystructure)
|
|
or return undef;
|
|
|
|
$self->{_prefix} = "";
|
|
$self->{_id} = exists $self->{bodystructure} ? 'HEAD' : 1;
|
|
$self->{_top} = 1;
|
|
|
|
bless $self, ref($class)||$class;
|
|
}
|
|
|
|
sub _get_thingy
|
|
{ my $thingy = shift;
|
|
my $object = shift || (ref $thingy ? $thingy : undef);
|
|
|
|
unless ($object && ref $object)
|
|
{ warn $@ = "No argument passed to $thingy method.";
|
|
return undef;
|
|
}
|
|
|
|
unless(UNIVERSAL::isa($object, 'HASH') && exists $object->{$thingy})
|
|
{ my $a = $thingy =~ /^[aeiou]/i ? 'an' : 'a';
|
|
my $has = ref $object eq 'HASH' ? join(", ",keys %$object) : '';
|
|
warn $@ = ref($object)." $object does not have $a $thingy. "
|
|
. ($has ? "It has $has" : '');
|
|
return undef;
|
|
}
|
|
|
|
my $value = $object->{$thingy};
|
|
$value =~ s/\\ ( [\\\(\)"\x0d\x0a] )/$1/gx;
|
|
$value =~ s/^"(.*)"$/$1/;
|
|
$value;
|
|
}
|
|
|
|
BEGIN
|
|
{ no strict 'refs';
|
|
foreach my $datum (
|
|
qw/ bodytype bodysubtype bodyparms bodydisp bodyid bodydesc bodyenc
|
|
bodysize bodylang envelopestruct textlines / )
|
|
{ *$datum = sub { _get_thingy($datum, @_) };
|
|
}
|
|
}
|
|
|
|
sub parts
|
|
{ my $self = shift;
|
|
return wantarray ? @{$self->{PartsList}} : $self->{PartsList}
|
|
if exists $self->{PartsList};
|
|
|
|
my @parts;
|
|
$self->{PartsList} = \@parts;
|
|
|
|
unless(exists $self->{bodystructure})
|
|
{ $self->{PartsIndex}{1} = $self;
|
|
@parts = ("HEAD", 1);
|
|
return wantarray ? @parts : \@parts;
|
|
}
|
|
|
|
foreach my $p ($self->bodystructure)
|
|
{ my $id = $p->id;
|
|
push @parts, $id;
|
|
$self->{PartsIndex}{$id} = $p ;
|
|
my $type = uc $p->bodytype || '';
|
|
|
|
push @parts, "$id.HEAD"
|
|
if $type eq 'MESSAGE';
|
|
}
|
|
|
|
wantarray ? @parts : \@parts;
|
|
}
|
|
|
|
sub bodystructure
|
|
{ my $self = shift;
|
|
my $partno = 0;
|
|
my @parts;
|
|
|
|
if($self->{_top})
|
|
{ $self->{_id} ||= "HEAD";
|
|
$self->{_prefix} ||= "HEAD";
|
|
$partno = 0;
|
|
foreach my $b ( @{$self->{bodystructure}} )
|
|
{ $b->{_id} = ++$partno;
|
|
$b->{_prefix} = $partno;
|
|
push @parts, $b, $b->bodystructure;
|
|
}
|
|
return wantarray ? @parts : \@parts;
|
|
}
|
|
|
|
my $prefix = $self->{_prefix} || "";
|
|
$prefix =~ s/\.?$/./;
|
|
|
|
foreach my $p ( @{$self->{bodystructure}} )
|
|
{ $partno++;
|
|
$p->{_prefix} = "$prefix$partno";
|
|
$p->{_id} ||= "$prefix$partno";
|
|
push @parts, $p, $p->{bodystructure} ? $p->bodystructure : ();
|
|
}
|
|
|
|
wantarray ? @parts : \@parts;
|
|
}
|
|
|
|
sub id
|
|
{ my $self = shift;
|
|
return $self->{_id}
|
|
if exists $self->{_id};
|
|
|
|
return "HEAD"
|
|
if $self->{_top};
|
|
|
|
if ($self->{bodytype} eq 'MULTIPART')
|
|
{ my $p = $self->{_id} || $self->{_prefix};
|
|
$p =~ s/\.$//;
|
|
return $p;
|
|
}
|
|
else
|
|
{ return $self->{_id} ||= 1;
|
|
}
|
|
}
|
|
|
|
package Mail::IMAPClient::BodyStructure::Part;
|
|
our @ISA = qw/Mail::IMAPClient::BodyStructure/;
|
|
|
|
package Mail::IMAPClient::BodyStructure::Envelope;
|
|
our @ISA = qw/Mail::IMAPClient::BodyStructure/;
|
|
|
|
sub new
|
|
{ my ($class, $envelope) = @_;
|
|
$parser->envelope($envelope);
|
|
}
|
|
|
|
sub from_addresses { shift->_addresses(from => 1) }
|
|
sub sender_addresses { shift->_addresses(sender => 1) }
|
|
sub replyto_addresses { shift->_addresses(replyto => 1) }
|
|
sub to_addresses { shift->_addresses(to => 0) }
|
|
sub cc_addresses { shift->_addresses(cc => 0) }
|
|
sub bcc_addresses { shift->_addresses(bcc => 0) }
|
|
|
|
sub _address($$$)
|
|
{ my ($self, $name, $isSender) = @_;
|
|
ref $self->{$name} eq 'ARRAY'
|
|
or return ();
|
|
|
|
my @list;
|
|
foreach ( @{$self->{$name}} )
|
|
{ my $pn = $_->personalname;
|
|
my $name = $pn && $pn ne 'NIL' ? "$pn " : '';
|
|
push @list, $pn. '<'.$_->mailboxname .'@'. $_->hostname.'>';
|
|
}
|
|
|
|
wantarray ? @list
|
|
: $isSender ? $list[0]
|
|
: \@list;
|
|
}
|
|
|
|
BEGIN
|
|
{ no strict 'refs';
|
|
for my $datum ( qw(subject inreplyto from messageid bcc date
|
|
replyto to sender cc))
|
|
{ *$datum = sub { @_ > 1 ? $_[0]->{$datum} = $_[1] : $_[0]->{$datum} }
|
|
}
|
|
}
|
|
|
|
|
|
package Mail::IMAPClient::BodyStructure::Address;
|
|
@ISA = qw/Mail::IMAPClient::BodyStructure/;
|
|
|
|
for my $datum ( qw(personalname mailboxname hostname sourcename) )
|
|
{ no strict 'refs';
|
|
*$datum = sub { shift->{$datum}; };
|
|
}
|
|
|
|
1;
|
|
|
|
__END__
|
|
|
|
=head1 NAME
|
|
|
|
Mail::IMAPClient::BodyStructure - parse fetched results
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
use Mail::IMAPClient::BodyStructure;
|
|
use Mail::IMAPClient;
|
|
|
|
my $imap = Mail::IMAPClient->new(Server=>$serv,User=>$usr,Password=>$pwd);
|
|
$imap->select("INBOX") or die "cannot select the inbox for $usr: $@\n";
|
|
|
|
my @recent = $imap->search("recent");
|
|
|
|
foreach my $id (@recent)
|
|
{ my $fetched = $imap->fetch($id, "bodystructure");
|
|
my $struct = Mail::IMAPClient::BodyStructure->new($fetched);
|
|
|
|
my $mime = $struct->bodytype."/".$struct->bodysubtype;
|
|
my $parts =join "\n\t", $struct->parts;
|
|
print "Msg $id (Content-type: $mime) contains these parts:\n\t$parts\n";
|
|
}
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
This extension will parse the result of an IMAP FETCH BODYSTRUCTURE
|
|
command into a perl data structure. It also provides helper methods that
|
|
will help you pull information out of the data structure.
|
|
|
|
Use of this extension requires Parse::RecDescent. If you don't have
|
|
Parse::RecDescent then you must either get it or refrain from using
|
|
this module.
|
|
|
|
=head2 EXPORT
|
|
|
|
Nothing is exported by default. C<$parser> is exported upon
|
|
request. C<$parser> is the BodyStucture object's Parse::RecDescent object,
|
|
which you'll probably only need for debugging purposes.
|
|
|
|
=head1 Class Methods
|
|
|
|
The following class method is available:
|
|
|
|
=head2 new
|
|
|
|
This class method is the constructor method for instantiating new
|
|
Mail::IMAPClient::BodyStructure objects. The B<new> method accepts one
|
|
argument, a string containing a server response to a FETCH BODYSTRUCTURE
|
|
directive. Only one message's body structure should be described in this
|
|
string, although that message may contain an arbitrary number of parts.
|
|
|
|
If you know the messages sequence number or unique ID (UID)
|
|
but haven't got its body structure, and you want to get the body
|
|
structure and parse it into a B<Mail::IMAPClient::BodyStructure>
|
|
object, then you might as well save yourself some work and use
|
|
B<Mail::IMAPClient>'s B<get_bodystructure> method, which accepts
|
|
a message sequence number (or UID if I<Uid> is true) and returns a
|
|
B<Mail::IMAPClient::BodyStructure> object. It's functionally equivalent
|
|
to issuing the FETCH BODYSTRUCTURE IMAP client command and then passing
|
|
the results to B<Mail::IMAPClient::BodyStructure>'s B<new> method but
|
|
it does those things in one simple method call.
|
|
|
|
=head1 Object Methods
|
|
|
|
The following object methods are available:
|
|
|
|
=head2 bodytype
|
|
|
|
The B<bodytype> object method requires no arguments. It returns the
|
|
bodytype for the message whose structure is described by the calling
|
|
B<Mail::IMAPClient::Bodystructure> object.
|
|
|
|
=head2 bodysubtype
|
|
|
|
The B<bodysubtype> object method requires no arguments. It returns the
|
|
bodysubtype for the message whose structure is described by the calling
|
|
B<Mail::IMAPClient::Bodystructure> object.
|
|
|
|
=head2 bodyparms
|
|
|
|
The B<bodyparms> object method requires no arguments. It returns the
|
|
bodyparms for the message whose structure is described by the calling
|
|
B<Mail::IMAPClient::Bodystructure> object.
|
|
|
|
=head2 bodydisp
|
|
|
|
The B<bodydisp> object method requires no arguments. It returns the
|
|
bodydisp for the message whose structure is described by the calling
|
|
B<Mail::IMAPClient::Bodystructure> object.
|
|
|
|
=head2 bodyid
|
|
|
|
The B<bodyid> object method requires no arguments. It returns the
|
|
bodyid for the message whose structure is described by the calling
|
|
B<Mail::IMAPClient::Bodystructure> object.
|
|
|
|
=head2 bodydesc
|
|
|
|
The B<bodydesc> object method requires no arguments. It returns the
|
|
bodydesc for the message whose structure is described by the calling
|
|
B<Mail::IMAPClient::Bodystructure> object.
|
|
|
|
=head2 bodyenc
|
|
|
|
The B<bodyenc> object method requires no arguments. It returns the
|
|
bodyenc for the message whose structure is described by the calling
|
|
B<Mail::IMAPClient::Bodystructure> object.
|
|
|
|
=head2 bodysize
|
|
|
|
The B<bodysize> object method requires no arguments. It returns the
|
|
bodysize for the message whose structure is described by the calling
|
|
B<Mail::IMAPClient::Bodystructure> object.
|
|
|
|
=head2 bodylang
|
|
|
|
The B<bodylang> object method requires no arguments. It returns the
|
|
bodylang for the message whose structure is described by the calling
|
|
B<Mail::IMAPClient::Bodystructure> object.
|
|
|
|
=head2 bodystructure
|
|
|
|
The B<bodystructure> object method requires no arguments. It returns
|
|
the bodystructure for the message whose structure is described by the
|
|
calling B<Mail::IMAPClient::Bodystructure> object.
|
|
|
|
=head2 envelopestruct
|
|
|
|
The B<envelopestruct> object method requires no arguments. It returns
|
|
the envelopestruct for the message whose structure is described by the
|
|
calling B<Mail::IMAPClient::Bodystructure> object. This envelope structure
|
|
is blessed into the B<Mail::IMAPClient::BodyStructure::Envelope> subclass,
|
|
which is explained more fully below.
|
|
|
|
=head2 textlines
|
|
|
|
The B<textlines> object method requires no arguments. It returns the
|
|
textlines for the message whose structure is described by the calling
|
|
B<Mail::IMAPClient::Bodystructure> object.
|
|
|
|
=head1 Envelopes and the Mail::IMAPClient::BodyStructure::Envelope Subclass
|
|
|
|
The IMAP standard specifies that output from the IMAP B<FETCH
|
|
ENVELOPE> command will be an RFC2060 envelope structure. It further
|
|
specifies that output from the B<FETCH BODYSTRUCTURE> command may also
|
|
contain embedded envelope structures (if, for example, a message's
|
|
subparts contain one or more included messages). Objects belonging to
|
|
B<Mail::IMAPClient::BodyStructure::Envelope> are Perl representations
|
|
of these envelope structures, which is to say the nested parenthetical
|
|
lists of RFC2060 translated into a Perl datastructure.
|
|
|
|
Note that all of the fields relate to the specific part to which they
|
|
belong. In other words, output from a FETCH nnnn ENVELOPE command (or,
|
|
in B<Mail::IMAPClient>, C<$imap->fetch($msgid,"ENVELOPE")> or C<my $env =
|
|
$imap->get_envelope($msgid)>) are for the message, but fields from within
|
|
a bodystructure relate to the message subpart and not the parent message.
|
|
|
|
An envelope structure's B<Mail::IMAPClient::BodyStructure::Envelope>
|
|
representation is a hash of thingies that looks like this:
|
|
|
|
{
|
|
subject => "subject",
|
|
inreplyto => "reference_message_id",
|
|
from => [ addressStruct1 ],
|
|
messageid => "message_id",
|
|
bcc => [ addressStruct1, addressStruct2 ],
|
|
date => "Tue, 09 Jul 2002 14:15:53 -0400",
|
|
replyto => [ adressStruct1, addressStruct2 ],
|
|
to => [ adressStruct1, addressStruct2 ],
|
|
sender => [ adressStruct1 ],
|
|
cc => [ adressStruct1, addressStruct2 ],
|
|
}
|
|
|
|
The B<...::Envelope> object also has methods for accessing data in the
|
|
structure. They are:
|
|
|
|
=over 4
|
|
|
|
=item date
|
|
|
|
Returns the date of the message.
|
|
|
|
=item inreplyto
|
|
|
|
Returns the message id of the message to which this message is a reply.
|
|
|
|
=item subject
|
|
|
|
Returns the subject of the message.
|
|
|
|
=item messageid
|
|
|
|
Returns the message id of the message.
|
|
|
|
=back
|
|
|
|
You can also use the following methods to get addressing
|
|
information. Each of these methods returns an array of
|
|
B<Mail::IMAPClient::BodyStructure::Address> objects, which are perl
|
|
data structures representing RFC2060 address structures. Some of these
|
|
arrays would naturally contain one element (such as B<from>, which
|
|
normally contains a single "From:" address); others will often contain
|
|
more than one address. However, because RFC2060 defines all of these as
|
|
"lists of address structures", they are all translated into arrays of
|
|
B<...::Address> objects.
|
|
|
|
See the section on B<Mail::IMAPClient::BodyStructure::Address>", below,
|
|
for alternate (and preferred) ways of accessing these data.
|
|
|
|
The methods available are:
|
|
|
|
=over 4
|
|
|
|
=item bcc
|
|
|
|
Returns an array of blind cc'ed recipients' address structures. (Don't
|
|
expect much in here unless the message was sent from the mailbox you're
|
|
poking around in, by the way.)
|
|
|
|
=item cc
|
|
|
|
Returns an array of cc'ed recipients' address structures.
|
|
|
|
=item from
|
|
|
|
Returns an array of "From:" address structures--usually just one.
|
|
|
|
=item replyto
|
|
|
|
Returns an array of "Reply-to:" address structures. Once again there is
|
|
usually just one address in the list.
|
|
|
|
=item sender
|
|
|
|
Returns an array of senders' address structures--usually just one and
|
|
usually the same as B<from>.
|
|
|
|
=item to
|
|
|
|
Returns an array of recipients' address structures.
|
|
|
|
=back
|
|
|
|
Each of the methods that returns a list of address structures (i.e. a
|
|
list of B<Mail::IMAPClient::BodyStructure::Address> arrays) also has an
|
|
analagous method that will return a list of E-Mail addresses instead. The
|
|
addresses are in the format C<personalname E<lt>mailboxname@hostnameE<gt>>
|
|
(see the section on B<Mail::IMAPClient::BodyStructure::Address>,
|
|
below) However, if the personal name is 'NIL' then it is omitted from
|
|
the address.
|
|
|
|
These methods are:
|
|
|
|
=over 4
|
|
|
|
=item bcc_addresses
|
|
|
|
Returns a list (or an array reference if called in scalar context)
|
|
of blind cc'ed recipients' email addresses. (Don't expect much in here
|
|
unless the message was sent from the mailbox you're poking around in,
|
|
by the way.)
|
|
|
|
=item cc_addresses
|
|
|
|
Returns a list of cc'ed recipients' email addresses. If called in a scalar
|
|
context it returns a reference to an array of email addresses.
|
|
|
|
=item from_addresses
|
|
|
|
Returns a list of "From:" email addresses. If called in a scalar context
|
|
it returns the first email address in the list. (It's usually a list of just
|
|
one anyway.)
|
|
|
|
=item replyto_addresses
|
|
|
|
Returns a list of "Reply-to:" email addresses. If called in a scalar context
|
|
it returns the first email address in the list.
|
|
|
|
=item sender_addresses
|
|
|
|
Returns a list of senders' email addresses. If called in a scalar context
|
|
it returns the first email address in the list.
|
|
|
|
=item to_addresses
|
|
|
|
Returns a list of recipients' email addresses. If called in a scalar context
|
|
it returns a reference to an array of email addresses.
|
|
|
|
=back
|
|
|
|
Note that context affects the behavior of all of the above methods.
|
|
|
|
Those fields that will commonly contain multiple entries (i.e. they are
|
|
recipients) will return an array reference when called in scalar context.
|
|
You can use this behavior to optimize performance.
|
|
|
|
Those fields that will commonly contain just one address (the sender's) will
|
|
return the first (and usually only) address. You can use this behavior to
|
|
optimize your development time.
|
|
|
|
=head1 Addresses and the Mail::IMAPClient::BodyStructure::Address
|
|
|
|
Several components of an envelope structure are address
|
|
structures. They are each parsed into their own object,
|
|
B<Mail::IMAPClient::BodyStructure::Address>, which looks like this:
|
|
|
|
{ mailboxname => 'somebody.special'
|
|
, hostname => 'somplace.weird.com'
|
|
, personalname => 'Somebody Special
|
|
, sourceroute => 'NIL'
|
|
}
|
|
|
|
RFC2060 specifies that each address component of a bodystructure is a
|
|
list of address structures, so B<Mail::IMAPClient::BodyStructure> parses
|
|
each of these into an array of B<Mail::IMAPClient::BodyStructure::Address>
|
|
objects.
|
|
|
|
Each of these objects has the following methods available to it:
|
|
|
|
=over 4
|
|
|
|
=item mailboxname
|
|
|
|
Returns the "mailboxname" portion of the address, which is the part to
|
|
the left of the '@' sign.
|
|
|
|
=item hostname
|
|
|
|
Returns the "hostname" portion of the address, which is the part to the
|
|
right of the '@' sign.
|
|
|
|
=item personalname
|
|
|
|
Returns the "personalname" portion of the address, which is the part of
|
|
the address that's treated like a comment.
|
|
|
|
=item sourceroute
|
|
|
|
Returns the "sourceroute" portion of the address, which is typically "NIL".
|
|
|
|
=back
|
|
|
|
Taken together, the parts of an address structure form an address that will
|
|
look something like this:
|
|
|
|
C<personalname E<lt>mailboxname@hostnameE<gt>>
|
|
|
|
Note that because the B<Mail::IMAPClient::BodyStructure::Address>
|
|
objects come in arrays, it's generally easier to use the methods
|
|
available to B<Mail::IMAPClient::BodyStructure::Envelope> to obtain
|
|
all of the addresses in a particular array in one operation. These
|
|
methods are provided, however, in case you'd rather do things
|
|
the hard way. (And also because the aforementioned methods from
|
|
B<Mail::IMAPClient::BodyStructure::Envelope> need them anyway.)
|
|
|
|
=cut
|
|
|
|
=head1 AUTHOR
|
|
|
|
David J. Kernen
|
|
|
|
Reworked by Mark Overmeer.
|
|
|
|
=head1 SEE ALSO
|
|
|
|
perl(1), Mail::IMAPClient, and RFC2060. See also Parse::RecDescent if you
|
|
want to understand the internals of this module.
|
|
|
|
=cut
|