This commit is contained in:
commit
47aee91ef4
396 changed files with 32003 additions and 0 deletions
505
tools/Perl/bin/GET
Normal file
505
tools/Perl/bin/GET
Normal file
|
@ -0,0 +1,505 @@
|
|||
#!perl -w
|
||||
|
||||
# $Id: lwp-request.PL,v 1.33 1998/08/04 09:18:11 aas Exp $
|
||||
#
|
||||
# Simple user agent using LWP library.
|
||||
|
||||
=head1 NAME
|
||||
|
||||
lwp-request, GET, HEAD, POST - Simple WWW user agent
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
lwp-request [-aeEdvhx] [-m method] [-b <base URL>] [-t <timeout>]
|
||||
[-i <if-modified-since>] [-c <content-type>] [-C <credentials>]
|
||||
[-p <proxy-url>] [-o <format>] <url>...
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This program can be used to send requests to WWW servers and your
|
||||
local file system. The request content for POST, PUT and CHECKIN
|
||||
methods is read from stdin. The content of the response is printed on
|
||||
stdout. Error messages are printed on stderr. The program returns a
|
||||
status value indicating the number of URLs that failed.
|
||||
|
||||
The options are:
|
||||
|
||||
=over 4
|
||||
|
||||
=item -m <method>
|
||||
|
||||
Set which method to use for the request. If this option is not used,
|
||||
then the method is derived from the name of the program.
|
||||
|
||||
=item -f
|
||||
|
||||
Force request through, even if the program believes that the method is
|
||||
illegal. The server will probably reject the request.
|
||||
|
||||
=item -b <url>
|
||||
|
||||
This URL will be used as the base URL for the URLs that the method is
|
||||
applied to. The base URL only takes effect for relative URLs. If you
|
||||
do not provide this option and the URLs are relative, then they will
|
||||
be treated as files in the local file system.
|
||||
|
||||
=item -t <timeout>
|
||||
|
||||
Set the timeout value for the requests. The timeout is the amount of
|
||||
time that the program will wait for a response from the remote server
|
||||
before it fails. The default unit for the timeout value is seconds.
|
||||
You might append "m" or "h" to the timeout value to make it minutes or
|
||||
hours, respectively. The default timeout is '3m', i.e. 3 minutes.
|
||||
|
||||
=item -i <time>
|
||||
|
||||
Set the If-Modified-Since header in the request. If I<time> it the
|
||||
name of a file, use the modification timestamp for this file. If
|
||||
I<time> is not a file, it is parsed as a literal date. Take a look at
|
||||
L<HTTP::Date> for recogniced formats.
|
||||
|
||||
=item -c <content-type>
|
||||
|
||||
Set the Content-Type for the request. This option is only allowed for
|
||||
requests that take a content, i.e. POST, PUT and CHECKIN. You can
|
||||
force methods to take content by using the C<-f> option together with
|
||||
C<-c>. The default Content-Type for POST is
|
||||
C<application/x-www-form-urlencoded>. The default Content-type for
|
||||
the others is C<text/plain>.
|
||||
|
||||
=item -p <proxy-url>
|
||||
|
||||
Set the proxy to be used for the requests. The program also loads
|
||||
proxy settings from the environment. You can disable this with the
|
||||
C<-P> option.
|
||||
|
||||
=item -C <username>:<password>
|
||||
|
||||
Provide credentials for documents that are protected by Basic
|
||||
Authentication. If the document is protected and you did not specify
|
||||
the username and password with this option, then you will be prompted
|
||||
to provide these values.
|
||||
|
||||
=back
|
||||
|
||||
The following options controls what is displayed by the program:
|
||||
|
||||
=over 4
|
||||
|
||||
=item -u
|
||||
|
||||
Print request method and absolute URL as requests are made.
|
||||
|
||||
=item -U
|
||||
|
||||
Print request headers in addition to request method and absolute URL.
|
||||
|
||||
=item -s
|
||||
|
||||
Print response status code. This option is always on for HEAD requests.
|
||||
|
||||
=item -S
|
||||
|
||||
Print response status chain. This shows redirect and autorization
|
||||
requests that are handled by the library.
|
||||
|
||||
=item -e
|
||||
|
||||
Print response headers. This option is always on for HEAD requests.
|
||||
|
||||
=item -d
|
||||
|
||||
Do B<not> print the content of the response.
|
||||
|
||||
=item -o <format>
|
||||
|
||||
Process HTML content in various ways before printing it. If the
|
||||
content type of the response is not HTML, then this option has no
|
||||
effect. The legal format values are; I<text>, I<ps>, I<links>,
|
||||
I<html> and I<dump>.
|
||||
|
||||
If you specify the I<text> format then the HTML will be formatted as
|
||||
plain latin1 text. If you specify the I<ps> format then it will be
|
||||
formatted as Postscript.
|
||||
|
||||
The I<links> format will output all links found in the HTML document.
|
||||
Relative links will be expanded to absolute ones.
|
||||
|
||||
The I<html> format will reformat the HTML code and the I<dump> format
|
||||
will just dump the HTML syntax tree.
|
||||
|
||||
=item -v
|
||||
|
||||
Print the version number of the program and quit.
|
||||
|
||||
=item -h
|
||||
|
||||
Print usage message and quit.
|
||||
|
||||
=item -x
|
||||
|
||||
Extra debugging output.
|
||||
|
||||
=item -a
|
||||
|
||||
Set text(ascii) mode for content input and output. If this option is not
|
||||
used, content input and output is done in binary mode.
|
||||
|
||||
=back
|
||||
|
||||
Because this program is implemented using the LWP library, it will
|
||||
only support the protocols that LWP supports.
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
L<lwp-mirror>, L<LWP>
|
||||
|
||||
=head1 COPYRIGHT
|
||||
|
||||
Copyright 1995-1997 Gisle Aas.
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the same terms as Perl itself.
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Gisle Aas <aas@sn.no>
|
||||
|
||||
=cut
|
||||
|
||||
$progname = $0;
|
||||
$progname =~ s,.*/,,; # use basename only
|
||||
$progname =~ s/\.\w*$//; # strip extension, if any
|
||||
|
||||
$VERSION = sprintf("%d.%02d", q$Revision: 1.33 $ =~ /(\d+)\.(\d+)/);
|
||||
|
||||
|
||||
require LWP;
|
||||
require LWP::Debug;
|
||||
require URI::URL;
|
||||
|
||||
use URI::Heuristic qw(uf_url);
|
||||
|
||||
use HTTP::Status qw(status_message);
|
||||
use HTTP::Date qw(time2str str2time);
|
||||
|
||||
|
||||
# This table lists the methods that are allowed. It should really be
|
||||
# a superset for all methods supported for every scheme that may be
|
||||
# supported by the library. Currently it might be a bit too HTTP
|
||||
# specific. You might use the -f option to force a method through.
|
||||
#
|
||||
# "" = No content in request, "C" = Needs content in request
|
||||
#
|
||||
%allowed_methods = (
|
||||
GET => "",
|
||||
HEAD => "",
|
||||
POST => "C",
|
||||
PUT => "C",
|
||||
DELETE => "",
|
||||
TRACE => "",
|
||||
OPTIONS => "",
|
||||
);
|
||||
|
||||
|
||||
# We make our own specialization of LWP::UserAgent that asks for
|
||||
# user/password if document is protected.
|
||||
{
|
||||
package RequestAgent;
|
||||
@ISA = qw(LWP::UserAgent);
|
||||
|
||||
sub new
|
||||
{
|
||||
my $self = LWP::UserAgent::new(@_);
|
||||
$self->agent("lwp-request/$main::VERSION");
|
||||
$self;
|
||||
}
|
||||
|
||||
sub get_basic_credentials
|
||||
{
|
||||
my($self, $realm, $uri) = @_;
|
||||
if ($main::opt_C) {
|
||||
return split(':', $main::opt_C, 2);
|
||||
} elsif (-t) {
|
||||
my $netloc = $uri->netloc;
|
||||
print "Enter username for $realm at $netloc: ";
|
||||
my $user = <STDIN>;
|
||||
chomp($user);
|
||||
return (undef, undef) unless length $user;
|
||||
print "Password: ";
|
||||
system("stty -echo");
|
||||
my $password = <STDIN>;
|
||||
system("stty echo");
|
||||
print "\n"; # because we disabled echo
|
||||
chomp($password);
|
||||
return ($user, $password);
|
||||
} else {
|
||||
return (undef, undef)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
$method = uc(lc($progname) eq "lwp-request" ? "GET" : $progname);
|
||||
|
||||
# Parse command line
|
||||
use Getopt::Std;
|
||||
|
||||
$opt_a = undef; # content i/o in text(ascii) mode
|
||||
$opt_m = undef; # set method
|
||||
$opt_f = undef; # make request even if method is not in %allowed_methods
|
||||
$opt_b = undef; # base url
|
||||
$opt_t = undef; # timeout
|
||||
$opt_i = undef; # if-modified-since
|
||||
$opt_c = undef; # content type for POST
|
||||
$opt_C = undef; # credidentials for basic authorization
|
||||
|
||||
$opt_u = undef; # display method, URL and headers of request
|
||||
$opt_U = undef; # display request headers also
|
||||
$opt_s = undef; # display status code
|
||||
$opt_S = undef; # display whole chain of status codes
|
||||
$opt_e = undef; # display response headers (default for HEAD)
|
||||
$opt_d = undef; # don't display content
|
||||
|
||||
$opt_h = undef; # print usage
|
||||
$opt_v = undef; # print version
|
||||
|
||||
$opt_x = undef; # extra debugging info
|
||||
$opt_p = undef; # proxy URL
|
||||
$opt_P = undef; # don't load proxy setting from environment
|
||||
|
||||
$opt_o = undef; # output format
|
||||
|
||||
unless (getopts("axhvuUsSedPp:b:t:i:c:C:m:fo:")) {
|
||||
usage();
|
||||
}
|
||||
|
||||
if ($opt_v) {
|
||||
require LWP;
|
||||
my $DISTNAME = 'libwww-perl-' . LWP::Version();
|
||||
die <<"EOT";
|
||||
This is lwp-request version $VERSION ($DISTNAME)
|
||||
|
||||
Copyright 1995-1997, Gisle Aas.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the same terms as Perl itself.
|
||||
EOT
|
||||
}
|
||||
|
||||
usage() if $opt_h || !@ARGV;
|
||||
|
||||
LWP::Debug::level('+') if $opt_x;
|
||||
|
||||
# Create the user agent object
|
||||
$ua = new RequestAgent;
|
||||
|
||||
# Load proxy settings from *_proxy environment variables.
|
||||
$ua->env_proxy unless $opt_P;
|
||||
|
||||
$method = uc($opt_m) if defined $opt_m;
|
||||
|
||||
if ($opt_f) {
|
||||
if ($opt_c) {
|
||||
$allowed_methods{$method} = "C"; # force content
|
||||
} else {
|
||||
$allowed_methods{$method} = "";
|
||||
}
|
||||
} elsif (!defined $allowed_methods{$method}) {
|
||||
die "$progname: $method is not an allowed method\n";
|
||||
}
|
||||
|
||||
if ($method eq "HEAD") {
|
||||
$opt_s = 1;
|
||||
$opt_e = 1 unless $opt_d;
|
||||
$opt_d = 1;
|
||||
}
|
||||
|
||||
if (defined $opt_t) {
|
||||
$opt_t =~ /^(\d+)([smh])?/;
|
||||
die "$progname: Illegal timeout value!\n" unless defined $1;
|
||||
$timeout = $1;
|
||||
$timeout *= 60 if ($2 eq "m");
|
||||
$timeout *= 3600 if ($2 eq "h");
|
||||
$ua->timeout($timeout);
|
||||
}
|
||||
|
||||
if (defined $opt_i) {
|
||||
if (-e $opt_i) {
|
||||
$time = (stat _)[9];
|
||||
} else {
|
||||
$time = str2time($opt_i);
|
||||
die "$progname: Illegal time syntax for -i option\n"
|
||||
unless defined $time;
|
||||
}
|
||||
$opt_i = time2str($time);
|
||||
}
|
||||
|
||||
$content = undef;
|
||||
if ($allowed_methods{$method} eq "C") {
|
||||
# This request needs some content
|
||||
unless (defined $opt_c) {
|
||||
# set default content type
|
||||
$opt_c = ($method eq "POST") ?
|
||||
"application/x-www-form-urlencoded"
|
||||
: "text/plain";
|
||||
} else {
|
||||
die "$progname: Illegal Content-type format\n"
|
||||
unless $opt_c =~ m,^[\w\-]+/[\w\-]+(?:\s*;.*)?$,
|
||||
}
|
||||
print "Please enter content ($opt_c) to be ${method}ed:\n"
|
||||
if -t;
|
||||
binmode STDIN unless -t or $opt_a;
|
||||
$content = join("", <STDIN>);
|
||||
} else {
|
||||
die "$progname: Can't set Content-type for $method requests\n"
|
||||
if defined $opt_c;
|
||||
}
|
||||
|
||||
# Set up a request. We will use the same request object for all URLs.
|
||||
$request = new HTTP::Request $method;
|
||||
$request->header('If-Modified-Since', $opt_i) if defined $opt_i;
|
||||
#$request->header('Accept', '*/*');
|
||||
if ($opt_c) { # will always be set for request that wants content
|
||||
$request->header('Content-Type', $opt_c);
|
||||
$request->header('Content-Length', length $content); # Not really needed
|
||||
$request->content($content);
|
||||
}
|
||||
|
||||
|
||||
$errors = 0;
|
||||
|
||||
# Ok, now we perform the requests, one URL at a time
|
||||
while ($url = shift) {
|
||||
# Create the URL object, but protect us against bad URLs
|
||||
eval {
|
||||
if ($url =~ /^\w+:/ || $opt_b) { # is there any scheme specification
|
||||
$url = URI::URL->new($url, $opt_b);
|
||||
} else {
|
||||
$url = uf_url($url);
|
||||
}
|
||||
};
|
||||
if ($@) {
|
||||
$@ =~ s/at\s+\S+\s+line\s\d+//;
|
||||
print STDERR $@;
|
||||
$errors++;
|
||||
next;
|
||||
}
|
||||
|
||||
$ua->proxy($url->scheme, $opt_p) if $opt_p;
|
||||
|
||||
# Send the request and get a response back from the server
|
||||
$request->url($url);
|
||||
$response = $ua->request($request);
|
||||
|
||||
if ($opt_u || $opt_U) {
|
||||
my $url = $response->request->url->as_string;
|
||||
print "$method $url\n";
|
||||
print $response->request->headers_as_string, "\n" if $opt_U;
|
||||
}
|
||||
|
||||
if ($opt_S) {
|
||||
printResponseChain($response);
|
||||
} elsif ($opt_s) {
|
||||
print $response->status_line, "\n";
|
||||
}
|
||||
|
||||
if ($opt_e) {
|
||||
# Display headers
|
||||
print $response->headers_as_string;
|
||||
print "\n"; # separate headers and content
|
||||
}
|
||||
|
||||
if ($response->is_success) {
|
||||
unless ($opt_d) {
|
||||
if ($opt_o &&
|
||||
$response->content_type eq 'text/html') {
|
||||
require HTML::Parse;
|
||||
my $html = HTML::Parse::parse_html($response->content);
|
||||
{
|
||||
$opt_o eq 'ps' && do {
|
||||
require HTML::FormatPS;
|
||||
my $f = new HTML::FormatPS;
|
||||
print $f->format($html);
|
||||
last;
|
||||
};
|
||||
$opt_o eq 'text' && do {
|
||||
require HTML::FormatText;
|
||||
my $f = new HTML::FormatText;
|
||||
print $f->format($html);
|
||||
last;
|
||||
};
|
||||
$opt_o eq 'html' && do {
|
||||
print $html->as_HTML;
|
||||
last;
|
||||
};
|
||||
$opt_o eq 'links' && do {
|
||||
my $base = $response->base;
|
||||
for ( @{ $html->extract_links } ) {
|
||||
my($link, $elem) = @$_;
|
||||
my $tag = uc $elem->tag;
|
||||
$link = new URI::URL $link, $base;
|
||||
print "$tag\t", $link->abs->as_string, "\n";
|
||||
}
|
||||
last;
|
||||
};
|
||||
$opt_o eq 'dump' && do {
|
||||
$html->dump;
|
||||
last;
|
||||
};
|
||||
# It is bad to not notice this before now :-(
|
||||
die "Illegal -o option value ($opt_o)\n";
|
||||
}
|
||||
} else {
|
||||
binmode STDOUT unless $opt_a;
|
||||
print $response->content;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
print STDERR $response->error_as_HTML unless $opt_d;
|
||||
$errors++;
|
||||
}
|
||||
}
|
||||
|
||||
exit $errors;
|
||||
|
||||
|
||||
sub printResponseChain
|
||||
{
|
||||
my($response) = @_;
|
||||
return unless defined $response;
|
||||
printResponseChain($response->previous);
|
||||
my $method = $response->request->method;
|
||||
my $url = $response->request->url->as_string;
|
||||
my $code = $response->code;
|
||||
print "$method $url --> ", $response->status_line, "\n";
|
||||
}
|
||||
|
||||
|
||||
sub usage
|
||||
{
|
||||
die <<"EOT";
|
||||
Usage: $progname [-options] <url>...
|
||||
-m <method> use method for the request (default is '$method')
|
||||
-f make request even if $progname believes method is illegal
|
||||
-b <base> Use the specified URL as base
|
||||
-t <timeout> Set timeout value
|
||||
-i <time> Set the If-Modified-Since header on the request
|
||||
-c <conttype> use this content-type for POST, PUT, CHECKIN
|
||||
-a Use text mode for content I/O
|
||||
-p <proxyurl> use this as a proxy
|
||||
-P don't load proxy settings from environment
|
||||
|
||||
-u Display method and URL before any response
|
||||
-U Display request headers (implies -u)
|
||||
-s Display response status code
|
||||
-S Display response status chain
|
||||
-e Display response headers
|
||||
-d Do not display content
|
||||
-o <format> Process HTML content in various ways
|
||||
|
||||
-v Show program version
|
||||
-h Print this message
|
||||
|
||||
-x Extra debugging output
|
||||
EOT
|
||||
}
|
505
tools/Perl/bin/HEAD
Normal file
505
tools/Perl/bin/HEAD
Normal file
|
@ -0,0 +1,505 @@
|
|||
#!perl -w
|
||||
|
||||
# $Id: lwp-request.PL,v 1.33 1998/08/04 09:18:11 aas Exp $
|
||||
#
|
||||
# Simple user agent using LWP library.
|
||||
|
||||
=head1 NAME
|
||||
|
||||
lwp-request, GET, HEAD, POST - Simple WWW user agent
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
lwp-request [-aeEdvhx] [-m method] [-b <base URL>] [-t <timeout>]
|
||||
[-i <if-modified-since>] [-c <content-type>] [-C <credentials>]
|
||||
[-p <proxy-url>] [-o <format>] <url>...
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This program can be used to send requests to WWW servers and your
|
||||
local file system. The request content for POST, PUT and CHECKIN
|
||||
methods is read from stdin. The content of the response is printed on
|
||||
stdout. Error messages are printed on stderr. The program returns a
|
||||
status value indicating the number of URLs that failed.
|
||||
|
||||
The options are:
|
||||
|
||||
=over 4
|
||||
|
||||
=item -m <method>
|
||||
|
||||
Set which method to use for the request. If this option is not used,
|
||||
then the method is derived from the name of the program.
|
||||
|
||||
=item -f
|
||||
|
||||
Force request through, even if the program believes that the method is
|
||||
illegal. The server will probably reject the request.
|
||||
|
||||
=item -b <url>
|
||||
|
||||
This URL will be used as the base URL for the URLs that the method is
|
||||
applied to. The base URL only takes effect for relative URLs. If you
|
||||
do not provide this option and the URLs are relative, then they will
|
||||
be treated as files in the local file system.
|
||||
|
||||
=item -t <timeout>
|
||||
|
||||
Set the timeout value for the requests. The timeout is the amount of
|
||||
time that the program will wait for a response from the remote server
|
||||
before it fails. The default unit for the timeout value is seconds.
|
||||
You might append "m" or "h" to the timeout value to make it minutes or
|
||||
hours, respectively. The default timeout is '3m', i.e. 3 minutes.
|
||||
|
||||
=item -i <time>
|
||||
|
||||
Set the If-Modified-Since header in the request. If I<time> it the
|
||||
name of a file, use the modification timestamp for this file. If
|
||||
I<time> is not a file, it is parsed as a literal date. Take a look at
|
||||
L<HTTP::Date> for recogniced formats.
|
||||
|
||||
=item -c <content-type>
|
||||
|
||||
Set the Content-Type for the request. This option is only allowed for
|
||||
requests that take a content, i.e. POST, PUT and CHECKIN. You can
|
||||
force methods to take content by using the C<-f> option together with
|
||||
C<-c>. The default Content-Type for POST is
|
||||
C<application/x-www-form-urlencoded>. The default Content-type for
|
||||
the others is C<text/plain>.
|
||||
|
||||
=item -p <proxy-url>
|
||||
|
||||
Set the proxy to be used for the requests. The program also loads
|
||||
proxy settings from the environment. You can disable this with the
|
||||
C<-P> option.
|
||||
|
||||
=item -C <username>:<password>
|
||||
|
||||
Provide credentials for documents that are protected by Basic
|
||||
Authentication. If the document is protected and you did not specify
|
||||
the username and password with this option, then you will be prompted
|
||||
to provide these values.
|
||||
|
||||
=back
|
||||
|
||||
The following options controls what is displayed by the program:
|
||||
|
||||
=over 4
|
||||
|
||||
=item -u
|
||||
|
||||
Print request method and absolute URL as requests are made.
|
||||
|
||||
=item -U
|
||||
|
||||
Print request headers in addition to request method and absolute URL.
|
||||
|
||||
=item -s
|
||||
|
||||
Print response status code. This option is always on for HEAD requests.
|
||||
|
||||
=item -S
|
||||
|
||||
Print response status chain. This shows redirect and autorization
|
||||
requests that are handled by the library.
|
||||
|
||||
=item -e
|
||||
|
||||
Print response headers. This option is always on for HEAD requests.
|
||||
|
||||
=item -d
|
||||
|
||||
Do B<not> print the content of the response.
|
||||
|
||||
=item -o <format>
|
||||
|
||||
Process HTML content in various ways before printing it. If the
|
||||
content type of the response is not HTML, then this option has no
|
||||
effect. The legal format values are; I<text>, I<ps>, I<links>,
|
||||
I<html> and I<dump>.
|
||||
|
||||
If you specify the I<text> format then the HTML will be formatted as
|
||||
plain latin1 text. If you specify the I<ps> format then it will be
|
||||
formatted as Postscript.
|
||||
|
||||
The I<links> format will output all links found in the HTML document.
|
||||
Relative links will be expanded to absolute ones.
|
||||
|
||||
The I<html> format will reformat the HTML code and the I<dump> format
|
||||
will just dump the HTML syntax tree.
|
||||
|
||||
=item -v
|
||||
|
||||
Print the version number of the program and quit.
|
||||
|
||||
=item -h
|
||||
|
||||
Print usage message and quit.
|
||||
|
||||
=item -x
|
||||
|
||||
Extra debugging output.
|
||||
|
||||
=item -a
|
||||
|
||||
Set text(ascii) mode for content input and output. If this option is not
|
||||
used, content input and output is done in binary mode.
|
||||
|
||||
=back
|
||||
|
||||
Because this program is implemented using the LWP library, it will
|
||||
only support the protocols that LWP supports.
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
L<lwp-mirror>, L<LWP>
|
||||
|
||||
=head1 COPYRIGHT
|
||||
|
||||
Copyright 1995-1997 Gisle Aas.
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the same terms as Perl itself.
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Gisle Aas <aas@sn.no>
|
||||
|
||||
=cut
|
||||
|
||||
$progname = $0;
|
||||
$progname =~ s,.*/,,; # use basename only
|
||||
$progname =~ s/\.\w*$//; # strip extension, if any
|
||||
|
||||
$VERSION = sprintf("%d.%02d", q$Revision: 1.33 $ =~ /(\d+)\.(\d+)/);
|
||||
|
||||
|
||||
require LWP;
|
||||
require LWP::Debug;
|
||||
require URI::URL;
|
||||
|
||||
use URI::Heuristic qw(uf_url);
|
||||
|
||||
use HTTP::Status qw(status_message);
|
||||
use HTTP::Date qw(time2str str2time);
|
||||
|
||||
|
||||
# This table lists the methods that are allowed. It should really be
|
||||
# a superset for all methods supported for every scheme that may be
|
||||
# supported by the library. Currently it might be a bit too HTTP
|
||||
# specific. You might use the -f option to force a method through.
|
||||
#
|
||||
# "" = No content in request, "C" = Needs content in request
|
||||
#
|
||||
%allowed_methods = (
|
||||
GET => "",
|
||||
HEAD => "",
|
||||
POST => "C",
|
||||
PUT => "C",
|
||||
DELETE => "",
|
||||
TRACE => "",
|
||||
OPTIONS => "",
|
||||
);
|
||||
|
||||
|
||||
# We make our own specialization of LWP::UserAgent that asks for
|
||||
# user/password if document is protected.
|
||||
{
|
||||
package RequestAgent;
|
||||
@ISA = qw(LWP::UserAgent);
|
||||
|
||||
sub new
|
||||
{
|
||||
my $self = LWP::UserAgent::new(@_);
|
||||
$self->agent("lwp-request/$main::VERSION");
|
||||
$self;
|
||||
}
|
||||
|
||||
sub get_basic_credentials
|
||||
{
|
||||
my($self, $realm, $uri) = @_;
|
||||
if ($main::opt_C) {
|
||||
return split(':', $main::opt_C, 2);
|
||||
} elsif (-t) {
|
||||
my $netloc = $uri->netloc;
|
||||
print "Enter username for $realm at $netloc: ";
|
||||
my $user = <STDIN>;
|
||||
chomp($user);
|
||||
return (undef, undef) unless length $user;
|
||||
print "Password: ";
|
||||
system("stty -echo");
|
||||
my $password = <STDIN>;
|
||||
system("stty echo");
|
||||
print "\n"; # because we disabled echo
|
||||
chomp($password);
|
||||
return ($user, $password);
|
||||
} else {
|
||||
return (undef, undef)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
$method = uc(lc($progname) eq "lwp-request" ? "GET" : $progname);
|
||||
|
||||
# Parse command line
|
||||
use Getopt::Std;
|
||||
|
||||
$opt_a = undef; # content i/o in text(ascii) mode
|
||||
$opt_m = undef; # set method
|
||||
$opt_f = undef; # make request even if method is not in %allowed_methods
|
||||
$opt_b = undef; # base url
|
||||
$opt_t = undef; # timeout
|
||||
$opt_i = undef; # if-modified-since
|
||||
$opt_c = undef; # content type for POST
|
||||
$opt_C = undef; # credidentials for basic authorization
|
||||
|
||||
$opt_u = undef; # display method, URL and headers of request
|
||||
$opt_U = undef; # display request headers also
|
||||
$opt_s = undef; # display status code
|
||||
$opt_S = undef; # display whole chain of status codes
|
||||
$opt_e = undef; # display response headers (default for HEAD)
|
||||
$opt_d = undef; # don't display content
|
||||
|
||||
$opt_h = undef; # print usage
|
||||
$opt_v = undef; # print version
|
||||
|
||||
$opt_x = undef; # extra debugging info
|
||||
$opt_p = undef; # proxy URL
|
||||
$opt_P = undef; # don't load proxy setting from environment
|
||||
|
||||
$opt_o = undef; # output format
|
||||
|
||||
unless (getopts("axhvuUsSedPp:b:t:i:c:C:m:fo:")) {
|
||||
usage();
|
||||
}
|
||||
|
||||
if ($opt_v) {
|
||||
require LWP;
|
||||
my $DISTNAME = 'libwww-perl-' . LWP::Version();
|
||||
die <<"EOT";
|
||||
This is lwp-request version $VERSION ($DISTNAME)
|
||||
|
||||
Copyright 1995-1997, Gisle Aas.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the same terms as Perl itself.
|
||||
EOT
|
||||
}
|
||||
|
||||
usage() if $opt_h || !@ARGV;
|
||||
|
||||
LWP::Debug::level('+') if $opt_x;
|
||||
|
||||
# Create the user agent object
|
||||
$ua = new RequestAgent;
|
||||
|
||||
# Load proxy settings from *_proxy environment variables.
|
||||
$ua->env_proxy unless $opt_P;
|
||||
|
||||
$method = uc($opt_m) if defined $opt_m;
|
||||
|
||||
if ($opt_f) {
|
||||
if ($opt_c) {
|
||||
$allowed_methods{$method} = "C"; # force content
|
||||
} else {
|
||||
$allowed_methods{$method} = "";
|
||||
}
|
||||
} elsif (!defined $allowed_methods{$method}) {
|
||||
die "$progname: $method is not an allowed method\n";
|
||||
}
|
||||
|
||||
if ($method eq "HEAD") {
|
||||
$opt_s = 1;
|
||||
$opt_e = 1 unless $opt_d;
|
||||
$opt_d = 1;
|
||||
}
|
||||
|
||||
if (defined $opt_t) {
|
||||
$opt_t =~ /^(\d+)([smh])?/;
|
||||
die "$progname: Illegal timeout value!\n" unless defined $1;
|
||||
$timeout = $1;
|
||||
$timeout *= 60 if ($2 eq "m");
|
||||
$timeout *= 3600 if ($2 eq "h");
|
||||
$ua->timeout($timeout);
|
||||
}
|
||||
|
||||
if (defined $opt_i) {
|
||||
if (-e $opt_i) {
|
||||
$time = (stat _)[9];
|
||||
} else {
|
||||
$time = str2time($opt_i);
|
||||
die "$progname: Illegal time syntax for -i option\n"
|
||||
unless defined $time;
|
||||
}
|
||||
$opt_i = time2str($time);
|
||||
}
|
||||
|
||||
$content = undef;
|
||||
if ($allowed_methods{$method} eq "C") {
|
||||
# This request needs some content
|
||||
unless (defined $opt_c) {
|
||||
# set default content type
|
||||
$opt_c = ($method eq "POST") ?
|
||||
"application/x-www-form-urlencoded"
|
||||
: "text/plain";
|
||||
} else {
|
||||
die "$progname: Illegal Content-type format\n"
|
||||
unless $opt_c =~ m,^[\w\-]+/[\w\-]+(?:\s*;.*)?$,
|
||||
}
|
||||
print "Please enter content ($opt_c) to be ${method}ed:\n"
|
||||
if -t;
|
||||
binmode STDIN unless -t or $opt_a;
|
||||
$content = join("", <STDIN>);
|
||||
} else {
|
||||
die "$progname: Can't set Content-type for $method requests\n"
|
||||
if defined $opt_c;
|
||||
}
|
||||
|
||||
# Set up a request. We will use the same request object for all URLs.
|
||||
$request = new HTTP::Request $method;
|
||||
$request->header('If-Modified-Since', $opt_i) if defined $opt_i;
|
||||
#$request->header('Accept', '*/*');
|
||||
if ($opt_c) { # will always be set for request that wants content
|
||||
$request->header('Content-Type', $opt_c);
|
||||
$request->header('Content-Length', length $content); # Not really needed
|
||||
$request->content($content);
|
||||
}
|
||||
|
||||
|
||||
$errors = 0;
|
||||
|
||||
# Ok, now we perform the requests, one URL at a time
|
||||
while ($url = shift) {
|
||||
# Create the URL object, but protect us against bad URLs
|
||||
eval {
|
||||
if ($url =~ /^\w+:/ || $opt_b) { # is there any scheme specification
|
||||
$url = URI::URL->new($url, $opt_b);
|
||||
} else {
|
||||
$url = uf_url($url);
|
||||
}
|
||||
};
|
||||
if ($@) {
|
||||
$@ =~ s/at\s+\S+\s+line\s\d+//;
|
||||
print STDERR $@;
|
||||
$errors++;
|
||||
next;
|
||||
}
|
||||
|
||||
$ua->proxy($url->scheme, $opt_p) if $opt_p;
|
||||
|
||||
# Send the request and get a response back from the server
|
||||
$request->url($url);
|
||||
$response = $ua->request($request);
|
||||
|
||||
if ($opt_u || $opt_U) {
|
||||
my $url = $response->request->url->as_string;
|
||||
print "$method $url\n";
|
||||
print $response->request->headers_as_string, "\n" if $opt_U;
|
||||
}
|
||||
|
||||
if ($opt_S) {
|
||||
printResponseChain($response);
|
||||
} elsif ($opt_s) {
|
||||
print $response->status_line, "\n";
|
||||
}
|
||||
|
||||
if ($opt_e) {
|
||||
# Display headers
|
||||
print $response->headers_as_string;
|
||||
print "\n"; # separate headers and content
|
||||
}
|
||||
|
||||
if ($response->is_success) {
|
||||
unless ($opt_d) {
|
||||
if ($opt_o &&
|
||||
$response->content_type eq 'text/html') {
|
||||
require HTML::Parse;
|
||||
my $html = HTML::Parse::parse_html($response->content);
|
||||
{
|
||||
$opt_o eq 'ps' && do {
|
||||
require HTML::FormatPS;
|
||||
my $f = new HTML::FormatPS;
|
||||
print $f->format($html);
|
||||
last;
|
||||
};
|
||||
$opt_o eq 'text' && do {
|
||||
require HTML::FormatText;
|
||||
my $f = new HTML::FormatText;
|
||||
print $f->format($html);
|
||||
last;
|
||||
};
|
||||
$opt_o eq 'html' && do {
|
||||
print $html->as_HTML;
|
||||
last;
|
||||
};
|
||||
$opt_o eq 'links' && do {
|
||||
my $base = $response->base;
|
||||
for ( @{ $html->extract_links } ) {
|
||||
my($link, $elem) = @$_;
|
||||
my $tag = uc $elem->tag;
|
||||
$link = new URI::URL $link, $base;
|
||||
print "$tag\t", $link->abs->as_string, "\n";
|
||||
}
|
||||
last;
|
||||
};
|
||||
$opt_o eq 'dump' && do {
|
||||
$html->dump;
|
||||
last;
|
||||
};
|
||||
# It is bad to not notice this before now :-(
|
||||
die "Illegal -o option value ($opt_o)\n";
|
||||
}
|
||||
} else {
|
||||
binmode STDOUT unless $opt_a;
|
||||
print $response->content;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
print STDERR $response->error_as_HTML unless $opt_d;
|
||||
$errors++;
|
||||
}
|
||||
}
|
||||
|
||||
exit $errors;
|
||||
|
||||
|
||||
sub printResponseChain
|
||||
{
|
||||
my($response) = @_;
|
||||
return unless defined $response;
|
||||
printResponseChain($response->previous);
|
||||
my $method = $response->request->method;
|
||||
my $url = $response->request->url->as_string;
|
||||
my $code = $response->code;
|
||||
print "$method $url --> ", $response->status_line, "\n";
|
||||
}
|
||||
|
||||
|
||||
sub usage
|
||||
{
|
||||
die <<"EOT";
|
||||
Usage: $progname [-options] <url>...
|
||||
-m <method> use method for the request (default is '$method')
|
||||
-f make request even if $progname believes method is illegal
|
||||
-b <base> Use the specified URL as base
|
||||
-t <timeout> Set timeout value
|
||||
-i <time> Set the If-Modified-Since header on the request
|
||||
-c <conttype> use this content-type for POST, PUT, CHECKIN
|
||||
-a Use text mode for content I/O
|
||||
-p <proxyurl> use this as a proxy
|
||||
-P don't load proxy settings from environment
|
||||
|
||||
-u Display method and URL before any response
|
||||
-U Display request headers (implies -u)
|
||||
-s Display response status code
|
||||
-S Display response status chain
|
||||
-e Display response headers
|
||||
-d Do not display content
|
||||
-o <format> Process HTML content in various ways
|
||||
|
||||
-v Show program version
|
||||
-h Print this message
|
||||
|
||||
-x Extra debugging output
|
||||
EOT
|
||||
}
|
505
tools/Perl/bin/POST
Normal file
505
tools/Perl/bin/POST
Normal file
|
@ -0,0 +1,505 @@
|
|||
#!perl -w
|
||||
|
||||
# $Id: lwp-request.PL,v 1.33 1998/08/04 09:18:11 aas Exp $
|
||||
#
|
||||
# Simple user agent using LWP library.
|
||||
|
||||
=head1 NAME
|
||||
|
||||
lwp-request, GET, HEAD, POST - Simple WWW user agent
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
lwp-request [-aeEdvhx] [-m method] [-b <base URL>] [-t <timeout>]
|
||||
[-i <if-modified-since>] [-c <content-type>] [-C <credentials>]
|
||||
[-p <proxy-url>] [-o <format>] <url>...
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This program can be used to send requests to WWW servers and your
|
||||
local file system. The request content for POST, PUT and CHECKIN
|
||||
methods is read from stdin. The content of the response is printed on
|
||||
stdout. Error messages are printed on stderr. The program returns a
|
||||
status value indicating the number of URLs that failed.
|
||||
|
||||
The options are:
|
||||
|
||||
=over 4
|
||||
|
||||
=item -m <method>
|
||||
|
||||
Set which method to use for the request. If this option is not used,
|
||||
then the method is derived from the name of the program.
|
||||
|
||||
=item -f
|
||||
|
||||
Force request through, even if the program believes that the method is
|
||||
illegal. The server will probably reject the request.
|
||||
|
||||
=item -b <url>
|
||||
|
||||
This URL will be used as the base URL for the URLs that the method is
|
||||
applied to. The base URL only takes effect for relative URLs. If you
|
||||
do not provide this option and the URLs are relative, then they will
|
||||
be treated as files in the local file system.
|
||||
|
||||
=item -t <timeout>
|
||||
|
||||
Set the timeout value for the requests. The timeout is the amount of
|
||||
time that the program will wait for a response from the remote server
|
||||
before it fails. The default unit for the timeout value is seconds.
|
||||
You might append "m" or "h" to the timeout value to make it minutes or
|
||||
hours, respectively. The default timeout is '3m', i.e. 3 minutes.
|
||||
|
||||
=item -i <time>
|
||||
|
||||
Set the If-Modified-Since header in the request. If I<time> it the
|
||||
name of a file, use the modification timestamp for this file. If
|
||||
I<time> is not a file, it is parsed as a literal date. Take a look at
|
||||
L<HTTP::Date> for recogniced formats.
|
||||
|
||||
=item -c <content-type>
|
||||
|
||||
Set the Content-Type for the request. This option is only allowed for
|
||||
requests that take a content, i.e. POST, PUT and CHECKIN. You can
|
||||
force methods to take content by using the C<-f> option together with
|
||||
C<-c>. The default Content-Type for POST is
|
||||
C<application/x-www-form-urlencoded>. The default Content-type for
|
||||
the others is C<text/plain>.
|
||||
|
||||
=item -p <proxy-url>
|
||||
|
||||
Set the proxy to be used for the requests. The program also loads
|
||||
proxy settings from the environment. You can disable this with the
|
||||
C<-P> option.
|
||||
|
||||
=item -C <username>:<password>
|
||||
|
||||
Provide credentials for documents that are protected by Basic
|
||||
Authentication. If the document is protected and you did not specify
|
||||
the username and password with this option, then you will be prompted
|
||||
to provide these values.
|
||||
|
||||
=back
|
||||
|
||||
The following options controls what is displayed by the program:
|
||||
|
||||
=over 4
|
||||
|
||||
=item -u
|
||||
|
||||
Print request method and absolute URL as requests are made.
|
||||
|
||||
=item -U
|
||||
|
||||
Print request headers in addition to request method and absolute URL.
|
||||
|
||||
=item -s
|
||||
|
||||
Print response status code. This option is always on for HEAD requests.
|
||||
|
||||
=item -S
|
||||
|
||||
Print response status chain. This shows redirect and autorization
|
||||
requests that are handled by the library.
|
||||
|
||||
=item -e
|
||||
|
||||
Print response headers. This option is always on for HEAD requests.
|
||||
|
||||
=item -d
|
||||
|
||||
Do B<not> print the content of the response.
|
||||
|
||||
=item -o <format>
|
||||
|
||||
Process HTML content in various ways before printing it. If the
|
||||
content type of the response is not HTML, then this option has no
|
||||
effect. The legal format values are; I<text>, I<ps>, I<links>,
|
||||
I<html> and I<dump>.
|
||||
|
||||
If you specify the I<text> format then the HTML will be formatted as
|
||||
plain latin1 text. If you specify the I<ps> format then it will be
|
||||
formatted as Postscript.
|
||||
|
||||
The I<links> format will output all links found in the HTML document.
|
||||
Relative links will be expanded to absolute ones.
|
||||
|
||||
The I<html> format will reformat the HTML code and the I<dump> format
|
||||
will just dump the HTML syntax tree.
|
||||
|
||||
=item -v
|
||||
|
||||
Print the version number of the program and quit.
|
||||
|
||||
=item -h
|
||||
|
||||
Print usage message and quit.
|
||||
|
||||
=item -x
|
||||
|
||||
Extra debugging output.
|
||||
|
||||
=item -a
|
||||
|
||||
Set text(ascii) mode for content input and output. If this option is not
|
||||
used, content input and output is done in binary mode.
|
||||
|
||||
=back
|
||||
|
||||
Because this program is implemented using the LWP library, it will
|
||||
only support the protocols that LWP supports.
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
L<lwp-mirror>, L<LWP>
|
||||
|
||||
=head1 COPYRIGHT
|
||||
|
||||
Copyright 1995-1997 Gisle Aas.
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the same terms as Perl itself.
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Gisle Aas <aas@sn.no>
|
||||
|
||||
=cut
|
||||
|
||||
$progname = $0;
|
||||
$progname =~ s,.*/,,; # use basename only
|
||||
$progname =~ s/\.\w*$//; # strip extension, if any
|
||||
|
||||
$VERSION = sprintf("%d.%02d", q$Revision: 1.33 $ =~ /(\d+)\.(\d+)/);
|
||||
|
||||
|
||||
require LWP;
|
||||
require LWP::Debug;
|
||||
require URI::URL;
|
||||
|
||||
use URI::Heuristic qw(uf_url);
|
||||
|
||||
use HTTP::Status qw(status_message);
|
||||
use HTTP::Date qw(time2str str2time);
|
||||
|
||||
|
||||
# This table lists the methods that are allowed. It should really be
|
||||
# a superset for all methods supported for every scheme that may be
|
||||
# supported by the library. Currently it might be a bit too HTTP
|
||||
# specific. You might use the -f option to force a method through.
|
||||
#
|
||||
# "" = No content in request, "C" = Needs content in request
|
||||
#
|
||||
%allowed_methods = (
|
||||
GET => "",
|
||||
HEAD => "",
|
||||
POST => "C",
|
||||
PUT => "C",
|
||||
DELETE => "",
|
||||
TRACE => "",
|
||||
OPTIONS => "",
|
||||
);
|
||||
|
||||
|
||||
# We make our own specialization of LWP::UserAgent that asks for
|
||||
# user/password if document is protected.
|
||||
{
|
||||
package RequestAgent;
|
||||
@ISA = qw(LWP::UserAgent);
|
||||
|
||||
sub new
|
||||
{
|
||||
my $self = LWP::UserAgent::new(@_);
|
||||
$self->agent("lwp-request/$main::VERSION");
|
||||
$self;
|
||||
}
|
||||
|
||||
sub get_basic_credentials
|
||||
{
|
||||
my($self, $realm, $uri) = @_;
|
||||
if ($main::opt_C) {
|
||||
return split(':', $main::opt_C, 2);
|
||||
} elsif (-t) {
|
||||
my $netloc = $uri->netloc;
|
||||
print "Enter username for $realm at $netloc: ";
|
||||
my $user = <STDIN>;
|
||||
chomp($user);
|
||||
return (undef, undef) unless length $user;
|
||||
print "Password: ";
|
||||
system("stty -echo");
|
||||
my $password = <STDIN>;
|
||||
system("stty echo");
|
||||
print "\n"; # because we disabled echo
|
||||
chomp($password);
|
||||
return ($user, $password);
|
||||
} else {
|
||||
return (undef, undef)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
$method = uc(lc($progname) eq "lwp-request" ? "GET" : $progname);
|
||||
|
||||
# Parse command line
|
||||
use Getopt::Std;
|
||||
|
||||
$opt_a = undef; # content i/o in text(ascii) mode
|
||||
$opt_m = undef; # set method
|
||||
$opt_f = undef; # make request even if method is not in %allowed_methods
|
||||
$opt_b = undef; # base url
|
||||
$opt_t = undef; # timeout
|
||||
$opt_i = undef; # if-modified-since
|
||||
$opt_c = undef; # content type for POST
|
||||
$opt_C = undef; # credidentials for basic authorization
|
||||
|
||||
$opt_u = undef; # display method, URL and headers of request
|
||||
$opt_U = undef; # display request headers also
|
||||
$opt_s = undef; # display status code
|
||||
$opt_S = undef; # display whole chain of status codes
|
||||
$opt_e = undef; # display response headers (default for HEAD)
|
||||
$opt_d = undef; # don't display content
|
||||
|
||||
$opt_h = undef; # print usage
|
||||
$opt_v = undef; # print version
|
||||
|
||||
$opt_x = undef; # extra debugging info
|
||||
$opt_p = undef; # proxy URL
|
||||
$opt_P = undef; # don't load proxy setting from environment
|
||||
|
||||
$opt_o = undef; # output format
|
||||
|
||||
unless (getopts("axhvuUsSedPp:b:t:i:c:C:m:fo:")) {
|
||||
usage();
|
||||
}
|
||||
|
||||
if ($opt_v) {
|
||||
require LWP;
|
||||
my $DISTNAME = 'libwww-perl-' . LWP::Version();
|
||||
die <<"EOT";
|
||||
This is lwp-request version $VERSION ($DISTNAME)
|
||||
|
||||
Copyright 1995-1997, Gisle Aas.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the same terms as Perl itself.
|
||||
EOT
|
||||
}
|
||||
|
||||
usage() if $opt_h || !@ARGV;
|
||||
|
||||
LWP::Debug::level('+') if $opt_x;
|
||||
|
||||
# Create the user agent object
|
||||
$ua = new RequestAgent;
|
||||
|
||||
# Load proxy settings from *_proxy environment variables.
|
||||
$ua->env_proxy unless $opt_P;
|
||||
|
||||
$method = uc($opt_m) if defined $opt_m;
|
||||
|
||||
if ($opt_f) {
|
||||
if ($opt_c) {
|
||||
$allowed_methods{$method} = "C"; # force content
|
||||
} else {
|
||||
$allowed_methods{$method} = "";
|
||||
}
|
||||
} elsif (!defined $allowed_methods{$method}) {
|
||||
die "$progname: $method is not an allowed method\n";
|
||||
}
|
||||
|
||||
if ($method eq "HEAD") {
|
||||
$opt_s = 1;
|
||||
$opt_e = 1 unless $opt_d;
|
||||
$opt_d = 1;
|
||||
}
|
||||
|
||||
if (defined $opt_t) {
|
||||
$opt_t =~ /^(\d+)([smh])?/;
|
||||
die "$progname: Illegal timeout value!\n" unless defined $1;
|
||||
$timeout = $1;
|
||||
$timeout *= 60 if ($2 eq "m");
|
||||
$timeout *= 3600 if ($2 eq "h");
|
||||
$ua->timeout($timeout);
|
||||
}
|
||||
|
||||
if (defined $opt_i) {
|
||||
if (-e $opt_i) {
|
||||
$time = (stat _)[9];
|
||||
} else {
|
||||
$time = str2time($opt_i);
|
||||
die "$progname: Illegal time syntax for -i option\n"
|
||||
unless defined $time;
|
||||
}
|
||||
$opt_i = time2str($time);
|
||||
}
|
||||
|
||||
$content = undef;
|
||||
if ($allowed_methods{$method} eq "C") {
|
||||
# This request needs some content
|
||||
unless (defined $opt_c) {
|
||||
# set default content type
|
||||
$opt_c = ($method eq "POST") ?
|
||||
"application/x-www-form-urlencoded"
|
||||
: "text/plain";
|
||||
} else {
|
||||
die "$progname: Illegal Content-type format\n"
|
||||
unless $opt_c =~ m,^[\w\-]+/[\w\-]+(?:\s*;.*)?$,
|
||||
}
|
||||
print "Please enter content ($opt_c) to be ${method}ed:\n"
|
||||
if -t;
|
||||
binmode STDIN unless -t or $opt_a;
|
||||
$content = join("", <STDIN>);
|
||||
} else {
|
||||
die "$progname: Can't set Content-type for $method requests\n"
|
||||
if defined $opt_c;
|
||||
}
|
||||
|
||||
# Set up a request. We will use the same request object for all URLs.
|
||||
$request = new HTTP::Request $method;
|
||||
$request->header('If-Modified-Since', $opt_i) if defined $opt_i;
|
||||
#$request->header('Accept', '*/*');
|
||||
if ($opt_c) { # will always be set for request that wants content
|
||||
$request->header('Content-Type', $opt_c);
|
||||
$request->header('Content-Length', length $content); # Not really needed
|
||||
$request->content($content);
|
||||
}
|
||||
|
||||
|
||||
$errors = 0;
|
||||
|
||||
# Ok, now we perform the requests, one URL at a time
|
||||
while ($url = shift) {
|
||||
# Create the URL object, but protect us against bad URLs
|
||||
eval {
|
||||
if ($url =~ /^\w+:/ || $opt_b) { # is there any scheme specification
|
||||
$url = URI::URL->new($url, $opt_b);
|
||||
} else {
|
||||
$url = uf_url($url);
|
||||
}
|
||||
};
|
||||
if ($@) {
|
||||
$@ =~ s/at\s+\S+\s+line\s\d+//;
|
||||
print STDERR $@;
|
||||
$errors++;
|
||||
next;
|
||||
}
|
||||
|
||||
$ua->proxy($url->scheme, $opt_p) if $opt_p;
|
||||
|
||||
# Send the request and get a response back from the server
|
||||
$request->url($url);
|
||||
$response = $ua->request($request);
|
||||
|
||||
if ($opt_u || $opt_U) {
|
||||
my $url = $response->request->url->as_string;
|
||||
print "$method $url\n";
|
||||
print $response->request->headers_as_string, "\n" if $opt_U;
|
||||
}
|
||||
|
||||
if ($opt_S) {
|
||||
printResponseChain($response);
|
||||
} elsif ($opt_s) {
|
||||
print $response->status_line, "\n";
|
||||
}
|
||||
|
||||
if ($opt_e) {
|
||||
# Display headers
|
||||
print $response->headers_as_string;
|
||||
print "\n"; # separate headers and content
|
||||
}
|
||||
|
||||
if ($response->is_success) {
|
||||
unless ($opt_d) {
|
||||
if ($opt_o &&
|
||||
$response->content_type eq 'text/html') {
|
||||
require HTML::Parse;
|
||||
my $html = HTML::Parse::parse_html($response->content);
|
||||
{
|
||||
$opt_o eq 'ps' && do {
|
||||
require HTML::FormatPS;
|
||||
my $f = new HTML::FormatPS;
|
||||
print $f->format($html);
|
||||
last;
|
||||
};
|
||||
$opt_o eq 'text' && do {
|
||||
require HTML::FormatText;
|
||||
my $f = new HTML::FormatText;
|
||||
print $f->format($html);
|
||||
last;
|
||||
};
|
||||
$opt_o eq 'html' && do {
|
||||
print $html->as_HTML;
|
||||
last;
|
||||
};
|
||||
$opt_o eq 'links' && do {
|
||||
my $base = $response->base;
|
||||
for ( @{ $html->extract_links } ) {
|
||||
my($link, $elem) = @$_;
|
||||
my $tag = uc $elem->tag;
|
||||
$link = new URI::URL $link, $base;
|
||||
print "$tag\t", $link->abs->as_string, "\n";
|
||||
}
|
||||
last;
|
||||
};
|
||||
$opt_o eq 'dump' && do {
|
||||
$html->dump;
|
||||
last;
|
||||
};
|
||||
# It is bad to not notice this before now :-(
|
||||
die "Illegal -o option value ($opt_o)\n";
|
||||
}
|
||||
} else {
|
||||
binmode STDOUT unless $opt_a;
|
||||
print $response->content;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
print STDERR $response->error_as_HTML unless $opt_d;
|
||||
$errors++;
|
||||
}
|
||||
}
|
||||
|
||||
exit $errors;
|
||||
|
||||
|
||||
sub printResponseChain
|
||||
{
|
||||
my($response) = @_;
|
||||
return unless defined $response;
|
||||
printResponseChain($response->previous);
|
||||
my $method = $response->request->method;
|
||||
my $url = $response->request->url->as_string;
|
||||
my $code = $response->code;
|
||||
print "$method $url --> ", $response->status_line, "\n";
|
||||
}
|
||||
|
||||
|
||||
sub usage
|
||||
{
|
||||
die <<"EOT";
|
||||
Usage: $progname [-options] <url>...
|
||||
-m <method> use method for the request (default is '$method')
|
||||
-f make request even if $progname believes method is illegal
|
||||
-b <base> Use the specified URL as base
|
||||
-t <timeout> Set timeout value
|
||||
-i <time> Set the If-Modified-Since header on the request
|
||||
-c <conttype> use this content-type for POST, PUT, CHECKIN
|
||||
-a Use text mode for content I/O
|
||||
-p <proxyurl> use this as a proxy
|
||||
-P don't load proxy settings from environment
|
||||
|
||||
-u Display method and URL before any response
|
||||
-U Display request headers (implies -u)
|
||||
-s Display response status code
|
||||
-S Display response status chain
|
||||
-e Display response headers
|
||||
-d Do not display content
|
||||
-o <format> Process HTML content in various ways
|
||||
|
||||
-v Show program version
|
||||
-h Print this message
|
||||
|
||||
-x Extra debugging output
|
||||
EOT
|
||||
}
|
BIN
tools/Perl/bin/PerlCRT.dll
Normal file
BIN
tools/Perl/bin/PerlCRT.dll
Normal file
Binary file not shown.
BIN
tools/Perl/bin/PerlEz.dll
Normal file
BIN
tools/Perl/bin/PerlEz.dll
Normal file
Binary file not shown.
BIN
tools/Perl/bin/PerlMsg.dll
Normal file
BIN
tools/Perl/bin/PerlMsg.dll
Normal file
Binary file not shown.
BIN
tools/Perl/bin/PerlSE.dll
Normal file
BIN
tools/Perl/bin/PerlSE.dll
Normal file
Binary file not shown.
BIN
tools/Perl/bin/PerlSE.pl
Normal file
BIN
tools/Perl/bin/PerlSE.pl
Normal file
Binary file not shown.
BIN
tools/Perl/bin/a2p.exe
Normal file
BIN
tools/Perl/bin/a2p.exe
Normal file
Binary file not shown.
232
tools/Perl/bin/lwp-download
Normal file
232
tools/Perl/bin/lwp-download
Normal file
|
@ -0,0 +1,232 @@
|
|||
#!perl -w
|
||||
|
||||
# $Id: lwp-download.PL,v 1.7 1998/08/04 09:03:35 aas Exp $
|
||||
|
||||
=head1 NAME
|
||||
|
||||
lwp-download - fetch large files from the net
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
lwp-download [-a] <url> [<local file>]
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
The I<lwp-download> program will down load the document specified by the URL
|
||||
given as the first command line argument to a local file. The local
|
||||
filename used to save the document is guessed from the URL unless
|
||||
specified as the second command line argument.
|
||||
|
||||
The I<lwp-download> program is implemented using the I<libwww-perl>
|
||||
library. It is better suited to down load big files than the
|
||||
I<lwp-request> program because it does not store the file in memory.
|
||||
Another benefit is that it will keep you updated about its progress
|
||||
and that you don't have much options to worry about.
|
||||
|
||||
Use the C<-a> option to save the file in text (ascii) mode. Might make a
|
||||
difference on dosish systems.
|
||||
|
||||
=head1 EXAMPLE
|
||||
|
||||
Fetch the newest and greatest perl version:
|
||||
|
||||
$ lwp-download http://www.perl.com/CPAN/src/latest.tar.gz
|
||||
Saving to 'latest.tar.gz'...
|
||||
1.47 MB received in 22 seconds (68.7 KB/sec)
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Gisle Aas <gisle@aas.no>
|
||||
|
||||
=cut
|
||||
|
||||
use LWP::UserAgent;
|
||||
use LWP::MediaTypes;
|
||||
use URI::URL;
|
||||
use strict;
|
||||
|
||||
my $progname = $0;
|
||||
$progname =~ s,.*/,,; # only basename left in progname
|
||||
$progname =~ s/\.\w*$//; # strip extension if any
|
||||
|
||||
#parse option
|
||||
use Getopt::Std;
|
||||
my %opts;
|
||||
unless (getopts('a', \%opts)) {
|
||||
usage();
|
||||
}
|
||||
my $opt_a = $opts{a}; # save in binary mode
|
||||
|
||||
my $url = url(shift || usage());
|
||||
my $argfile = shift;
|
||||
|
||||
my $ua = new LWP::UserAgent;
|
||||
|
||||
$ua->agent("lwp-download/0.1 " . $ua->agent);
|
||||
$ua->env_proxy;
|
||||
|
||||
my $req = new HTTP::Request GET => $url;
|
||||
|
||||
my $file; # name of file we download into
|
||||
my $length; # total number of bytes to download
|
||||
my $flength; # formatted length
|
||||
my $size = 0; # number of bytes received
|
||||
my $start_t; # start time of download
|
||||
my $last_dur; # time of last callback
|
||||
|
||||
my $shown = 0; # have we called the show() function yet
|
||||
|
||||
$SIG{INT} = sub { die "Interrupted\n"; };
|
||||
|
||||
$| = 1; # autoflush
|
||||
|
||||
my $res = $ua->request($req,
|
||||
sub {
|
||||
unless($file) {
|
||||
my $res = $_[1];
|
||||
unless ($argfile) {
|
||||
# must find a suitable name to use. First thing
|
||||
# to do is to look for the "Content-Disposition"
|
||||
# header defined by RFC1806. This is also supported
|
||||
# by Netscape
|
||||
my $cd = $res->header("Content-Disposition");
|
||||
if ($cd && $cd =~ /\bfilename\s*=\s*(\S+)/) {
|
||||
$file = $1;
|
||||
$file =~ s/;$//;
|
||||
$file =~ s/^([\"\'])(.*)\1$/$2/;
|
||||
}
|
||||
|
||||
# if this fails we try to make something from the URL
|
||||
unless ($file) {
|
||||
my $req = $res->request; # now always there
|
||||
my $rurl = $req ? $req->url : $url;
|
||||
|
||||
$file = ($rurl->path_components)[-1];
|
||||
unless (length $file) {
|
||||
$file = "index";
|
||||
my $suffix = media_suffix($res->content_type);
|
||||
$file .= ".$suffix" if $suffix;
|
||||
} elsif ($rurl->scheme eq 'ftp' ||
|
||||
$file =~ /\.tgz$/ ||
|
||||
$file =~ /\.tar(\.(Z|gz))?$/
|
||||
) {
|
||||
# leave the filename as it was
|
||||
} else {
|
||||
my $ct = guess_media_type($file);
|
||||
unless ($ct eq $res->content_type) {
|
||||
# need a better suffix for this type
|
||||
my $suffix = media_suffix($res->content_type);
|
||||
$file .= ".$suffix" if $suffix;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Check if the file is already present
|
||||
if (-f $file && -t) {
|
||||
print "Overwrite $file? [y] ";
|
||||
my $ans = <STDIN>;
|
||||
exit if !defined($ans) || !($ans =~ /^y?\n/);
|
||||
} else {
|
||||
print "Saving to '$file'...\n";
|
||||
}
|
||||
} else {
|
||||
$file = $argfile;
|
||||
}
|
||||
open(FILE, ">$file") || die "Can't open $file: $!";
|
||||
binmode FILE unless $opt_a;
|
||||
$length = $res->content_length;
|
||||
$flength = fbytes($length) if defined $length;
|
||||
$start_t = time;
|
||||
$last_dur = 0;
|
||||
}
|
||||
$size += length($_[0]);
|
||||
print FILE $_[0];
|
||||
if (defined $length) {
|
||||
my $dur = time - $start_t;
|
||||
if ($dur != $last_dur) { # don't update too often
|
||||
$last_dur = $dur;
|
||||
my $perc = $size / $length;
|
||||
my $speed;
|
||||
$speed = fbytes($size/$dur) . "/sec" if $dur > 3;
|
||||
my $secs_left = fduration($dur/$perc - $dur);
|
||||
$perc = int($perc*100);
|
||||
my $show = "$perc% of $flength";
|
||||
$show .= " (at $speed, $secs_left remaining)" if $speed;
|
||||
show($show);
|
||||
}
|
||||
} else {
|
||||
show( fbytes($size) . " received");
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
if ($res->is_success || $res->message =~ /^Interrupted/) {
|
||||
show(""); # clear text
|
||||
print "\r";
|
||||
print fbytes($size);
|
||||
print " of ", fbytes($length) if defined($length) && $length != $size;
|
||||
print " received";
|
||||
my $dur = time - $start_t;
|
||||
if ($dur) {
|
||||
my $speed = fbytes($size/$dur) . "/sec";
|
||||
print " in ", fduration($dur), " ($speed)";
|
||||
}
|
||||
print "\n";
|
||||
my $died = $res->header("X-Died");
|
||||
if ($died || !$res->is_success) {
|
||||
if (-t) {
|
||||
print "Transfer aborted. Delete $file? [n] ";
|
||||
my $ans = <STDIN>;
|
||||
unlink($file) if defined($ans) && $ans =~ /^y\n/;
|
||||
} else {
|
||||
print "Transfer aborted, $file kept\n";
|
||||
}
|
||||
}
|
||||
} else {
|
||||
print "\n" if $shown;
|
||||
print "$progname: Can't download: ", $res->code, " ", $res->message, "\n";
|
||||
exit 1;
|
||||
}
|
||||
|
||||
|
||||
sub fbytes
|
||||
{
|
||||
my $n = int(shift);
|
||||
if ($n >= 1024 * 1024) {
|
||||
return sprintf "%.3g MB", $n / (1024.0 * 1024);
|
||||
} elsif ($n >= 1024) {
|
||||
return sprintf "%.3g KB", $n / 1024.0;
|
||||
} else {
|
||||
return "$n bytes";
|
||||
}
|
||||
}
|
||||
|
||||
sub fduration
|
||||
{
|
||||
use integer;
|
||||
my $secs = int(shift);
|
||||
my $hours = $secs / (60*60);
|
||||
$secs -= $hours * 60*60;
|
||||
my $mins = $secs / 60;
|
||||
$secs %= 60;
|
||||
if ($hours) {
|
||||
return "$hours hours $mins minutes";
|
||||
} elsif ($mins >= 2) {
|
||||
return "$mins minutes";
|
||||
} else {
|
||||
$secs += $mins * 60;
|
||||
return "$secs seconds";
|
||||
}
|
||||
}
|
||||
|
||||
sub show
|
||||
{
|
||||
my $mess = shift;
|
||||
print "\r$mess", (" " x (75 - length $mess));
|
||||
$shown++;
|
||||
}
|
||||
|
||||
sub usage
|
||||
{
|
||||
die "Usage: $progname [-a] <url> [<lpath>]\n";
|
||||
}
|
104
tools/Perl/bin/lwp-mirror
Normal file
104
tools/Perl/bin/lwp-mirror
Normal file
|
@ -0,0 +1,104 @@
|
|||
#!perl -w
|
||||
|
||||
# $Id: lwp-mirror.PL,v 1.18 1997/12/03 21:21:00 aas Exp $
|
||||
#
|
||||
# Simple mirror utility using LWP
|
||||
|
||||
=head1 NAME
|
||||
|
||||
lwp-mirror - Simple mirror utility for WWW
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
lwp-mirror [-v] [-t timeout] <url> <local file>
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This program can be used to mirror a document from a WWW server. The
|
||||
document is only transfered if the remote copy is newer than the local
|
||||
copy. If the local copy is newer nothing happens.
|
||||
|
||||
Use the C<-v> option to print the version number of this program.
|
||||
|
||||
The timeout value specified with the C<-t> option. The timeout value
|
||||
is the time that the program will wait for response from the remote
|
||||
server before it fails. The default unit for the timeout value is
|
||||
seconds. You might append "m" or "h" to the timeout value to make it
|
||||
minutes or hours, repectively.
|
||||
|
||||
Because this program is implemented using the LWP library, it only
|
||||
supports the protocols that LWP supports.
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
L<lwp-request>, L<LWP>
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Gisle Aas <aas@a.sn.no>
|
||||
|
||||
=cut
|
||||
|
||||
|
||||
use LWP::Simple;
|
||||
use Getopt::Std;
|
||||
|
||||
$progname = $0;
|
||||
$progname =~ s,.*/,,; # use basename only
|
||||
$progname =~ s/\.\w*$//; #strip extension if any
|
||||
|
||||
$VERSION = sprintf("%d.%02d", q$Revision: 1.18 $ =~ /(\d+)\.(\d+)/);
|
||||
|
||||
$opt_h = undef; # print usage
|
||||
$opt_v = undef; # print version
|
||||
$opt_t = undef; # timeout
|
||||
|
||||
unless (getopts("hvt:")) {
|
||||
usage();
|
||||
}
|
||||
|
||||
if ($opt_v) {
|
||||
require LWP;
|
||||
my $DISTNAME = 'libwww-perl-' . LWP::Version();
|
||||
die <<"EOT";
|
||||
This is lwp-mirror version $VERSION ($DISTNAME)
|
||||
|
||||
Copyright 1995-1996, Gisle Aas.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the same terms as Perl itself.
|
||||
EOT
|
||||
}
|
||||
|
||||
$url = shift or usage();
|
||||
$file = shift or usage();
|
||||
usage() if $opt_h or @ARGV;
|
||||
|
||||
if (defined $opt_t) {
|
||||
$opt_t =~ /^(\d+)([smh])?/;
|
||||
die "$progname: Illegal timeout value!\n" unless defined $1;
|
||||
$timeout = $1;
|
||||
$timeout *= 60 if ($2 eq "m");
|
||||
$timeout *= 3600 if ($2 eq "h");
|
||||
$LWP::Simple::ua->timeout($timeout);
|
||||
}
|
||||
|
||||
$rc = mirror($url, $file);
|
||||
|
||||
if ($rc == 304) {
|
||||
print STDERR "$progname: $file is up to date\n"
|
||||
} elsif (!is_success($rc)) {
|
||||
print STDERR "$progname: $rc ", status_message($rc), " ($url)\n";
|
||||
exit 1;
|
||||
}
|
||||
exit;
|
||||
|
||||
|
||||
sub usage
|
||||
{
|
||||
die <<"EOT";
|
||||
Usage: $progname [-options] <url> <file>
|
||||
-v print version number of program
|
||||
-t <timeout> Set timeout value
|
||||
EOT
|
||||
}
|
505
tools/Perl/bin/lwp-request
Normal file
505
tools/Perl/bin/lwp-request
Normal file
|
@ -0,0 +1,505 @@
|
|||
#!perl -w
|
||||
|
||||
# $Id: lwp-request.PL,v 1.33 1998/08/04 09:18:11 aas Exp $
|
||||
#
|
||||
# Simple user agent using LWP library.
|
||||
|
||||
=head1 NAME
|
||||
|
||||
lwp-request, GET, HEAD, POST - Simple WWW user agent
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
lwp-request [-aeEdvhx] [-m method] [-b <base URL>] [-t <timeout>]
|
||||
[-i <if-modified-since>] [-c <content-type>] [-C <credentials>]
|
||||
[-p <proxy-url>] [-o <format>] <url>...
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This program can be used to send requests to WWW servers and your
|
||||
local file system. The request content for POST, PUT and CHECKIN
|
||||
methods is read from stdin. The content of the response is printed on
|
||||
stdout. Error messages are printed on stderr. The program returns a
|
||||
status value indicating the number of URLs that failed.
|
||||
|
||||
The options are:
|
||||
|
||||
=over 4
|
||||
|
||||
=item -m <method>
|
||||
|
||||
Set which method to use for the request. If this option is not used,
|
||||
then the method is derived from the name of the program.
|
||||
|
||||
=item -f
|
||||
|
||||
Force request through, even if the program believes that the method is
|
||||
illegal. The server will probably reject the request.
|
||||
|
||||
=item -b <url>
|
||||
|
||||
This URL will be used as the base URL for the URLs that the method is
|
||||
applied to. The base URL only takes effect for relative URLs. If you
|
||||
do not provide this option and the URLs are relative, then they will
|
||||
be treated as files in the local file system.
|
||||
|
||||
=item -t <timeout>
|
||||
|
||||
Set the timeout value for the requests. The timeout is the amount of
|
||||
time that the program will wait for a response from the remote server
|
||||
before it fails. The default unit for the timeout value is seconds.
|
||||
You might append "m" or "h" to the timeout value to make it minutes or
|
||||
hours, respectively. The default timeout is '3m', i.e. 3 minutes.
|
||||
|
||||
=item -i <time>
|
||||
|
||||
Set the If-Modified-Since header in the request. If I<time> it the
|
||||
name of a file, use the modification timestamp for this file. If
|
||||
I<time> is not a file, it is parsed as a literal date. Take a look at
|
||||
L<HTTP::Date> for recogniced formats.
|
||||
|
||||
=item -c <content-type>
|
||||
|
||||
Set the Content-Type for the request. This option is only allowed for
|
||||
requests that take a content, i.e. POST, PUT and CHECKIN. You can
|
||||
force methods to take content by using the C<-f> option together with
|
||||
C<-c>. The default Content-Type for POST is
|
||||
C<application/x-www-form-urlencoded>. The default Content-type for
|
||||
the others is C<text/plain>.
|
||||
|
||||
=item -p <proxy-url>
|
||||
|
||||
Set the proxy to be used for the requests. The program also loads
|
||||
proxy settings from the environment. You can disable this with the
|
||||
C<-P> option.
|
||||
|
||||
=item -C <username>:<password>
|
||||
|
||||
Provide credentials for documents that are protected by Basic
|
||||
Authentication. If the document is protected and you did not specify
|
||||
the username and password with this option, then you will be prompted
|
||||
to provide these values.
|
||||
|
||||
=back
|
||||
|
||||
The following options controls what is displayed by the program:
|
||||
|
||||
=over 4
|
||||
|
||||
=item -u
|
||||
|
||||
Print request method and absolute URL as requests are made.
|
||||
|
||||
=item -U
|
||||
|
||||
Print request headers in addition to request method and absolute URL.
|
||||
|
||||
=item -s
|
||||
|
||||
Print response status code. This option is always on for HEAD requests.
|
||||
|
||||
=item -S
|
||||
|
||||
Print response status chain. This shows redirect and autorization
|
||||
requests that are handled by the library.
|
||||
|
||||
=item -e
|
||||
|
||||
Print response headers. This option is always on for HEAD requests.
|
||||
|
||||
=item -d
|
||||
|
||||
Do B<not> print the content of the response.
|
||||
|
||||
=item -o <format>
|
||||
|
||||
Process HTML content in various ways before printing it. If the
|
||||
content type of the response is not HTML, then this option has no
|
||||
effect. The legal format values are; I<text>, I<ps>, I<links>,
|
||||
I<html> and I<dump>.
|
||||
|
||||
If you specify the I<text> format then the HTML will be formatted as
|
||||
plain latin1 text. If you specify the I<ps> format then it will be
|
||||
formatted as Postscript.
|
||||
|
||||
The I<links> format will output all links found in the HTML document.
|
||||
Relative links will be expanded to absolute ones.
|
||||
|
||||
The I<html> format will reformat the HTML code and the I<dump> format
|
||||
will just dump the HTML syntax tree.
|
||||
|
||||
=item -v
|
||||
|
||||
Print the version number of the program and quit.
|
||||
|
||||
=item -h
|
||||
|
||||
Print usage message and quit.
|
||||
|
||||
=item -x
|
||||
|
||||
Extra debugging output.
|
||||
|
||||
=item -a
|
||||
|
||||
Set text(ascii) mode for content input and output. If this option is not
|
||||
used, content input and output is done in binary mode.
|
||||
|
||||
=back
|
||||
|
||||
Because this program is implemented using the LWP library, it will
|
||||
only support the protocols that LWP supports.
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
L<lwp-mirror>, L<LWP>
|
||||
|
||||
=head1 COPYRIGHT
|
||||
|
||||
Copyright 1995-1997 Gisle Aas.
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the same terms as Perl itself.
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Gisle Aas <aas@sn.no>
|
||||
|
||||
=cut
|
||||
|
||||
$progname = $0;
|
||||
$progname =~ s,.*/,,; # use basename only
|
||||
$progname =~ s/\.\w*$//; # strip extension, if any
|
||||
|
||||
$VERSION = sprintf("%d.%02d", q$Revision: 1.33 $ =~ /(\d+)\.(\d+)/);
|
||||
|
||||
|
||||
require LWP;
|
||||
require LWP::Debug;
|
||||
require URI::URL;
|
||||
|
||||
use URI::Heuristic qw(uf_url);
|
||||
|
||||
use HTTP::Status qw(status_message);
|
||||
use HTTP::Date qw(time2str str2time);
|
||||
|
||||
|
||||
# This table lists the methods that are allowed. It should really be
|
||||
# a superset for all methods supported for every scheme that may be
|
||||
# supported by the library. Currently it might be a bit too HTTP
|
||||
# specific. You might use the -f option to force a method through.
|
||||
#
|
||||
# "" = No content in request, "C" = Needs content in request
|
||||
#
|
||||
%allowed_methods = (
|
||||
GET => "",
|
||||
HEAD => "",
|
||||
POST => "C",
|
||||
PUT => "C",
|
||||
DELETE => "",
|
||||
TRACE => "",
|
||||
OPTIONS => "",
|
||||
);
|
||||
|
||||
|
||||
# We make our own specialization of LWP::UserAgent that asks for
|
||||
# user/password if document is protected.
|
||||
{
|
||||
package RequestAgent;
|
||||
@ISA = qw(LWP::UserAgent);
|
||||
|
||||
sub new
|
||||
{
|
||||
my $self = LWP::UserAgent::new(@_);
|
||||
$self->agent("lwp-request/$main::VERSION");
|
||||
$self;
|
||||
}
|
||||
|
||||
sub get_basic_credentials
|
||||
{
|
||||
my($self, $realm, $uri) = @_;
|
||||
if ($main::opt_C) {
|
||||
return split(':', $main::opt_C, 2);
|
||||
} elsif (-t) {
|
||||
my $netloc = $uri->netloc;
|
||||
print "Enter username for $realm at $netloc: ";
|
||||
my $user = <STDIN>;
|
||||
chomp($user);
|
||||
return (undef, undef) unless length $user;
|
||||
print "Password: ";
|
||||
system("stty -echo");
|
||||
my $password = <STDIN>;
|
||||
system("stty echo");
|
||||
print "\n"; # because we disabled echo
|
||||
chomp($password);
|
||||
return ($user, $password);
|
||||
} else {
|
||||
return (undef, undef)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
$method = uc(lc($progname) eq "lwp-request" ? "GET" : $progname);
|
||||
|
||||
# Parse command line
|
||||
use Getopt::Std;
|
||||
|
||||
$opt_a = undef; # content i/o in text(ascii) mode
|
||||
$opt_m = undef; # set method
|
||||
$opt_f = undef; # make request even if method is not in %allowed_methods
|
||||
$opt_b = undef; # base url
|
||||
$opt_t = undef; # timeout
|
||||
$opt_i = undef; # if-modified-since
|
||||
$opt_c = undef; # content type for POST
|
||||
$opt_C = undef; # credidentials for basic authorization
|
||||
|
||||
$opt_u = undef; # display method, URL and headers of request
|
||||
$opt_U = undef; # display request headers also
|
||||
$opt_s = undef; # display status code
|
||||
$opt_S = undef; # display whole chain of status codes
|
||||
$opt_e = undef; # display response headers (default for HEAD)
|
||||
$opt_d = undef; # don't display content
|
||||
|
||||
$opt_h = undef; # print usage
|
||||
$opt_v = undef; # print version
|
||||
|
||||
$opt_x = undef; # extra debugging info
|
||||
$opt_p = undef; # proxy URL
|
||||
$opt_P = undef; # don't load proxy setting from environment
|
||||
|
||||
$opt_o = undef; # output format
|
||||
|
||||
unless (getopts("axhvuUsSedPp:b:t:i:c:C:m:fo:")) {
|
||||
usage();
|
||||
}
|
||||
|
||||
if ($opt_v) {
|
||||
require LWP;
|
||||
my $DISTNAME = 'libwww-perl-' . LWP::Version();
|
||||
die <<"EOT";
|
||||
This is lwp-request version $VERSION ($DISTNAME)
|
||||
|
||||
Copyright 1995-1997, Gisle Aas.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the same terms as Perl itself.
|
||||
EOT
|
||||
}
|
||||
|
||||
usage() if $opt_h || !@ARGV;
|
||||
|
||||
LWP::Debug::level('+') if $opt_x;
|
||||
|
||||
# Create the user agent object
|
||||
$ua = new RequestAgent;
|
||||
|
||||
# Load proxy settings from *_proxy environment variables.
|
||||
$ua->env_proxy unless $opt_P;
|
||||
|
||||
$method = uc($opt_m) if defined $opt_m;
|
||||
|
||||
if ($opt_f) {
|
||||
if ($opt_c) {
|
||||
$allowed_methods{$method} = "C"; # force content
|
||||
} else {
|
||||
$allowed_methods{$method} = "";
|
||||
}
|
||||
} elsif (!defined $allowed_methods{$method}) {
|
||||
die "$progname: $method is not an allowed method\n";
|
||||
}
|
||||
|
||||
if ($method eq "HEAD") {
|
||||
$opt_s = 1;
|
||||
$opt_e = 1 unless $opt_d;
|
||||
$opt_d = 1;
|
||||
}
|
||||
|
||||
if (defined $opt_t) {
|
||||
$opt_t =~ /^(\d+)([smh])?/;
|
||||
die "$progname: Illegal timeout value!\n" unless defined $1;
|
||||
$timeout = $1;
|
||||
$timeout *= 60 if ($2 eq "m");
|
||||
$timeout *= 3600 if ($2 eq "h");
|
||||
$ua->timeout($timeout);
|
||||
}
|
||||
|
||||
if (defined $opt_i) {
|
||||
if (-e $opt_i) {
|
||||
$time = (stat _)[9];
|
||||
} else {
|
||||
$time = str2time($opt_i);
|
||||
die "$progname: Illegal time syntax for -i option\n"
|
||||
unless defined $time;
|
||||
}
|
||||
$opt_i = time2str($time);
|
||||
}
|
||||
|
||||
$content = undef;
|
||||
if ($allowed_methods{$method} eq "C") {
|
||||
# This request needs some content
|
||||
unless (defined $opt_c) {
|
||||
# set default content type
|
||||
$opt_c = ($method eq "POST") ?
|
||||
"application/x-www-form-urlencoded"
|
||||
: "text/plain";
|
||||
} else {
|
||||
die "$progname: Illegal Content-type format\n"
|
||||
unless $opt_c =~ m,^[\w\-]+/[\w\-]+(?:\s*;.*)?$,
|
||||
}
|
||||
print "Please enter content ($opt_c) to be ${method}ed:\n"
|
||||
if -t;
|
||||
binmode STDIN unless -t or $opt_a;
|
||||
$content = join("", <STDIN>);
|
||||
} else {
|
||||
die "$progname: Can't set Content-type for $method requests\n"
|
||||
if defined $opt_c;
|
||||
}
|
||||
|
||||
# Set up a request. We will use the same request object for all URLs.
|
||||
$request = new HTTP::Request $method;
|
||||
$request->header('If-Modified-Since', $opt_i) if defined $opt_i;
|
||||
#$request->header('Accept', '*/*');
|
||||
if ($opt_c) { # will always be set for request that wants content
|
||||
$request->header('Content-Type', $opt_c);
|
||||
$request->header('Content-Length', length $content); # Not really needed
|
||||
$request->content($content);
|
||||
}
|
||||
|
||||
|
||||
$errors = 0;
|
||||
|
||||
# Ok, now we perform the requests, one URL at a time
|
||||
while ($url = shift) {
|
||||
# Create the URL object, but protect us against bad URLs
|
||||
eval {
|
||||
if ($url =~ /^\w+:/ || $opt_b) { # is there any scheme specification
|
||||
$url = URI::URL->new($url, $opt_b);
|
||||
} else {
|
||||
$url = uf_url($url);
|
||||
}
|
||||
};
|
||||
if ($@) {
|
||||
$@ =~ s/at\s+\S+\s+line\s\d+//;
|
||||
print STDERR $@;
|
||||
$errors++;
|
||||
next;
|
||||
}
|
||||
|
||||
$ua->proxy($url->scheme, $opt_p) if $opt_p;
|
||||
|
||||
# Send the request and get a response back from the server
|
||||
$request->url($url);
|
||||
$response = $ua->request($request);
|
||||
|
||||
if ($opt_u || $opt_U) {
|
||||
my $url = $response->request->url->as_string;
|
||||
print "$method $url\n";
|
||||
print $response->request->headers_as_string, "\n" if $opt_U;
|
||||
}
|
||||
|
||||
if ($opt_S) {
|
||||
printResponseChain($response);
|
||||
} elsif ($opt_s) {
|
||||
print $response->status_line, "\n";
|
||||
}
|
||||
|
||||
if ($opt_e) {
|
||||
# Display headers
|
||||
print $response->headers_as_string;
|
||||
print "\n"; # separate headers and content
|
||||
}
|
||||
|
||||
if ($response->is_success) {
|
||||
unless ($opt_d) {
|
||||
if ($opt_o &&
|
||||
$response->content_type eq 'text/html') {
|
||||
require HTML::Parse;
|
||||
my $html = HTML::Parse::parse_html($response->content);
|
||||
{
|
||||
$opt_o eq 'ps' && do {
|
||||
require HTML::FormatPS;
|
||||
my $f = new HTML::FormatPS;
|
||||
print $f->format($html);
|
||||
last;
|
||||
};
|
||||
$opt_o eq 'text' && do {
|
||||
require HTML::FormatText;
|
||||
my $f = new HTML::FormatText;
|
||||
print $f->format($html);
|
||||
last;
|
||||
};
|
||||
$opt_o eq 'html' && do {
|
||||
print $html->as_HTML;
|
||||
last;
|
||||
};
|
||||
$opt_o eq 'links' && do {
|
||||
my $base = $response->base;
|
||||
for ( @{ $html->extract_links } ) {
|
||||
my($link, $elem) = @$_;
|
||||
my $tag = uc $elem->tag;
|
||||
$link = new URI::URL $link, $base;
|
||||
print "$tag\t", $link->abs->as_string, "\n";
|
||||
}
|
||||
last;
|
||||
};
|
||||
$opt_o eq 'dump' && do {
|
||||
$html->dump;
|
||||
last;
|
||||
};
|
||||
# It is bad to not notice this before now :-(
|
||||
die "Illegal -o option value ($opt_o)\n";
|
||||
}
|
||||
} else {
|
||||
binmode STDOUT unless $opt_a;
|
||||
print $response->content;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
print STDERR $response->error_as_HTML unless $opt_d;
|
||||
$errors++;
|
||||
}
|
||||
}
|
||||
|
||||
exit $errors;
|
||||
|
||||
|
||||
sub printResponseChain
|
||||
{
|
||||
my($response) = @_;
|
||||
return unless defined $response;
|
||||
printResponseChain($response->previous);
|
||||
my $method = $response->request->method;
|
||||
my $url = $response->request->url->as_string;
|
||||
my $code = $response->code;
|
||||
print "$method $url --> ", $response->status_line, "\n";
|
||||
}
|
||||
|
||||
|
||||
sub usage
|
||||
{
|
||||
die <<"EOT";
|
||||
Usage: $progname [-options] <url>...
|
||||
-m <method> use method for the request (default is '$method')
|
||||
-f make request even if $progname believes method is illegal
|
||||
-b <base> Use the specified URL as base
|
||||
-t <timeout> Set timeout value
|
||||
-i <time> Set the If-Modified-Since header on the request
|
||||
-c <conttype> use this content-type for POST, PUT, CHECKIN
|
||||
-a Use text mode for content I/O
|
||||
-p <proxyurl> use this as a proxy
|
||||
-P don't load proxy settings from environment
|
||||
|
||||
-u Display method and URL before any response
|
||||
-U Display request headers (implies -u)
|
||||
-s Display response status code
|
||||
-S Display response status chain
|
||||
-e Display response headers
|
||||
-d Do not display content
|
||||
-o <format> Process HTML content in various ways
|
||||
|
||||
-v Show program version
|
||||
-h Print this message
|
||||
|
||||
-x Extra debugging output
|
||||
EOT
|
||||
}
|
584
tools/Perl/bin/lwp-rget
Normal file
584
tools/Perl/bin/lwp-rget
Normal file
|
@ -0,0 +1,584 @@
|
|||
#!perl -w
|
||||
|
||||
#line 18
|
||||
|
||||
=head1 NAME
|
||||
|
||||
lwp-rget - Retrieve WWW documents recursively
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
lwp-rget [--verbose] [--auth=USER:PASS] [--depth=N] [--hier] [--iis]
|
||||
[--keepext=mime/type[,mime/type]] [--limit=N] [--nospace]
|
||||
[--prefix=URL] [--sleep=N] [--tolower] <URL>
|
||||
lwp-rget --version
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This program will retrieve a document and store it in a local file. It
|
||||
will follow any links found in the document and store these documents
|
||||
as well, patching links so that they refer to these local copies.
|
||||
This process continues until there are no more unvisited links or the
|
||||
process is stopped by the one or more of the limits which can be
|
||||
controlled by the command line arguments.
|
||||
|
||||
This program is useful if you want to make a local copy of a
|
||||
collection of documents or want to do web reading off-line.
|
||||
|
||||
All documents are stored as plain files in the current directory. The
|
||||
file names chosen are derived from the last component of URL paths.
|
||||
|
||||
The options are:
|
||||
|
||||
=over 3
|
||||
|
||||
=item --auth=USER:PASS<n>
|
||||
|
||||
Set the authentication credentials to user "USER" and password "PASS" if
|
||||
any restricted parts of the web site are hit. If there are restricted
|
||||
parts of the web site and authentication credentials are not available,
|
||||
those pages will not be downloaded.
|
||||
|
||||
=item --depth=I<n>
|
||||
|
||||
Limit the recursive level. Embedded images are always loaded, even if
|
||||
they fall outside the I<--depth>. This means that one can use
|
||||
I<--depth=0> in order to fetch a single document together with all
|
||||
inline graphics.
|
||||
|
||||
The default depth is 5.
|
||||
|
||||
=item --hier
|
||||
|
||||
Download files into a hierarchy that mimics the web site structure.
|
||||
The default is to put all files in the current directory.
|
||||
|
||||
=item --iis
|
||||
|
||||
Sends an "Accept: */*" on all URL requests as a workaround for a bug in
|
||||
IIS 2.0. If no Accept MIME header is present, IIS 2.0 returns with a
|
||||
"406 No acceptable objects were found" error. Also converts any back
|
||||
slashes (\\) in URLs to forward slashes (/).
|
||||
|
||||
=item --keepext=I<mime/type[,mime/type]>
|
||||
|
||||
Keeps the current extension for the list MIME types. Useful when
|
||||
downloading text/plain documents that shouldn't all be translated to
|
||||
*.txt files.
|
||||
|
||||
=item --limit=I<n>
|
||||
|
||||
Limit the number of documents to get. The default limit is 50.
|
||||
|
||||
=item --nospace
|
||||
|
||||
Changes spaces in all URLs to underscore characters (_). Useful when
|
||||
downloading files from sites serving URLs with spaces in them. Does not
|
||||
remove spaces from fragments, e.g., "file.html#somewhere in here".
|
||||
|
||||
=item --prefix=I<url_prefix>
|
||||
|
||||
Limit the links to follow. Only URLs that start the prefix string are
|
||||
followed.
|
||||
|
||||
The default prefix is set as the "directory" of the initial URL to
|
||||
follow. For instance if we start lwp-rget with the URL
|
||||
C<http://www.sn.no/foo/bar.html>, then prefix will be set to
|
||||
C<http://www.sn.no/foo/>.
|
||||
|
||||
Use C<--prefix=''> if you don't want the fetching to be limited by any
|
||||
prefix.
|
||||
|
||||
=item --sleep=I<n>
|
||||
|
||||
Sleep I<n> seconds before retrieving each document. This options allows
|
||||
you to go slowly, not loading the server you visiting too much.
|
||||
|
||||
=item --tolower
|
||||
|
||||
Translates all links to lowercase. Useful when downloading files from
|
||||
IIS since it does not serve files in a case sensitive manner.
|
||||
|
||||
=item --verbose
|
||||
|
||||
Make more noise while running.
|
||||
|
||||
=item --quiet
|
||||
|
||||
Don't make any noise.
|
||||
|
||||
=item --version
|
||||
|
||||
Print program version number and quit.
|
||||
|
||||
=item --help
|
||||
|
||||
Print the usage message and quit.
|
||||
|
||||
=back
|
||||
|
||||
Before the program exits the name of the file, where the initial URL
|
||||
is stored, is printed on stdout. All used filenames are also printed
|
||||
on stderr as they are loaded. This printing can be suppressed with
|
||||
the I<--quiet> option.
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
L<lwp-request>, L<LWP>
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Gisle Aas <aas@sn.no>
|
||||
|
||||
=cut
|
||||
|
||||
use strict;
|
||||
|
||||
use Getopt::Long qw(GetOptions);
|
||||
use URI::URL qw(url);
|
||||
use LWP::MediaTypes qw(media_suffix);
|
||||
|
||||
use vars qw($VERSION);
|
||||
use vars qw($MAX_DEPTH $MAX_DOCS $PREFIX $VERBOSE $QUIET $SLEEP $HIER $AUTH $IIS $TOLOWER $NOSPACE %KEEPEXT);
|
||||
|
||||
my $progname = $0;
|
||||
$progname =~ s|.*/||; # only basename left
|
||||
$progname =~ s/\.\w*$//; #strip extension if any
|
||||
|
||||
$VERSION = sprintf("%d.%02d", q$Revision: 1.16 $ =~ /(\d+)\.(\d+)/);
|
||||
|
||||
#$Getopt::Long::debug = 1;
|
||||
#$Getopt::Long::ignorecase = 0;
|
||||
|
||||
# Defaults
|
||||
$MAX_DEPTH = 5;
|
||||
$MAX_DOCS = 50;
|
||||
|
||||
GetOptions('version' => \&print_version,
|
||||
'help' => \&usage,
|
||||
'depth=i' => \$MAX_DEPTH,
|
||||
'limit=i' => \$MAX_DOCS,
|
||||
'verbose!' => \$VERBOSE,
|
||||
'quiet!' => \$QUIET,
|
||||
'sleep=i' => \$SLEEP,
|
||||
'prefix:s' => \$PREFIX,
|
||||
'hier' => \$HIER,
|
||||
'auth=s' => \$AUTH,
|
||||
'iis' => \$IIS,
|
||||
'tolower' => \$TOLOWER,
|
||||
'nospace' => \$NOSPACE,
|
||||
'keepext=s' => \$KEEPEXT{'OPT'},
|
||||
) || usage();
|
||||
|
||||
sub print_version {
|
||||
require LWP;
|
||||
my $DISTNAME = 'libwww-perl-' . LWP::Version();
|
||||
print <<"EOT";
|
||||
This is lwp-rget version $VERSION ($DISTNAME)
|
||||
|
||||
Copyright 1996-1998, Gisle Aas.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the same terms as Perl itself.
|
||||
EOT
|
||||
exit 0;
|
||||
}
|
||||
|
||||
my $start_url = shift || usage();
|
||||
usage() if @ARGV;
|
||||
|
||||
require LWP::UserAgent;
|
||||
my $ua = new LWP::UserAgent;
|
||||
$ua->agent("$progname/$VERSION " . $ua->agent);
|
||||
$ua->env_proxy;
|
||||
|
||||
unless (defined $PREFIX) {
|
||||
$PREFIX = url($start_url); # limit to URLs below this one
|
||||
eval {
|
||||
$PREFIX->eparams(undef);
|
||||
$PREFIX->equery(undef);
|
||||
};
|
||||
|
||||
$_ = $PREFIX->epath;
|
||||
s|[^/]+$||;
|
||||
$PREFIX->epath($_);
|
||||
$PREFIX = $PREFIX->as_string;
|
||||
}
|
||||
|
||||
%KEEPEXT = map { lc($_) => 1 } split(/\s*,\s*/, $KEEPEXT{'OPT'});
|
||||
|
||||
print <<"" if $VERBOSE;
|
||||
START = $start_url
|
||||
MAX_DEPTH = $MAX_DEPTH
|
||||
MAX_DOCS = $MAX_DOCS
|
||||
PREFIX = $PREFIX
|
||||
|
||||
|
||||
my $no_docs = 0;
|
||||
my %seen = (); # mapping from URL => local_file
|
||||
|
||||
my $filename = fetch($start_url);
|
||||
print "$filename\n" unless $QUIET;
|
||||
|
||||
sub fetch
|
||||
{
|
||||
my($url, $type, $depth) = @_;
|
||||
|
||||
# Fix http://sitename.com/../blah/blah.html to
|
||||
# http://sitename.com/blah/blah.html
|
||||
$url = $url->as_string if (ref($url));
|
||||
while ($url =~ s#(https?://[^/]+/)\.\.\/#$1#) {}
|
||||
|
||||
# Fix backslashes (\) in URL if $IIS defined
|
||||
$url = fix_backslashes($url) if (defined $IIS);
|
||||
|
||||
$url = url($url) unless ref($url);
|
||||
$type ||= 'a';
|
||||
# Might be the background attribute
|
||||
$type = 'img' if ($type eq 'body' || $type eq 'td');
|
||||
$depth ||= 0;
|
||||
|
||||
# Print the URL before we start checking...
|
||||
my $out = (" " x $depth) . $url . " ";
|
||||
$out .= "." x (60 - length($out));
|
||||
print STDERR $out . " " if $VERBOSE;
|
||||
|
||||
# Can't get mailto things
|
||||
if ($url->scheme eq 'mailto') {
|
||||
print STDERR "*skipping mailto*\n" if $VERBOSE;
|
||||
return $url->as_string;
|
||||
}
|
||||
|
||||
# The $plain_url is a URL without the fragment part
|
||||
my $plain_url = $url->clone;
|
||||
$plain_url->frag(undef);
|
||||
|
||||
# Check PREFIX, but not for <IMG ...> links
|
||||
if ($type ne 'img' and $url->as_string !~ /^\Q$PREFIX/o) {
|
||||
print STDERR "*outsider*\n" if $VERBOSE;
|
||||
return $url->as_string;
|
||||
}
|
||||
|
||||
# Translate URL to lowercase if $TOLOWER defined
|
||||
$plain_url = to_lower($plain_url) if (defined $TOLOWER);
|
||||
|
||||
# If we already have it, then there is nothing to be done
|
||||
my $seen = $seen{$plain_url->as_string};
|
||||
if ($seen) {
|
||||
my $frag = $url->frag;
|
||||
$seen .= "#$frag" if defined($frag);
|
||||
$seen = protect_frag_spaces($seen);
|
||||
print STDERR "$seen (again)\n" if $VERBOSE;
|
||||
return $seen;
|
||||
}
|
||||
|
||||
# Too much or too deep
|
||||
if ($depth > $MAX_DEPTH and $type ne 'img') {
|
||||
print STDERR "*too deep*\n" if $VERBOSE;
|
||||
return $url;
|
||||
}
|
||||
if ($no_docs > $MAX_DOCS) {
|
||||
print STDERR "*too many*\n" if $VERBOSE;
|
||||
return $url;
|
||||
}
|
||||
|
||||
# Fetch document
|
||||
$no_docs++;
|
||||
sleep($SLEEP) if $SLEEP;
|
||||
my $req = HTTP::Request->new(GET => $url);
|
||||
# See: http://ftp.sunet.se/pub/NT/mirror-microsoft/kb/Q163/7/74.TXT
|
||||
$req->header ('Accept', '*/*') if (defined $IIS); # GIF/JPG from IIS 2.0
|
||||
$req->authorization_basic(split (/:/, $AUTH)) if (defined $AUTH);
|
||||
my $res = $ua->request($req);
|
||||
|
||||
# Check outcome
|
||||
if ($res->is_success) {
|
||||
my $doc = $res->content;
|
||||
my $ct = $res->content_type;
|
||||
my $name = find_name($res->request->url, $ct);
|
||||
print STDERR "$name\n" unless $QUIET;
|
||||
$seen{$plain_url->as_string} = $name;
|
||||
|
||||
# If the file is HTML, then we look for internal links
|
||||
if ($ct eq "text/html") {
|
||||
# Save an unprosessed version of the HTML document. This
|
||||
# both reserves the name used, and it also ensures that we
|
||||
# don't loose everything if this program is killed before
|
||||
# we finish.
|
||||
save($name, $doc);
|
||||
my $base = $res->base;
|
||||
|
||||
# Follow and substitute links...
|
||||
$doc =~
|
||||
s/
|
||||
(
|
||||
<(img|a|body|area|frame|td)\b # some interesting tag
|
||||
[^>]+ # still inside tag (not strictly correct)
|
||||
\b(?:src|href|background) # some link attribute
|
||||
\s*=\s* # =
|
||||
)
|
||||
(?: # scope of OR-ing
|
||||
(")([^"]*)" | # value in double quotes OR
|
||||
(')([^']*)' | # value in single quotes OR
|
||||
([^\s>]+) # quoteless value
|
||||
)
|
||||
/
|
||||
new_link($1, lc($2), $3||$5, $4||$6||$7, $base, $name, $depth+1)
|
||||
/giex;
|
||||
# XXX
|
||||
# The regular expression above is not strictly correct.
|
||||
# It is not really possible to parse HTML with a single
|
||||
# regular expression, but it is faster. Tags that might
|
||||
# confuse us include:
|
||||
# <a alt="href" href=link.html>
|
||||
# <a alt=">" href="link.html">
|
||||
#
|
||||
}
|
||||
save($name, $doc);
|
||||
return $name;
|
||||
} else {
|
||||
print STDERR $res->code . " " . $res->message . "\n" if $VERBOSE;
|
||||
$seen{$plain_url->as_string} = $url->as_string;
|
||||
return $url->as_string;
|
||||
}
|
||||
}
|
||||
|
||||
sub new_link
|
||||
{
|
||||
my($pre, $type, $quote, $url, $base, $localbase, $depth) = @_;
|
||||
|
||||
$url = protect_frag_spaces($url);
|
||||
|
||||
$url = fetch(url($url, $base)->abs, $type, $depth);
|
||||
$url = url("file:$url", "file:$localbase")->rel
|
||||
unless $url =~ /^[.+\-\w]+:/;
|
||||
|
||||
$url = unprotect_frag_spaces($url);
|
||||
|
||||
return $pre . $quote . $url . $quote;
|
||||
}
|
||||
|
||||
|
||||
sub protect_frag_spaces
|
||||
{
|
||||
my ($url) = @_;
|
||||
|
||||
$url = $url->as_string if (ref($url));
|
||||
|
||||
if ($url =~ m/^([^#]*#)(.+)$/)
|
||||
{
|
||||
my ($base, $frag) = ($1, $2);
|
||||
$frag =~ s/ /%20/g;
|
||||
$url = $base . $frag;
|
||||
}
|
||||
|
||||
return $url;
|
||||
}
|
||||
|
||||
|
||||
sub unprotect_frag_spaces
|
||||
{
|
||||
my ($url) = @_;
|
||||
|
||||
$url = $url->as_string if (ref($url));
|
||||
|
||||
if ($url =~ m/^([^#]*#)(.+)$/)
|
||||
{
|
||||
my ($base, $frag) = ($1, $2);
|
||||
$frag =~ s/%20/ /g;
|
||||
$url = $base . $frag;
|
||||
}
|
||||
|
||||
return $url;
|
||||
}
|
||||
|
||||
|
||||
sub fix_backslashes
|
||||
{
|
||||
my ($url) = @_;
|
||||
my ($base, $frag);
|
||||
|
||||
$url = $url->as_string if (ref($url));
|
||||
|
||||
if ($url =~ m/([^#]+)(#.*)/)
|
||||
{
|
||||
($base, $frag) = ($1, $2);
|
||||
}
|
||||
else
|
||||
{
|
||||
$base = $url;
|
||||
$frag = "";
|
||||
}
|
||||
|
||||
$base =~ tr/\\/\//;
|
||||
$base =~ s/%5[cC]/\//g; # URL-encoded back slash is %5C
|
||||
|
||||
return $base . $frag;
|
||||
}
|
||||
|
||||
|
||||
sub to_lower
|
||||
{
|
||||
my ($url) = @_;
|
||||
my $was_object = 0;
|
||||
|
||||
if (ref($url))
|
||||
{
|
||||
$url = $url->as_string;
|
||||
$was_object = 1;
|
||||
}
|
||||
|
||||
if ($url =~ m/([^#]+)(#.*)/)
|
||||
{
|
||||
$url = lc($1) . $2;
|
||||
}
|
||||
else
|
||||
{
|
||||
$url = lc($url);
|
||||
}
|
||||
|
||||
if ($was_object == 1)
|
||||
{
|
||||
return url($url);
|
||||
}
|
||||
else
|
||||
{
|
||||
return $url;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
sub translate_spaces
|
||||
{
|
||||
my ($url) = @_;
|
||||
my ($base, $frag);
|
||||
|
||||
$url = $url->as_string if (ref($url));
|
||||
|
||||
if ($url =~ m/([^#]+)(#.*)/)
|
||||
{
|
||||
($base, $frag) = ($1, $2);
|
||||
}
|
||||
else
|
||||
{
|
||||
$base = $url;
|
||||
$frag = "";
|
||||
}
|
||||
|
||||
$base =~ s/^ *//; # Remove initial spaces from base
|
||||
$base =~ s/ *$//; # Remove trailing spaces from base
|
||||
|
||||
$base =~ tr/ /_/;
|
||||
$base =~ s/%20/_/g; # URL-encoded space is %20
|
||||
|
||||
return $base . $frag;
|
||||
}
|
||||
|
||||
|
||||
sub mkdirp
|
||||
{
|
||||
my($directory, $mode) = @_;
|
||||
my @dirs = split(/\//, $directory);
|
||||
my $path = shift(@dirs); # build it as we go
|
||||
my $result = 1; # assume it will work
|
||||
|
||||
unless (-d $path) {
|
||||
$result &&= mkdir($path, $mode);
|
||||
}
|
||||
|
||||
foreach (@dirs) {
|
||||
$path .= "/$_";
|
||||
if ( ! -d $path) {
|
||||
$result &&= mkdir($path, $mode);
|
||||
}
|
||||
}
|
||||
|
||||
return $result;
|
||||
}
|
||||
|
||||
|
||||
sub find_name
|
||||
{
|
||||
my($url, $type) = @_;
|
||||
#print "find_name($url, $type)\n";
|
||||
|
||||
# Translate spaces in URL to underscores (_) if $NOSPACE defined
|
||||
$url = translate_spaces($url) if (defined $NOSPACE);
|
||||
|
||||
# Translate URL to lowercase if $TOLOWER defined
|
||||
$url = to_lower($url) if (defined $TOLOWER);
|
||||
|
||||
$url = url($url) unless ref($url);
|
||||
|
||||
my $path = $url->path;
|
||||
|
||||
# trim path until only the basename is left
|
||||
$path =~ s|(.*/)||;
|
||||
my $dirname = ".$1";
|
||||
if (!$HIER) {
|
||||
$dirname = "";
|
||||
} elsif (! -d $dirname) {
|
||||
mkdirp($dirname, 0775);
|
||||
}
|
||||
|
||||
my $extra = ""; # something to make the name unique
|
||||
my $suffix;
|
||||
|
||||
if ($KEEPEXT{lc($type)}) {
|
||||
$suffix = ($path =~ m/\.(.*)/) ? $1 : "";
|
||||
} else {
|
||||
$suffix = media_suffix($type);
|
||||
}
|
||||
|
||||
$path =~ s|\..*||; # trim suffix
|
||||
$path = "index" unless length $path;
|
||||
|
||||
while (1) {
|
||||
# Construct a new file name
|
||||
my $file = $dirname . $path . $extra;
|
||||
$file .= ".$suffix" if $suffix;
|
||||
# Check if it is unique
|
||||
return $file unless -f $file;
|
||||
|
||||
# Try something extra
|
||||
unless ($extra) {
|
||||
$extra = "001";
|
||||
next;
|
||||
}
|
||||
$extra++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
sub save
|
||||
{
|
||||
my $name = shift;
|
||||
#print "save($name,...)\n";
|
||||
open(FILE, ">$name") || die "Can't save $name: $!";
|
||||
binmode FILE;
|
||||
print FILE $_[0];
|
||||
close(FILE);
|
||||
}
|
||||
|
||||
|
||||
sub usage
|
||||
{
|
||||
die <<"";
|
||||
Usage: $progname [options] <URL>
|
||||
Allowed options are:
|
||||
--auth=USER:PASS Set authentication credentials for web site
|
||||
--depth=N Maximum depth to traverse (default is $MAX_DEPTH)
|
||||
--hier Download into hierarchy (not all files into cwd)
|
||||
--iis Workaround IIS 2.0 bug by sending "Accept: */*" MIME
|
||||
header; translates backslashes (\\) to forward slashes (/)
|
||||
--keepext=type Keep file extension for MIME types (comma-separated list)
|
||||
--limit=N A limit on the number documents to get (default is $MAX_DOCS)
|
||||
--nospace Translate spaces URLs (not #fragments) to underscores (_)
|
||||
--version Print version number and quit
|
||||
--verbose More output
|
||||
--quiet No output
|
||||
--sleep=SECS Sleep between gets, ie. go slowly
|
||||
--prefix=PREFIX Limit URLs to follow to those which begin with PREFIX
|
||||
--tolower Translate all URLs to lowercase (useful with IIS servers)
|
||||
|
||||
}
|
17
tools/Perl/bin/p_uninst.dat
Normal file
17
tools/Perl/bin/p_uninst.dat
Normal file
|
@ -0,0 +1,17 @@
|
|||
$app_name = 'ActivePerl';
|
||||
$is_uninstall_string = 'C:\\WINNT\\uninst.exe -fC:\\Perl\\DeIsL1.isu';
|
||||
$path_info = [
|
||||
'C:\\Perl\\bin'
|
||||
];
|
||||
$iis_virt_dir = [];
|
||||
$iis_script_map = {};
|
||||
$ns_config_dir = undef;
|
||||
$lines_in_file = {};
|
||||
$directory = [
|
||||
'C:\\Perl\\html\\lib'
|
||||
];
|
||||
$file = [
|
||||
'C:\\Perl\\bin\\ppm.bat',
|
||||
'C:\\Perl\\bin\\perlse.pl',
|
||||
'C:\\Perl\\bin/p_uninst.dat'
|
||||
];
|
BIN
tools/Perl/bin/perl.exe
Normal file
BIN
tools/Perl/bin/perl.exe
Normal file
Binary file not shown.
BIN
tools/Perl/bin/perl5.00502.exe
Normal file
BIN
tools/Perl/bin/perl5.00502.exe
Normal file
Binary file not shown.
BIN
tools/Perl/bin/perlcore.dll
Normal file
BIN
tools/Perl/bin/perlcore.dll
Normal file
Binary file not shown.
BIN
tools/Perl/bin/perlglob.exe
Normal file
BIN
tools/Perl/bin/perlglob.exe
Normal file
Binary file not shown.
1108
tools/Perl/bin/ppm.pl
Normal file
1108
tools/Perl/bin/ppm.pl
Normal file
File diff suppressed because it is too large
Load diff
211
tools/Perl/bin/uninstall.pl
Normal file
211
tools/Perl/bin/uninstall.pl
Normal file
|
@ -0,0 +1,211 @@
|
|||
#
|
||||
# Uninstall.pl
|
||||
#
|
||||
# Author: Michael Smith (mikes@ActiveState.com)
|
||||
#
|
||||
# Copyright © 1998 ActiveState Tool Corp., all rights reserved.
|
||||
#
|
||||
###########################################################
|
||||
|
||||
use Win32::Registry;
|
||||
use File::Find;
|
||||
use MetabaseConfig;
|
||||
|
||||
my $data_file = $ARGV[0];
|
||||
my $ENVIRONMENT_KEY = 'SYSTEM\CurrentControlSet\Control\Session Manager\Environment';
|
||||
|
||||
ReadData();
|
||||
Warn();
|
||||
UninstallDependents();
|
||||
CleanPath();
|
||||
RemoveIISVirtDirs();
|
||||
RemoveIISScriptMaps();
|
||||
RemoveLinesFromFiles();
|
||||
RemoveDirectories();
|
||||
RemoveFiles();
|
||||
CallInstallShield();
|
||||
sleep(3);
|
||||
exit(0);
|
||||
|
||||
sub ReadData {
|
||||
print "Reading uninstall data...\n";
|
||||
my $data = '';
|
||||
$rv = open(DATA, "<$data_file");
|
||||
if($rv) {
|
||||
map($data .= $_, <DATA>);
|
||||
close(DATA);
|
||||
eval($data);
|
||||
}else{
|
||||
die "Error reading uninstallation data file. Aborting!!";
|
||||
}
|
||||
}
|
||||
|
||||
sub Warn {
|
||||
print "This will uninstall $app_name. Do you wish to continue?\n";
|
||||
print "[y|N] ==>";
|
||||
my $response = '';
|
||||
while(($response = <STDIN>) !~ /^[\nyn]/i){};
|
||||
if($response !~ /^y/i) {
|
||||
print "Aborting $app_name uninstallation!\n";
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
sub UninstallDependents {
|
||||
my $RegObj = 0;
|
||||
my $UninstallString = '';
|
||||
my $type = 0;
|
||||
my $rv = 0;
|
||||
|
||||
foreach $dependent (@$dependents) {
|
||||
print "$dependent is dependent on $app_name\n" .
|
||||
"and will not function correctly without it.\n" .
|
||||
"Would you like to uninstall $dependent?\n" .
|
||||
"[y|n] ==>";
|
||||
while(($response = <STDIN>) !~ /[yn]/i){};
|
||||
|
||||
if($response =~ /y/i) {
|
||||
$rv = $HKEY_LOCAL_MACHINE->Open("software\\microsoft\\windows\\currentversion\\uninstall\\$dependent", $RegObj);
|
||||
if($rv) {
|
||||
$rv = $RegObj->QueryValueEx("UninstallString", $type, $UninstallString);
|
||||
if($rv) {
|
||||
$RegObj->Close();
|
||||
print $UninstallString;
|
||||
print "Uninstalling $dependent...\n";
|
||||
$rv = (system($UninstallString) ? 0 : 1);
|
||||
}
|
||||
}
|
||||
|
||||
if(!$rv) {
|
||||
print "Error uninstalling $dependent!\n\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sub CleanPath {
|
||||
if(@$path_info) {
|
||||
print "Cleaning PATH...\n";
|
||||
my $path = '';
|
||||
if(Win32::IsWinNT) {
|
||||
my $Environment = 0;
|
||||
if($HKEY_LOCAL_MACHINE->Open($ENVIRONMENT_KEY, $Environment)) {
|
||||
if($Environment->QueryValueEx("PATH", $type, $path)) {
|
||||
for $dir (@$path_info) {
|
||||
$dir =~ s/\\/\\\\/g;
|
||||
$path =~ s/$dir;?//ig;
|
||||
}
|
||||
$Environment->SetValueEx("PATH", -1, $type, $path);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
my $file = "$ENV{'SystemDrive'}/autoexec.bat";
|
||||
if(open(FILE, "<$file")) {
|
||||
my @statements = <FILE>;
|
||||
close(FILE);
|
||||
my $path = '';
|
||||
for $statement (@statements) {
|
||||
if($statement =~ /\s+path\s?=/i) {
|
||||
$path = $statement;
|
||||
for $dir (@$path_info) {
|
||||
$dir =~ s/\\/\\\\/g;
|
||||
$path =~ s/$dir;?//ig;
|
||||
}
|
||||
}
|
||||
}
|
||||
if(open(FILE, ">$file")) {
|
||||
print FILE @statements;
|
||||
close(FILE);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sub RemoveIISVirtDirs {
|
||||
if(@$iis_virt_dir) {
|
||||
print "Removing IIS4 virtual directories...\n";
|
||||
for $virt_dir (@$iis_virt_dir) {
|
||||
$rv = MetabaseConfig::DeleteVirDir(1, $virt_dir);
|
||||
if($rv =~ /^Error/i){
|
||||
print "$rv\n";
|
||||
system('pause');
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sub RemoveIISScriptMaps {
|
||||
if(keys %$iis_script_map) {
|
||||
print "Removing IIS4 script maps...\n";
|
||||
my $virt_dir = '';
|
||||
for $key (keys %$iis_script_map) {
|
||||
print "Virtual Directory ==> $key\n";
|
||||
for $script_map (@{$iis_script_map->{$key}}) {
|
||||
print "\t$key ==> $script_map\n";
|
||||
$virt_dir = $key;
|
||||
$virt_dir = ($virt_dir eq '.' ? '' : $virt_dir);
|
||||
$rv = MetabaseConfig::RemoveFileExtMapping(1, $virt_dir, $script_map);
|
||||
if($rv =~ /^Error/i){
|
||||
print "$rv\n";
|
||||
system('pause');
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sub RemoveLinesFromFiles {
|
||||
my $file;
|
||||
|
||||
foreach $file (keys %$lines_in_file) {
|
||||
open(FILE, "<$file") or next;
|
||||
my @lines = <FILE>;
|
||||
close(FILE);
|
||||
open(FILE, ">$file") or next;
|
||||
LINE: foreach $line (@lines) {
|
||||
chomp $line;
|
||||
for ($offset = 0; $offset <= $#{$$lines_in_file{$file}}; $offset++) {
|
||||
if ($line eq $$lines_in_file{$file}[$offset]) {
|
||||
splice(@{$$lines_in_file{$file}}, $offset, 1);
|
||||
next LINE;
|
||||
}
|
||||
}
|
||||
print FILE "$line\n";
|
||||
}
|
||||
close(FILE);
|
||||
}
|
||||
}
|
||||
|
||||
sub RemoveDirectories {
|
||||
if(@$directory) {
|
||||
print "Removing directories...\n";
|
||||
for $dir (@$directory) {
|
||||
finddepth(\&DeleteFiles, $dir);
|
||||
rmdir($dir);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sub RemoveFiles {
|
||||
if(@$file) {
|
||||
print "Removing files...\n";
|
||||
for $file (@$file) {
|
||||
unlink($file);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sub CallInstallShield {
|
||||
print "Calling InstallShield...\n";
|
||||
system("start $is_uninstall_string");
|
||||
}
|
||||
|
||||
sub DeleteFiles {
|
||||
if(-d $File::Find::name) {
|
||||
rmdir("$File::Find::name");
|
||||
} else {
|
||||
unlink("$File::Find::name");
|
||||
}
|
||||
}
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue