I actually managed to do this, although it is a bit messy:
----------------------------------------------
#!/opt/perl/5.8.0/bin/perl
$|++;
use strict;
use warnings;
use FindBin;
use lib "$FindBin::Bin/lib";
use Fcntl qw(:flock);
use File::Basename;
use File::Path qw(mkpath);
use POSIX qw(strftime);
use LWP::UserAgent;
use HTTP::Headers;
use HTTP::Request::Common;
use Authen::NTLM;
use HTML::TableExtract;
use HTML::Form;
use HTML::Template;
use MIME::Entity;
#
# Script configuration. Every key except AuthMethod's header use below is
# applied where the corresponding object is built.
#
my $Options = {
    user          => "me",
    password      => "xxxx",
    domain        => "\\",
    timeout       => 30,
    protocol      => "https",
    AuthMethod    => "NTLM",
    BrowserAgent  => "MSIE 6.0; Windows NT 5.0",
    RequestMethod => "GET",
    DataDir       => "/tmp",
};
my $log = "/var/tmp/get_url.log";

#
# Work out which report to fetch. An explicit YYYYMMDD argument wins;
# otherwise use yesterday's date. mktime() normalises "day of month - 1"
# (including day 0 at the start of a month), and anchoring at noon keeps a
# one-hour DST shift from moving the result onto the wrong day.
#
my $ReportDate = shift @ARGV;
if (defined $ReportDate) {
    die "Usage: $0 [YYYYMMDD]\n" unless $ReportDate =~ /\A\d{8}\z/;
}
else {
    my @now = localtime;
    $ReportDate = strftime(
        "%Y%m%d",
        localtime(POSIX::mktime(0, 0, 12, $now[3] - 1, $now[4], $now[5]))
    );
}
my $url = "$Options->{'protocol'}://egserver/Reports/Userlist_$ReportDate.csv";

#
# keep_alive stays on so both handshake requests can reuse one connection;
# NTLM authenticates the connection rather than the individual request.
#
my $browser = LWP::UserAgent->new(
    agent      => "Mozilla/4.0 (compatible; $Options->{'BrowserAgent'})",
    keep_alive => 1,
    timeout    => $Options->{'timeout'},
);

#
# NOTE(review): WWW-Authenticate is normally a response header. It is still
# sent here only because the original script sent it -- confirm whether the
# server needs it before removing.
#
my $header = HTTP::Headers->new(
    Content_Type       => 'text/html',
    'WWW-Authenticate' => $Options->{'AuthMethod'},
);

#
# First stage of NTLM authentication: send the negotiate message.
#
ntlm_domain($Options->{'domain'});
ntlm_user($Options->{'user'});
ntlm_password($Options->{'password'});
my $Authorization = Authen::NTLM::ntlm();
$header->header('Authorization' => "NTLM $Authorization");
my $request = HTTP::Request->new($Options->{'RequestMethod'} => $url, $header);
my $res = $browser->request($request);

#
# Second stage of authentication: answer the server's challenge.
# The response may carry several WWW-Authenticate values (or one
# comma-joined value), so pick out the NTLM token explicitly instead of
# assuming the header starts with it.
#
my $Challenge;
for my $value ($res->header('WWW-Authenticate')) {
    next unless defined $value;
    if ($value =~ /(?:\A|,\s*)NTLM\s+([A-Za-z0-9+\/=]+)/) {
        $Challenge = $1;
        last;
    }
}
if (defined $Challenge) {
    $Authorization = Authen::NTLM::ntlm($Challenge);
    $header->header('Authorization' => "NTLM $Authorization");
    $request = HTTP::Request->new($Options->{'RequestMethod'} => $url, $header);
    $res = $browser->request($request);
}

#
# The NTLM state must be reset once the handshake is over, whether or not
# the second stage ran.
#
ntlm_reset();

#
# Store the report, or log why it could not be fetched or written.
# Without a challenge $res is still the first response, so a server that
# did not ask for authentication is handled by the same branches.
#
my $ExitCode = 0;
if ($res->is_success) {
    my $Target = "$Options->{'DataDir'}/test_url";
    if (dump2file($Target, $res->content)) {
        out2logfile($log, "ERROR 2 : Can not write data from $url to $Target\n");
        $ExitCode = 1;
    }
}
else {
    out2logfile($log, "ERROR 1 : Can not dump data from $url\n Returned code: " . $res->code . " (" . $res->status_line . ")\n");
    $ExitCode = 1;
}
# Sub definitions below are compiled before this runs, so exiting here is safe.
exit $ExitCode;
#
# dump2file($FileName, $Message)
#
# Replace the contents of $FileName with $Message followed by a newline,
# creating the parent directory first if it does not exist.
#
# Returns 0 on success and 1 on any failure (directory cannot be created,
# file cannot be opened, lock not obtained within 10 attempts, or a
# truncate/write/close error).
#
sub dump2file {
    my ($FileName, $Message) = @_;

    my $Dir = dirname($FileName);
    unless (-d $Dir) {
        # mkpath croaks on failure; the -d test below is the real check.
        eval { mkpath($Dir) };
        return 1 unless -d $Dir;
    }

    # Open in append mode so the old contents are not destroyed before the
    # lock is held; the file is truncated only once we own it.
    open(my $fh, '>>', $FileName) or return 1;

    # Non-blocking lock with a bounded retry: give up after 10 attempts
    # instead of waiting forever on another writer.
    my $Locked = 0;
    for my $try (1 .. 10) {
        if (flock($fh, LOCK_EX | LOCK_NB)) {
            $Locked = 1;
            last;
        }
        sleep(1);
    }
    unless ($Locked) {
        close($fh);
        return 1;
    }

    my $Failed = 0;
    truncate($fh, 0) or $Failed = 1;
    unless ($Failed) {
        print {$fh} $Message, "\n" or $Failed = 1;
    }

    # close() flushes buffered data and releases the lock; buffered write
    # errors surface here, so its result matters.
    close($fh) or $Failed = 1;

    return $Failed ? 1 : 0;
}
#
# out2logfile($FileName, $Message, $PrintError, $Option)
#
# Write $Message to the log file $FileName, prefixed with a
# "YYYY-MM-DD HH:MM " local timestamp. The parent directory is created
# first if it does not exist.
#
#   $PrintError - when true, $Message is also printed to the currently
#                 selected output handle.
#   $Option     - ">" replaces the log; anything else (or undef) appends.
#
# Returns 0 on success and 1 on any failure (directory cannot be created,
# file cannot be opened, lock not obtained within 10 attempts, or a
# truncate/write/close error).
#
sub out2logfile {
    my ($FileName, $Message, $PrintError, $Option) = @_;

    # Echo first so the message is shown even if the log cannot be written.
    print $Message if $PrintError;

    my $Truncate = (defined($Option) && $Option eq ">") ? 1 : 0;

    my $Dir = dirname($FileName);
    unless (-d $Dir) {
        # mkpath croaks on failure; the -d test below is the real check.
        eval { mkpath($Dir) };
        return 1 unless -d $Dir;
    }

    # Always open in append mode, even for ">": truncation waits until the
    # lock is held so a concurrent writer's data is never clobbered mid-write.
    open(my $fh, '>>', $FileName) or return 1;

    # Non-blocking lock with a bounded retry: give up after 10 attempts
    # instead of waiting forever on another writer.
    my $Locked = 0;
    for my $try (1 .. 10) {
        if (flock($fh, LOCK_EX | LOCK_NB)) {
            $Locked = 1;
            last;
        }
        sleep(1);
    }
    unless ($Locked) {
        close($fh);
        return 1;
    }

    my $Failed = 0;
    if ($Truncate) {
        truncate($fh, 0) or $Failed = 1;
    }
    unless ($Failed) {
        my $Stamp = strftime("%Y-%m-%d %H:%M ", localtime);
        print {$fh} $Stamp, $Message or $Failed = 1;
    }

    # close() flushes buffered data and releases the lock; buffered write
    # errors surface here, so its result matters.
    close($fh) or $Failed = 1;

    return $Failed ? 1 : 0;
}
==========================================================
Any comments on this? It could use some improving.
Now I need to figure out how to get the latest report automatically, based on the date in the report's file name (the latest is always yesterday's report). Right now I'm hardcoding it:
my $url = "https://egserver/Reports/Userlist_20080212.csv";
Any suggestions on how I can make it download the right file automatically by manipulating the _20080212 part? It's a stupid (and lazy) question, I guess; I'm going to look into the books now and see (maybe strftime can be used in some way).
Thanks!