Plastech |
05-22-2007 10:27 PM |
Simple perl help: regular expressions?
I would like to modify a perl script which downloads all sizes of a photo from flickr to one that only downloads the large sized image (i.e. one that ends in _b.jpg but not _t.jpg, etc.). I feel like this should be done with the sizes array, but regexps are beyond me right now. Here is the code:
Code:
#!/usr/bin/perl -- # -*- Perl -*-
use strict;
use Getopt::Std;
use vars qw($opt_c $opt_d $opt_m $opt_n);
use Digest::MD5 qw(md5_hex);
my $INFO = undef;
my $ROOT = undef;
my $usage = "Usage: $0 [-c config] [-d] [-m maxdl] [-n]\n";
die $usage if ! getopts('c:dm:n');
my $delextra = $opt_d ? 1 : 0;
my $config = $opt_c || $ENV{'HOME'} . "/.flocl.xml";
my $max = $opt_m || 500;
my $dryrun = $opt_n ? 1 : 0;
my %unavail = ('Square-size' => 959,
'Square-md5' => 'ad9a068056d12def5b846e53148b5a60',
'Thumbnail-size' => 1004,
'Thumbnail-md5' => '0b51e4a30409a6e900846d51480f42a4',
'Small-size' => 1699,
'Small-md5' => '1e984c37962d177c7f847fd40aeb5628',
'Medium-size' => 2900,
'Medium-md5' => 'e3cc1d6e68d1c341e971340df2fc1e92',
'Large-size' => 2873,
'Large-md5' => 'b1f67752c93a4d1d476ed6339e078931',
'Original-size' => 2873,
'Original-md5' => 'b1f67752c93a4d1d476ed6339e078931',
);
open (F, $config) || die "Configuration file \"$config\" must exist.\n";
read (F, $_, -s $config);
close (F);
$INFO = $2 if /<([^>:]*\:?info)>(.*?)<\/\1>/s;
$ROOT = $2 if /<([^>:]*\:?root)>(.*?)<\/\1>/s;
die "Root directory \"$ROOT\" must exist.\n" if ! -d $ROOT;
my $bindir = $0;
$bindir =~ s/\/[^\/]+$//;
my %images = ();
my %backup = ();
open (F, $INFO) || die "Flickr info file \"$INFO\" must exist.\n";
read (F, $_, -s $INFO);
close (F);
# A bit of a hack, but the sizes are all we care about
my @sizes = /<size\s.*?>/sg;
foreach my $size (@sizes) {
my $label = undef;
my $source = undef;
$label = $2 if $size =~ /\slabel=([\"\'])(.*?)\1/s;
$source = $2 if $size =~ /\ssource=([\"\'])(.*?)\1/s;
if (defined $source) {
my $file = $source;
$file =~ s/http:\/\///;
$file = "$ROOT/$file";
$images{$file} = $source;
$backup{$file} = $label if ! okLocal($file, $label);
}
}
my @files = sort keys %backup;
my $total = $#files + 1;
$max = $total if $max > $total;
my $count = 0;
if (@files) {
print "$total images to backup...\n";
foreach my $file (@files) {
$count++;
backupImage($images{$file}, $file, $backup{$file});
last if $count >= $max;
}
}
if ($delextra) {
open (FIND, "find $ROOT -type f -print |");
while (<FIND>) {
chop;
next if /\/web\//;
next if /\/cgi-bin\//;
next if /\/\.svn\//;
next if /\/\.CVS\//;
next if /\.xml$/;
if (! exists($images{$_})) {
if ($dryrun) {
print "Would remove: $_\n";
} else {
print "Remove: $_\n";
unlink $_;
}
}
}
}
exit 0;
sub okLocal {
my $file = shift;
my $label = shift;
return 0 if ! -f $file;
if (-f $file) {
if (exists $unavail{"$label-size"} && -s $file == $unavail{"$label-size"}) {
open (F, $file);
read F, $_, -s $file;
close (F);
my $md5 = md5_hex($_);
if ($unavail{"$label-md5"} eq $md5) {
print STDERR "Warning: unavailable? $file\n";
unlink $file if !$dryrun;
return $dryrun;
}
} elsif (-s $file == 0) {
print STDERR "Warning: empty? $file\n";
unlink $file if !$dryrun;
return $dryrun;
}
}
return 1;
}
sub backupImage {
my $image = shift;
my $file = shift;
my $label = shift;
local $_;
my $dir = $file;
$dir =~ s/^(.*?)\/[^\/]+$/$1/;
rmkdir($dir) if ! $dryrun && ! -d $dir;
if ($dryrun) {
print "Would back up $image ($count/$max";
print " of $total" if $total > $max;
print ")\n";
} else {
print "Backing up $image ($count/$max";
print " of $total" if $total > $max;
print ")\n";
system("$bindir/runwget -O $file $image");
sleep 4; # be nice to flickr
}
}
sub rmkdir {
my $dir = shift;
my @dirs = split(/\//, $dir);
my $path = "/";
shift @dirs;
while (@dirs) {
$path .= shift @dirs;
if (! -d $path) {
mkdir($path, 0755) || die "Failed to create $path\n";
}
$path .= "/";
}
}
What do you think? Doable? Thanks...
|