LinuxQuestions.org

LinuxQuestions.org (/questions/)
-   Programming (https://www.linuxquestions.org/questions/programming-9/)
-   -   Simple perl help: regular expressions? (https://www.linuxquestions.org/questions/programming-9/simple-perl-help-regular-expressions-555940/)

Plastech 05-22-2007 10:27 PM

Simple perl help: regular expressions?
 
I would like to modify a perl script which downloads all sizes of a photo from flickr to one that only downloads the large sized image (i.e. one that ends in _b.jpg but not _t.jpg, etc.). I feel like this should be done with the sizes array, but regexps are beyond me right now. Here is the code:

Code:

#!/usr/bin/perl -- # -*- Perl -*-

use strict;
use Getopt::Std;
use vars qw($opt_c $opt_d $opt_m $opt_n);
use Digest::MD5 qw(md5_hex);

my $INFO = undef;
my $ROOT = undef;
my $usage = "Usage: $0 [-c config] [-d] [-m maxdl] [-n]\n";
die $usage if ! getopts('c:dm:n');

my $delextra = $opt_d ? 1 : 0;
my $config = $opt_c || $ENV{'HOME'} . "/.flocl.xml";
my $max = $opt_m || 500;
my $dryrun = $opt_n ? 1 : 0;

my %unavail = ('Square-size' => 959,
              'Square-md5' => 'ad9a068056d12def5b846e53148b5a60',
              'Thumbnail-size' => 1004,
              'Thumbnail-md5' => '0b51e4a30409a6e900846d51480f42a4',
              'Small-size' => 1699,
              'Small-md5' => '1e984c37962d177c7f847fd40aeb5628',
              'Medium-size' => 2900,
              'Medium-md5' => 'e3cc1d6e68d1c341e971340df2fc1e92',
              'Large-size' => 2873,
              'Large-md5' => 'b1f67752c93a4d1d476ed6339e078931',
              'Original-size' => 2873,
              'Original-md5' => 'b1f67752c93a4d1d476ed6339e078931',
    );

open (F, $config) || die "Configuration file \"$config\" must exist.\n";
read (F, $_, -s $config);
close (F);

$INFO = $2 if /<([^>:]*\:?info)>(.*?)<\/\1>/s;
$ROOT = $2 if /<([^>:]*\:?root)>(.*?)<\/\1>/s;

die "Root directory \"$ROOT\" must exist.\n" if ! -d $ROOT;

my $bindir = $0;
$bindir =~ s/\/[^\/]+$//;

my %images = ();
my %backup = ();

open (F, $INFO) || die "Flickr info file \"$INFO\" must exist.\n";
read (F, $_, -s $INFO);
close (F);

# A bit of a hack, but the sizes are all we care about
my @sizes = /<size\s.*?>/sg;

foreach my $size (@sizes) {
    my $label = undef;
    my $source = undef;
    $label = $2 if $size =~ /\slabel=([\"\'])(.*?)\1/s;
    $source = $2 if $size =~ /\ssource=([\"\'])(.*?)\1/s;

    if (defined $source) {
        my $file = $source;
        $file =~ s/http:\/\///;
        $file = "$ROOT/$file";
        $images{$file} = $source;
        $backup{$file} = $label if ! okLocal($file, $label);
    }
}

my @files = sort keys %backup;
my $total = $#files + 1;
$max = $total if $max > $total;
my $count = 0;

if (@files) {
    print "$total images to backup...\n";
    foreach my $file (@files) {
        $count++;
        backupImage($images{$file}, $file, $backup{$file});
        last if $count >= $max;
    }
}

if ($delextra) {
    open (FIND, "find $ROOT -type f -print |");
    while (<FIND>) {
        chop;
        next if /\/web\//;
        next if /\/cgi-bin\//;
        next if /\/\.svn\//;
        next if /\/\.CVS\//;
        next if /\.xml$/;

        if (! exists($images{$_})) {
            if ($dryrun) {
                print "Would remove: $_\n";
            } else {
                print "Remove: $_\n";
                unlink $_;
            }
        }
    }
}

exit 0;

sub okLocal {
    my $file = shift;
    my $label = shift;

    return 0 if ! -f $file;

    if (-f $file) {
        if (exists $unavail{"$label-size"} && -s $file == $unavail{"$label-size"}) {
            open (F, $file);
            read F, $_, -s $file;
            close (F);
            my $md5 = md5_hex($_);
            if ($unavail{"$label-md5"} eq $md5) {
                print STDERR "Warning: unavailable? $file\n";
                unlink $file if !$dryrun;
                return $dryrun;
            }
        } elsif (-s $file == 0) {
            print STDERR "Warning: empty? $file\n";
            unlink $file if !$dryrun;
            return $dryrun;
        }
    }

    return 1;
}

sub backupImage {
    my $image = shift;
    my $file = shift;
    my $label = shift;
    local $_;

    my $dir = $file;
    $dir =~ s/^(.*?)\/[^\/]+$/$1/;

    rmkdir($dir) if ! $dryrun && ! -d $dir;

    if ($dryrun) {
        print "Would back up $image ($count/$max";
        print " of $total" if $total > $max;
        print ")\n";
    } else {
        print "Backing up $image ($count/$max";
        print " of $total" if $total > $max;
        print ")\n";
        system("$bindir/runwget -O $file $image");
        sleep 4; # be nice to flickr
    }
}

sub rmkdir {
    my $dir = shift;
    my @dirs = split(/\//, $dir);

    my $path = "/";
    shift @dirs;

    while (@dirs) {
        $path .= shift @dirs;
        if (! -d $path) {
            mkdir($path, 0755) || die "Failed to create $path\n";
        }
        $path .= "/";
    }
}

What do you think? Doable? Thanks...

Plastech 05-23-2007 01:26 AM

Booyah! Solved my problems. Thanks anyways!

archtoad6 05-24-2007 07:35 AM

Now please post your solution so the next one can benefit.

Plastech 05-25-2007 02:53 PM

Sorry, I'm at a my bro's wedding this weekend. Will post when I return, though :)


All times are GMT -5. The time now is 12:53 AM.