LinuxQuestions.org
Welcome to the most active Linux Forum on the web.
Home Forums Tutorials Articles Register
Go Back   LinuxQuestions.org > Blogs > rainbowsally
User Name
Password

Notices


Rate this Entry

C/C++ dosex (irregular expressions :) for copying, renaming with wildcards

Posted 04-01-2012 at 11:42 PM by rainbowsally

dosex (irregular expressions) for copying and renaming with wildcards

Features:
  • input a name and a wildcard string to see if the name matches
  • optionally input a wildcard string for a new name and get a very good clone of DOS
    functionality.

This was the last missing piece for the filenames functions which will be included in libLQ shortly.

file: src/main.c
purpose: demonstrate mixing C declarations and C++ functionality, also 'dosex' demo.
Code:
// main.cpp - tester for dosex irregular expressions and filename
// functions to be included in libLQ.

#include <stdio.h>    // printf()
#include <string.h>   // strcpy()
#include <unistd.h>   // sleep()

#include "filename_match.h"

// Intentionally empty function; main() calls it once at startup.
// NOTE(review): presumably a convenient spot for a debugger breakpoint -- confirm.
void dbg(void) {}

// Checks inname against the wildcard pattern findwild and, when it matches,
// builds a new name from inname using the wildcard pattern replacewild.
//
// Returns a pointer to the generated name, or 0 when inname does not match
// findwild or no new name could be generated.  The result lives in a static
// buffer inside this function, so it is overwritten by the next call.
char* dosex(const char* inname, const char* findwild, const char* replacewild)
{
  static char outname[FILENAME_MAXSTR];

  // The existing name has to satisfy the "find" pattern first.
  if(!filename_match(inname, findwild))
    return 0;

  // Then the "replace" pattern is applied to produce the new name.
  if(!filename_matchNew(outname, FILENAME_MAXSTR, inname, replacewild))
    return 0;

  return outname;
}

// Demo driver for dosex().
//
// With exactly three command-line arguments (input name, find pattern,
// replace pattern) those are used; with any other argument count a built-in
// test triple is used instead.  Prints the arguments and the generated name,
// or a failure message.  Always returns 0.
int main(int argc, char** argv)
{
  const char* s;
  const char* findstr, *replacestr, *inputstr;
  const char* const* p;   // walks either argv+1 or default_args; never written through
  const char* default_args[3] =
  {
    // these have been tested against the same strings using cmd.exe
    // in Windows.
    // "sb" = "should be" (the author's expected result).
    // NOTE(review): re-verify the commented-out expectations against the
    // current implementation before relying on them.
    "barkedupon.ayz", "*a*e*p?n.?yz", "???-01.x*" // sb bar-01.xyz
    //    "abc-def", "*-def", "*-xyz" // sb: abc-xyz
    //    "abc-def", "*-*", "test-*" // sb: test-ef
    //    "abc3defg", "*3*", "test4*"  // sb: test4efg
    //    "abc.defg", "*.*", "test4*"  // yields: test4 (the '*' copies nothing once the input is at '.')
    //    "abc.defg", "*3*", "test4*"  // sb: not a match
  };

  dbg();

  if(argc == 4)
    p = (const char* const*)(argv + 1);
  else
  {
    printf("\nYou can input quoted args on the commandline too.\n"
    "We'll use a default test setup this time around.\n\n");
    sleep(1);   // sleep() takes whole seconds; the former 1.5 was truncated to 1
    p = default_args;
  }

  inputstr = *p++;
  findstr = *p++;
  replacestr = *p;
  printf("Using args\n"
  "   inputstr:   %s\n"
  "   findstr:    %s\n"
  "   replacestr: %s\n",
  inputstr, findstr, replacestr);
  s = dosex(inputstr, findstr, replacestr);

  if(s)
    printf("Output -> '%s'\n", s);
  else
    printf("Can't copy/replace input strings\n");
  return 0;
}
file: src/filename_match.cpp
purpose: wildcard criteria for filename matches and generating new names
Code:
// filename_match.cpp - wildcard pattern checking for existing 
// files and for generating new names for renaming, or copying,
// also based on wildcard pattern criteria.

#include <stdio.h>    // fgets
#include <string.h>   // strlen()
#include <malloc.h>   // malloc

#include "filename_match.h"

// Pattern pointers.  Neither is referenced by any function shown in this file.
// NOTE(review): these look like leftovers -- confirm before removing.
static const char* pattern_old;
static const char* pattern_new;

// Working buffers shared by the functions in this file.  They are file-scope
// statics, so every call overwrites whatever the previous call left in them.
static char buf_in[FILENAME_MAXSTR];        // copy of the name passed to filename_match()
static char buf_out[FILENAME_MAXSTR];       // name built by _match_new(), copied out by filename_matchNew()

// Tests whether the whole of string s matches pattern p.
//
// Pattern syntax:
//   '?'   matches exactly one character (any character, but not end of string)
//   '*'   matches any run of zero or more characters
//   '\\'  takes the following pattern character literally; a backslash that
//         is the last character of the pattern matches a literal backslash
//   other characters match themselves
//
// Returns true only when both the pattern and the string are used up together.
static bool _match_existing (const char* s, const char* p)
{
  for (;;)
  {
    const char c = *p++;

    // End of pattern: success only if the string has ended as well.
    if (c == 0)
      return *s == 0;

    if (c == '*')
    {
      // A trailing '*' swallows whatever is left of the string.
      if (*p == 0)
        return true;

      // Otherwise try the rest of the pattern at every remaining position
      // of the string, including the position of its terminator.
      for (;;)
      {
        if (_match_existing(s, p))
          return true;
        if (*s++ == 0)
          return false;
      }
    }

    if (c == '?')
    {
      // Needs one real character to consume.
      if (*s++ == 0)
        return false;
      continue;
    }

    // Literal comparison.  For an escape, the literal is the character that
    // follows the backslash, unless the backslash ends the pattern.
    char literal = c;
    if (c == '\\' && *p)
      literal = *p++;

    if (*s++ != literal)
      return false;
  }
}

/* based on part of copy300 by Marty Peritsky
 * License: GPL v.2+.
 */

// Builds a new name in buf_out from existing name f and wildcard pattern p.
//
// The pattern is walked left to right while a cursor moves through f:
//   '?'    copies one character of f, unless f is at a '.' or at its end,
//          in which case nothing is copied
//   '*'    copies f up to (not including) the next '.' or the end, then skips
//          the pattern forward to its next '.' or end; if f is already at a
//          '.' or at its end, only the '*' itself is skipped
//   '.'    if f is at a '.' (which is consumed) or at its end, emits '.';
//          otherwise first skips f forward to its next '.' or end and emits
//          the '.' on the following pass
//   other  emits the pattern character itself and steps over one character
//          of f, unless f is at a '.' or at its end
//
// buf_out is always left NUL-terminated.  If the generated name would not fit
// in FILENAME_MAXSTR - 1 characters, buf_out is emptied and false is returned
// rather than handing back an unterminated or truncated name.
//
// Returns true when a non-empty name was produced.
static bool _match_new(const char *f, const char *p)
{
  const int limit = FILENAME_MAXSTR - 1;   // last byte is reserved for the NUL
  int i = 0;

  memset(buf_out, 0, sizeof buf_out);

  while (*p != '\0')
  {
    switch (*p)
    {
      case '?':
        if (*f != '.' && *f != '\0')
        {
          if (i >= limit)
            goto too_long;
          buf_out[i++] = *f++;
        }
        p++;
        break;

      case '*':
        if (*f == '.' || *f == '\0')
        {
          p++;                 // nothing left in this part of f to copy
          break;
        }
        do
        {
          if (i >= limit)
            goto too_long;
          buf_out[i++] = *f++;
        } while (*f != '.' && *f != '\0');
        // Whatever follows '*' in this part of the pattern is skipped.
        do
        {
          p++;
        } while (*p != '.' && *p != '\0');
        break;

      case '.':
        if (*f == '.' || *f == '\0')
        {
          if (*f == '.')
            f++;
          if (i >= limit)
            goto too_long;
          buf_out[i++] = '.';
          p++;
        }
        else
        {
          // Drop the rest of this part of f; p stays on the '.' so the
          // next pass takes the branch above.
          do
          {
            f++;
          } while (*f != '.' && *f != '\0');
        }
        break;

      default:
        if (*f != '.' && *f != '\0')
          f++;
        if (i >= limit)
          goto too_long;
        buf_out[i++] = *p++;
        break;
    }
  }
  return buf_out[0] != '\0';

too_long:
  buf_out[0] = '\0';
  return false;
}


// Returns non-zero if name matches the wildcard pattern ('*', '?' and
// backslash escapes, as implemented by _match_existing()), zero otherwise.
// A NULL name or pattern is treated as "no match".
//
// Side effect: a copy of name is stored in buf_in.
int filename_match(const char* name, const char* pattern)
{
  if(!name || !pattern)
    return 0;

  // save old name in case we also want a new name; the copy is bounded so
  // an over-long name cannot overrun buf_in (it is cut to fit instead)
  (void)snprintf(buf_in, sizeof buf_in, "%s", name);
  return _match_existing(name, pattern);
}

// Generates a new name from name_in using the wildcard pattern and stores it
// in the caller's buffer name_out, which holds outlen bytes.
//
// Meant to be called after filename_match() has accepted the name; the name
// itself is taken from name_in.
//
// Returns 1 on success.  Returns 0 when an argument is NULL, outlen is not
// positive, no name was generated, or the generated name (plus terminator)
// does not fit in outlen bytes.  Whenever name_out is usable it is set to
// the empty string before anything else, so it is empty on failure.
int filename_matchNew(char* name_out, int outlen, const char* name_in, const char* pattern)
{
  if(!name_out || outlen <= 0)
    return 0;                 // no room even for the terminator
  *name_out = 0;

  if(!name_in || !pattern)
    return 0;

  _match_new(name_in, pattern);
  // outlen is known to be positive here, so the cast cannot wrap around
  if((*buf_out) && (strlen(buf_out) < (size_t)outlen))
  {
    strcpy(name_out, buf_out);
    return 1;
  }
  return 0;
}
file: src/filename_match.h
purpose: headers for filename_match.cpp usable from straight C
Code:
// filename_match.h

/* 
 *  Gleaned from several GPL'd sources including gtk, xcopy300 and fltk v. 2.0
 *  License GPL 2+, -rs
 */

#ifndef filename_match_h
#define filename_match_h

// #include <LQ/cdecls.h>
// -------------------------
// Give the declarations below C linkage when this header is read by a C++
// compiler; the macros expand to nothing in plain C.
#ifdef __cplusplus
#define LQ_BEGIN_CDECLS extern "C" {
#define LQ_END_CDECLS }
#else
#define LQ_BEGIN_CDECLS
#define LQ_END_CDECLS
#endif
// -------------------------

// No trailing ';' after the macros: in C they expand to nothing and would
// leave a stray semicolon at file scope.
LQ_BEGIN_CDECLS

/// Size in bytes of the buffers used for names.
/// Old and new names and wildcards may include paths.
#define FILENAME_MAXSTR 1024

/// Returns non-zero if name matches wildcard pattern.
int filename_match(const char *name, const char *pattern);

/// Presumes filename_match() has returned successfully.
/// Builds a new name from name_in based on the wildcard pattern, 
/// stores it in the user supplied buffer name_out (outlen bytes)
/// and returns non-zero if successful.
/// Note: If used to copy or rename files, the user is 
/// responsible for checking if anything could be 
/// overwritten that shouldn't be.
int filename_matchNew(char *name_out, int outlen,
                      const char *name_in, const char *pattern);

LQ_END_CDECLS

#endif        // filename_match_h
I have tested this against windows XP (cmd.exe) and it is a very solid clone. And whether or not you like Windows, having wildcard replacements is a feature sorely lacking in linux. Ye Olde 'for i in <blah> do, etc.' is sometimes more than a little bit intimidating.

Want to give this a spin?

To generate the makefile with the old makefile-creator, type:
Code:
makefile-creator c dosex
make clean; make
That will generate the dosex 'irregular expressions' program for testing.

Here's the old makefile-creator, soon to be obsoleted, but works.
http://www.linuxquestions.org/questi...-part-2-34421/

Then 'dosex' or './dosex' depending on whether or not '.' is in your path.

If you want to test other names and wildcards, don't forget to quote the args so the splats aren't expanded by the shell.

:-)
Posted in Uncategorized
Views 25089 Comments 6
« Prev     Main     Next »
Total Comments 6

Comments

  1. Old Comment
    Also see glob(3).
    Posted 04-02-2012 at 09:53 PM by ntubski ntubski is online now
  2. Old Comment
    Glob doesn't work for 'replace' strings. Or if it does, I've never seen it done.
    Posted 04-05-2012 at 08:49 AM by rainbowsally rainbowsally is offline
  3. Old Comment
    So DOS has a replace strings with wildcards? How does it work?
    Posted 04-05-2012 at 09:30 AM by ntubski ntubski is online now
  4. Old Comment
    Compile the demo. It has one preset pair of strings. You can change the default oldname/newname wildcards in the program or input them from the commandline.

    Remember to quote the args so the shell doesn't expand them behind your back.

    :-)
    Posted 04-05-2012 at 07:12 PM by rainbowsally rainbowsally is offline
  5. Old Comment
    So it looks like replacestr is matched against inputstr, with literals in replacestr acting as "?" for matching but get output as themselves, and wildcards are output as whatever they match against in inputstr. findstr is irrelevant to the output as long as it matches inputstr.

    I may have found a bug, one of the example arguments doesn't give the expected results:
    Code:
    % ./dosex abc.defg '*.*' 'test4*'
    Using args
       inputstr:   abc.defg
       findstr:    *.*
       replacestr: test4*
    Output -> 'test4'
    Getting "test4" instead of "test.defg" (hmm, I just realized that if "test.defg" really is the expected result, my description above is wrong). It seems like findstr is matching wildcards to "." (like in unix), but replacestr doesn't (like in DOS):
    Code:
    % ./dosex . '*' x
    Using args
       inputstr:   .
       findstr:    *
       replacestr: x
    Output -> 'x'
    % ./dosex . '*' '*'
    Using args
       inputstr:   .
       findstr:    *
       replacestr: *
    Can't copy/replace input strings
    How do you access this functionality in DOS?
    Posted 04-07-2012 at 09:06 AM by ntubski ntubski is online now
  6. Old Comment
    Hi ntubski.

    It's hard to expect what ms products produce. :-)

    Tested against XP cmd.exe and the same unexpected results occur. I thought it best to remain DOS compatible in case anyone wanted to duplicate any old dos 'move' or 'copy/xcopy' 'ren' types of functions so I didn't try to change any of the counterintuitive stuff.

    And re changing the name of '.' input file name. Dot is considered the end of a substring in DOS. If you look at the algorithm you can see why that failed to match.

    :-)
    Posted 04-09-2012 at 05:56 AM by rainbowsally rainbowsally is offline
 

  



All times are GMT -5. The time now is 06:53 AM.

Main Menu
Advertisement
Advertisement
My LQ
Write for LQ
LinuxQuestions.org is looking for people interested in writing Editorials, Articles, Reviews, and more. If you'd like to contribute content, let us know.
Main Menu
Syndicate
RSS1  Latest Threads
RSS1  LQ News
Twitter: @linuxquestions
Open Source Consulting | Domain Registration