how to scp files only after they've been there for X seconds

anon091 · 08-22-2011, 03:09 PM

Hey sz. Unfortunately it's a program that spits out these files, so there is no script. Plus, I need to scp and then move the files to another directory as well, so they don't keep getting scp'd over and over. But then I'd have to have some crazy find, and I don't do very well at combining commands. I was also worried about grabbing the files before they had finished being written.

at least i'm learning a lot trying to figure this out.

Nominal Animal · 08-26-2011, 05:55 PM

Here is the source for a C utility you can use to solve this problem in an easy way.

The source code is very unpolished (so much so I almost did not post it at all), but it should be very robust and safe to use. It is just ugly, I think. Save the following as movecompleted.c:

Code:

#define  _GNU_SOURCE
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <stdlib.h>
#include <signal.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>
#include <stdio.h>
#include <time.h>

#define   STATUS_OK          0
#define   STATUS_FAILURE     32

#define   STATUSBIT_RELINK   1
#define   STATUSBIT_UNLINK   2
#define   STATUSBIT_UNLEASE  4
#define   STATUSBIT_NAME     8
#define   STATUSBIT_CLOSE    16

/* Open the named file for reading only, without letting it become the
 * controlling terminal. An open interrupted by a signal is retried.
 * Returns the new descriptor, or -1 with errno set; a NULL or empty
 * name yields -1 with errno == EINVAL.
*/
static inline int rdopen(const char *const filename)
{
	if (filename == NULL || filename[0] == '\0') {
		errno = EINVAL;
		return -1;
	}

	for (;;) {
		const int fd = open(filename, O_RDONLY | O_NOCTTY);

		/* Anything except an interrupted call is final. */
		if (fd != -1 || errno != EINTR)
			return fd;
	}
}

/* Age of the file behind the descriptor: whole seconds elapsed since
 * its last modification, clamped to the range 0 .. 2147483647.
 * Returns 0 with errno left set by fstat() if the file cannot be
 * examined; a genuine age of zero is returned with errno cleared to 0.
*/
static inline int last_modified(const int descriptor)
{
	struct stat st;
	double      elapsed;
	int         age;

	/* Retry fstat() for as long as it is merely interrupted. */
	while (fstat(descriptor, &st) == -1) {
		if (errno != EINTR)
			return 0;	/* Note: error in errno. */
	}

	elapsed = difftime(time(NULL), st.st_mtime);

	if (elapsed <= 0.0)
		age = 0;
	else if (elapsed >= 2147483647.0)
		age = 2147483647;
	else
		age = (int)elapsed;

	/* Zero without an error: make that distinguishable via errno. */
	if (age == 0)
		errno = 0;

	return age;
}

/* Request a read lease (F_SETLEASE with F_RDLCK) on the descriptor.
 * An interrupted request is retried. Returns 0 on success, otherwise
 * the errno value of the failed fcntl() call.
*/
static inline int rdlease(const int descriptor)
{
	while (fcntl(descriptor, F_SETLEASE, F_RDLCK) == -1) {
		if (errno != EINTR)
			return errno;
	}

	return 0;
}

/* Drop whatever lease is held on the descriptor (F_SETLEASE with
 * F_UNLCK). An interrupted request is retried. Returns 0 on success,
 * otherwise the errno value of the failed fcntl() call.
*/
static inline int unlease(const int descriptor)
{
	while (fcntl(descriptor, F_SETLEASE, F_UNLCK) == -1) {
		if (errno != EINTR)
			return errno;
	}

	return 0;
}

/* Close descriptor, returning 0 or errno. Keeps actual errno unchanged.
 *
 * close() is called exactly once and is never retried. On Linux the
 * descriptor is already released when close() fails with EINTR, so a
 * retry would either report a bogus EBADF or, in a threaded program,
 * close a descriptor number that has since been reused. An interrupted
 * close is therefore treated as a completed close and reported as 0.
*/
static inline int closefd(const int descriptor)
{
	const int saved_errno = errno;
	int       result = 0;

	if (close(descriptor) == -1 && errno != EINTR)
		result = errno;

	/* The caller's errno must survive this call. */
	errno = saved_errno;
	return result;
}

/* Make newfile a hard link to oldfile, retrying if interrupted.
 * Returns 0 on success, EINVAL if either name is NULL or empty,
 * otherwise the errno value of the failed link() call.
*/
static inline int hardlink(const char *const oldfile, const char *const newfile)
{
	if (oldfile == NULL || newfile == NULL)
		return EINVAL;
	if (oldfile[0] == '\0' || newfile[0] == '\0')
		return EINVAL;

	while (link(oldfile, newfile) == -1) {
		if (errno != EINTR)
			return errno;
	}

	return 0;
}

/* Remove the named directory entry, retrying if interrupted.
 * Returns 0 on success, EINVAL if the name is NULL or empty,
 * otherwise the errno value of the failed unlink() call.
*/
static inline int removelink(const char *const filename)
{
	if (filename == NULL || filename[0] == '\0')
		return EINVAL;

	while (unlink(filename) == -1) {
		if (errno != EINTR)
			return errno;
	}

	return 0;
}

/* Build "<dir>/<name>", where <name> is the final component of path
 * (everything after its last '/').
 *
 * Trailing slashes of dir are collapsed into a single separator. A
 * NULL or empty dir yields just the name, with no separator at all.
 *
 * Returns a malloc()ed string the caller must free(), or NULL with
 * errno set: EINVAL if path is NULL or ends in '/' (no name part),
 * ENOMEM if the allocation fails.
*/
static char *new_path(const char *const dir, const char *const path)
{
	const char *slash;
	const char *base;
	char       *out;
	size_t      dir_len, base_len, used;

	if (path == NULL) {
		errno = EINVAL;
		return NULL;
	}

	/* The name starts right after the last slash, if there is one. */
	slash = strrchr(path, '/');
	base = (slash != NULL) ? slash + 1 : path;
	base_len = strlen(base);
	if (base_len == 0) {
		errno = EINVAL;
		return NULL;
	}

	dir_len = (dir != NULL) ? strlen(dir) : 0;

	/* Room for the directory, one separator, the name, and the NUL. */
	out = malloc(dir_len + base_len + (size_t)2);
	if (out == NULL) {
		errno = ENOMEM;
		return NULL;
	}

	used = 0;
	if (dir_len > 0) {
		/* Keep the directory without its trailing slashes ... */
		used = dir_len;
		while (used > 0 && dir[used - 1] == '/')
			used--;
		memcpy(out, dir, used);

		/* ... and follow it with exactly one separator. */
		out[used++] = '/';
	}

	/* Name plus its terminating NUL. */
	memcpy(out + used, base, base_len + 1);

	return out;
}

/* Signal handler that deliberately does nothing.
 * All three arguments are ignored. */
static void dummy_sigaction(int signum, siginfo_t *info, void *data)
{
	(void)signum;
	(void)info;
	(void)data;
}

/* Install dummy_sigaction() as the SIGIO handler, so that a delivered
 * SIGIO runs a no-op instead of the default action.
 *
 * The handler has the three-argument sa_sigaction signature, so
 * SA_SIGINFO must be set; without it the system calls the handler
 * through the one-argument sa_handler convention. SA_RESTART is kept
 * so interrupted system calls are restarted where possible.
 *
 * Returns 0 if success, errno otherwise.
 */
static int set_sigio_handler(void)
{
	struct sigaction act;
	int              result;

	/* Start from a fully defined structure; no field is left as garbage. */
	memset(&act, 0, sizeof act);
	sigemptyset(&act.sa_mask);
	act.sa_sigaction = dummy_sigaction;
	act.sa_flags = SA_RESTART | SA_SIGINFO;

	do {
		result = sigaction(SIGIO, &act, NULL);
	} while (result == -1 && errno == EINTR);
	if (result == -1)
		result = errno;
	else
		result = 0;

	return result;
}


/* Program entry point: move each source file into the target directory
 * by hard-linking it there and unlinking the original, but only if a
 * read lease can be obtained on the file and, with -mSEC, only if its
 * modification time is more than SEC seconds in the past.
 *
 * Command line:
 *     [ -d[SEP] | -mSEC ]... [ -- ] target-directory source-file(s)...
 *
 * Returns STATUS_OK after printing the usage text, STATUS_FAILURE on a
 * startup or usage error, and otherwise the OR of the STATUSBIT_* flags
 * collected while processing the files (0 if none was set). Files that
 * are merely skipped (cannot be opened, no lease, too recent, or the
 * hard link cannot be created) do not set any status bit.
*/
int main(int argc, char *argv[])
{
	char const *delimiter = NULL;
	int         age = -1;
	char       *dir;
	int         arg, status, result, descriptor;

	/* No arguments, or an explicit help request: print the usage text
	 * to standard error and exit successfully. */
	if (argc <= 1 || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
		fprintf(stderr, "\n"
		                "Usage: %s [ -h | --help ]\n"
		                "       %s [ options ] [ -- ] target-directory/ source-file(s)...\n"
		                "Options:\n"
		                "       -d      Output moved file names to standard output,\n"
		                "               each name terminated by an ASCII NUL (0).\n"
		                "       -dSEP   Output moved file names to standard output,\n"
		                "               each name terminated by SEP.\n"
		                "       -mSEC   Ignore files modified during the last SEC seconds.\n"
		                "\n"
		                "This program will move the source files to the target directory,\n"
		                "if and only if the files are not being modified.\n"
		                "Normally only files owned by the user are considered, unless\n"
		                "the CAP_LEASE capability is used.\n"
		                "\n"
		                "If successful, the exit status will be zero (success).\n"
		                "Otherwise, exit status will be a mask ORed with\n"
				"   %3d  One or more files were moved, but another process\n"
		                "        managed to open it for writing anyway.\n"
		                "   %3d  Error unlinking a file.\n"
		                "   %3d  Error releasing a read lease on a file.\n"
		                "   %3d  Error constructing a target path.\n"
		                "   %3d  Error closing a read-only file.\n"
		                "If there is a problem at startup, exit status will be %d.\n"
		                "\n", argv[0], argv[0],
		                STATUSBIT_RELINK, STATUSBIT_UNLINK, STATUSBIT_UNLEASE,
		                STATUSBIT_NAME, STATUSBIT_CLOSE, STATUS_FAILURE);
		return STATUS_OK;
	}

	/* Set SIGIO handler.
	 * NOTE(review): presumably needed so that a lease-break notification
	 * does not terminate the process -- confirm against fcntl(2). */
	result = set_sigio_handler();
	if (result) {
		const char *const error = strerror(result);

		fprintf(stderr, "Cannot set an IO signal handler: %s.\n", error);
		fflush(stderr);

		return STATUS_FAILURE;
	}

	/* Argument currently considered. */
	arg = 1;

	/* Parse options. Parsing stops at the first argument that does not
	 * start with "-d" or "-m". */
	while (arg < argc) {
		if (argv[arg][0] == '-' && argv[arg][1] == 'd') {
			/* Everything after "-d" is the separator. An empty
			 * separator selects a NUL terminator when names are
			 * printed further below. */
			delimiter = (const char *)&(argv[arg][2]);
			arg++;

		} else
		if (argv[arg][0] == '-' && argv[arg][1] == 'm') {
			int  temp_i;
			char temp_c;

			if (!argv[arg][2]) {
				fprintf(stderr, "-m: Number of seconds not specified.\n");
				fflush(stderr);
				return STATUS_FAILURE;
			}

			/* Exactly one conversion must succeed: a number with
			 * no trailing character after it. */
			if (sscanf(&argv[arg][2], "%d%c", &temp_i, &temp_c) != 1) {
				fprintf(stderr, "%s: Invalid number of seconds.\n", (const char *)&argv[arg][2]);
				fflush(stderr);
				return STATUS_FAILURE;
			}
			if (temp_i < 0) {
				fprintf(stderr, "%s: Invalid number of seconds.\n", (const char *)&argv[arg][2]);
				fflush(stderr);
				return STATUS_FAILURE;
			}

			age = temp_i;
			arg++;

		} else
			break;
	}

	/* Skip -- */
	if (arg < argc && !strcmp(argv[arg], "--"))
		arg++;

	/* Target path. All arguments after it are source files. */
	if (arg >= argc) {
		fprintf(stderr, "No target directory specified.\n");
		fflush(stderr);
		return STATUS_FAILURE;
	}
	dir = argv[arg++];

	/* Assume no errors. */
	status = 0;

	/* Process each source file separately. */
	for (; arg < argc; arg++) {
		const char *const  source = argv[arg];
		char              *target;

		/* Open source file read-only. */
		descriptor = rdopen(source);
		if (descriptor == -1) {
			const char *const error = strerror(errno);

			fprintf(stderr, "%s: %s. Skipped.\n", source, error);
			fflush(stderr);
			continue;
		}

		/* Acquire read-lease on the descriptor. */
		result = rdlease(descriptor);
		if (result == EAGAIN || result == EWOULDBLOCK) {

			/* File is being modified. */
			closefd(descriptor);
			fprintf(stderr, "%s: In use. Skipped.\n", source);
			fflush(stderr);
			continue;
		} else
		if (result) {
			const char *const error = strerror(result);

			/* Read-leasing failed. */
			closefd(descriptor);
			fprintf(stderr, "%s: Cannot get a read lease: %s.\n", source, error);
			fflush(stderr);
			continue;
		}

		/* Check age limit, if one was specified.
		 * last_modified() returns 0 on error, so a file that cannot
		 * be examined is skipped here as well. */
		if (age >= 0) {
			result = last_modified(descriptor);
			if (result <= age) {

				unlease(descriptor);
				closefd(descriptor);

				fprintf(stderr, "%s: Modified %d seconds ago. Skipped.\n", source, result);
				fflush(stderr);
				continue;
			}
		}

		/* Full path to target file */
		target = new_path(dir, source);
		if (!target) {
			const char *const error = strerror(errno);

			unlease(descriptor);
			closefd(descriptor);

			fprintf(stderr, "%s: Cannot build target name: %s.\n", source, error);
			fflush(stderr);

			status |= STATUSBIT_NAME;
			continue;
		}

		/* Create the hard link. On failure the source is left
		 * untouched and no status bit is set. */
		result = hardlink(source, target);
		if (result) {
			const char *const error = strerror(result);

			unlease(descriptor);
			closefd(descriptor);

			fprintf(stderr, "%s: %s.\n", target, error);
			fflush(stderr);

			free(target);
			continue;
		}

		/* Unlink the source. */
		result = removelink(source);
		if (result) {
			const char *const error = strerror(result);

			unlease(descriptor);
			closefd(descriptor);

			fprintf(stderr, "%s: Cannot move file: %s.\n", source, error);
			fflush(stderr);

			/* Roll back: remove the link just created, so the
			 * file is not left under both names. */
			removelink(target);
			free(target);

			status |= STATUSBIT_UNLINK;
			continue;
		}

		/* Release the lease. */
		result = unlease(descriptor);
		if (result) {
			const char *const error = strerror(result);

			closefd(descriptor);

			fprintf(stderr, "%s: Error releasing read lease: %s.\n", source, error);
			fflush(stderr);

			status |= STATUSBIT_UNLEASE;
			goto undo;
		}

		/* Re-acquire the lease, to make sure it's still not being modified. */
		result = rdlease(descriptor);
		if (result) {
			/* Nothing is printed here; the undo path reports
			 * the outcome. */
			closefd(descriptor);
			goto undo;
		}

		/* Release the lease. */
		result = unlease(descriptor);
		if (result) {
			const char *const error = strerror(result);

			closefd(descriptor);

			fprintf(stderr, "%s: Error releasing read lease: %s.\n", source, error);
			fflush(stderr);

			status |= STATUSBIT_UNLEASE;
			goto undo;
		}

		/* Close the descriptor. A failure here is not undone: the
		 * file stays at the target, but its name is not printed. */
		result = closefd(descriptor);
		if (result) {
			const char *const error = strerror(result);

			fprintf(stderr, "%s: Error closing file: %s.\n", source, error);
			fflush(stderr);

			free(target);

			status |= STATUSBIT_CLOSE;
			continue;
		}

		if (delimiter) {

			/* Output moved file path, followed by the separator,
			 * or by a NUL byte if the separator is empty. */
			fputs(target, stdout);
			if (*delimiter)
				fputs(delimiter, stdout);
			else
				fputc('\0', stdout);
			fflush(stdout);
		}

		free(target);

		/* Success. */
		continue;

	undo:	/* Try to reinstate the original hard link. */
		result = hardlink(target, source);
		if (result) {
			const char *const error = strerror(result);

			/* The file now exists only under the target name. */
			fprintf(stderr, "%s: Cannot return file: %s.\n", source, error);
			fprintf(stderr, "%s: Moved to '%s'.\n", source, target);
			fflush(stderr);

			free(target);

			status |= STATUSBIT_RELINK;
			continue;
		}

		/* Phew, success. Remove the target hard link. */
		result = removelink(target);
		if (result) {
			const char *const error = strerror(result);

			fprintf(stderr, "%s: Error removing temporary hardlink: %s.\n", target, error);
			fflush(stderr);

			free(target);

			status |= STATUSBIT_UNLINK;
			continue;
		}

		/* Success: the move was undone and the source is back in place. */
		fprintf(stderr, "%s: In use. Skipped.\n", source);
		fflush(stderr);

		free(target);
		continue;
	}

	/* Done. */
	return status;
}

Then, compile and install it as /usr/local/bin/movecomplete using

Code:

# Build the binary under the name that is installed and used below.
gcc -Wall -O3 -std=c99 -pedantic -fomit-frame-pointer -o movecomplete movecompleted.c
sudo install -m 0755 movecomplete /usr/local/bin

By default, it will only move files the user owns (because it can only get a lease on files the user owns). You can safely give it the CAP_LEASE capability by running

Code:

# Give the installed binary the CAP_LEASE capability.
sudo setcap CAP_LEASE=pe /usr/local/bin/movecomplete

(You need the libcap binaries installed to run that command.) Then, if a user can move a file normally, they can also use movecomplete to move it when it is not in use.

When you run for example

Code:

# Move files last modified more than 60 seconds ago from /data/incoming/ to /data/done/.
movecomplete -m60 /data/done/ /data/incoming/*

the program will move all files that have not been modified in the last minute, and are not open for writing by any program, to /data/done/. Run the program without any arguments to get the full usage information. Note that the target directory and the source files must reside on the same filesystem. (The program will not copy data; it will just rename (hardlink) files between directories.)

Here is how I personally would use this program. Assuming that

Some program will create new files in /data/incoming/
Files to be transferred (scp) are moved to /data/outgoing/
Transferred files are moved to /data/complete/
User someuser owns all the files, and is the user to scp as

I would run this regularly via cron:

Code:

#!/bin/bash
#
# Cron job: move finished files from the incoming directory to the
# outgoing directory, scp the outgoing files one by one, and move each
# successfully transferred file to the completed directory.

# User account to run as
USER="someuser"

# Minimum age of files, in seconds
AGELIMIT=60

# Directories
INCOMING="/data/incoming/"
OUTGOING="/data/outgoing/"
COMPLETE="/data/complete/"

# SCP command
SCPCMD=("scp" "-c" "blowfish" "-o" "ConnectTimeout=15")
SCPTARGET=user@remote:path/

# Flag directory: if it exists, I'm running
RUNNING="$OUTGOING/.running/"

# Make sure we are run as the specified user
if [ "`id -un`" != "$USER" ]; then
        exec sudo -u "$USER" "$0" "$@"
        exit 99
fi

# Exit if we are already running. mkdir fails when the flag directory
# already exists, so a failure means "do not start a second copy".
mkdir "$RUNNING" &>/dev/null || exit 0

# Remove the running flag automatically, when we exit
trap "rm -rf '$RUNNING'" EXIT

# Move all completed files.
find "$INCOMING" -maxdepth 1 -type f -print0 | xargs -r0 movecomplete -m$AGELIMIT "$OUTGOING"

# SCP all outgoing files, one by one.
# IFS= and -r keep whitespace and backslashes in file names intact.
find "$OUTGOING" -maxdepth 1 -type f -print0 | while IFS= read -r -d "" FILE ; do
        "${SCPCMD[@]}" "$FILE" "$SCPTARGET" || continue
        mv -f "$FILE" "$COMPLETE" || continue
        # Further processing?
done

Depending on the cron interval, I might add some error checking. My cron captures and logs any output by default, so I will get an e-mail if there is anything amiss in the process above. Rather than relying on that, I could capture e.g. scp output and errors, and for example send an e-mail if the remote disk is full. (It's the typical reason why a scp transfer would fail.)

If any of you would like me to clean up the sources, or add some new feature, drop me a private message and I'll gladly consider it. I hope you find it useful.

anon091 · 08-30-2011, 10:32 AM

wow, this is cool! and i kinda understand it too! haha

Thanks for all the help, Nominal. You went above and beyond on this one; it was a pretty interesting topic and I learned a lot.