LinuxQuestions.org
Visit Jeremy's Blog.
Home Forums Tutorials Articles Register
Go Back   LinuxQuestions.org > Forums > Non-*NIX Forums > Programming
User Name
Password
Programming This forum is for all programming questions.
The question does not have to be directly related to Linux and any language is fair game.

Notices


Reply
  Search this Thread
Old 09-11-2009, 12:44 PM   #16
catkin
LQ 5k Club
 
Registered: Dec 2008
Location: Tamil Nadu, India
Distribution: Debian
Posts: 8,578
Blog Entries: 31

Rep: Reputation: 1208Reputation: 1208Reputation: 1208Reputation: 1208Reputation: 1208Reputation: 1208Reputation: 1208Reputation: 1208Reputation: 1208

Quote:
Originally Posted by ta0kira View Post
This is what I meant above when I said read doesn't actually read when I removed cat; this wouldn't work for me. I'm not exactly sure why, but it probably has to do with it thinking standard input is a terminal. For example, this doesn't do anything at all:
Code:
while true; do 
  echo "sleep `expr $RANDOM % 5`"
done | ( for I in `seq 1 4`; do
  while read line; do
    echo "[$0: fork $I: '$line']" 1>&2
    ( eval $line; )
  done &
done; sleep 10; kill 0; )
Add the cat back in and it works, though.
Kevin Barry
I have no idea why (and would like to find out) but this works
Code:
# Feed a fixed list of commands, via a here document, to four background
# workers that share the same standard input -- this works where the
# pipe-fed version (without cat) did not.
commands='sleep 1
sleep 2
sleep 3
sleep 4
sleep 5
sleep 6'
# four concurrent workers, all reading lines from the shared here document
for I in `seq 1 4`; do
  while read line; do
    # announce which fork picked up the line, then run it in a subshell
    echo "[$0: fork $I: '$line']" 1>&2
    ( eval $line; )
  done &
done <<EOF
$commands
EOF
# give the workers time to drain the input, then kill the process group
sleep 10; kill 0
EDIT:
and that may have been the significant, unintentional difference when I rewrote your script to explore: I used < input.txt in a similar way to the "here document" above.

Last edited by catkin; 09-11-2009 at 12:46 PM.
 
Old 09-11-2009, 03:46 PM   #17
ta0kira
Senior Member
 
Registered: Sep 2004
Distribution: FreeBSD 9.1, Kubuntu 12.10
Posts: 3,078

Original Poster
Rep: Reputation: Disabled
Here it is in C. It took about an hour to write. I'm sure it isn't perfect, but it's definitely what I'm looking for. I'll have to go over it a few more times to make sure I didn't mess anything up.
Code:
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>


/* One worker connection: the raw descriptor (kept for FD_SET/FD_ISSET)
 * plus a stdio stream wrapping it (NULL once the worker is dropped). */
typedef struct
{
	int   socket;
	FILE *file;
} usable_socket;


static int parse_long_value(const char*, int*);
static int process_loop(const char*, int, unsigned int);
static int select_loop(const char*, usable_socket*, unsigned int);


/* Entry point: fork [process count] worker processes, each connected to
 * the parent by a stream socketpair, then hand lines from standard input
 * to whichever worker is ready (see select_loop). */
int main(int argc, char *argv[])
{
	int processes = 0, I;

	if (argc != 2 || !(parse_long_value(argv[1], &processes)) || processes < 0)
	{
	fprintf(stderr, "%s [process count]\n", argv[0]);
	return 1;
	}

	/* one socket/stream pair per worker; calloc zeroes the array, so
	 * slots that never get a worker have file == NULL */
	usable_socket *all_sockets = calloc(processes, sizeof(usable_socket));
	if (!all_sockets)
	{
	fprintf(stderr, "%s: allocation error: %s\n", argv[0], strerror(errno));
	return 1;
	}


	/* a dead worker must not kill the parent with SIGPIPE */
	signal(SIGPIPE, SIG_IGN);
	setlinebuf(stderr);


	for (I = 0; I < processes; I++)
	{
        int new_sockets[2] = { -1, -1 };
        if (socketpair(PF_LOCAL, SOCK_STREAM, 0, new_sockets) < 0)
	 {
	fprintf(stderr, "%s: socket error: %s\n", argv[0], strerror(errno));
	break;
	 }

	/* close-on-exec so the commands run via system() in the workers
	 * don't inherit the control sockets */
	fcntl(new_sockets[0], F_SETFD, fcntl(new_sockets[0], F_GETFD) | FD_CLOEXEC);
	fcntl(new_sockets[1], F_SETFD, fcntl(new_sockets[1], F_GETFD) | FD_CLOEXEC);

	pid_t next_process = fork();

	if (next_process < 0)
	 {
	fprintf(stderr, "%s: fork error: %s\n", argv[0], strerror(errno));
	close(new_sockets[0]);
	close(new_sockets[1]);
	break;
	 }

	else if (next_process == 0)
	 {
	/* child: keep only its end of the pair and become a worker */
	free(all_sockets);
	close(new_sockets[0]);
	return process_loop(argv[0], new_sockets[1], I + 1);
	 }

	else
	 {
	/* parent: wrap its end in a stdio stream for line-based I/O */
	if (!(all_sockets[I].file = fdopen(new_sockets[0], "a+")))
	 {
	fprintf(stderr, "%s: stream error: %s\n", argv[0], strerror(errno));
	close(new_sockets[0]);
	close(new_sockets[1]);
	continue;
	 }

	all_sockets[I].socket = new_sockets[0];
	close(new_sockets[1]);
	 }
	}


	/* select_loop takes ownership of all_sockets and frees it */
	return select_loop(argv[0], all_sockets, (unsigned) processes);
}


/* Parse dData as a base-10 integer into *vValue.
 * Returns nonzero on success; zero if dData is NULL or empty, contains
 * trailing non-digit characters, overflows, or does not fit in an int.
 * On failure *vValue is left unchanged. */
static int parse_long_value(const char *dData, int *vValue)
{
	if (!dData || !*dData || !vValue) return 0;
	char *endpoint = NULL;
	errno = 0;
	long parsed = strtol(dData, &endpoint, 10);
	/* reject trailing junk such as "12x" */
	if (!endpoint || *endpoint != 0x00) return 0;
	/* reject overflow (ERANGE) and values that truncate in an int */
	if (errno == ERANGE || parsed < INT_MIN || parsed > INT_MAX) return 0;
	*vValue = (int) parsed;
	return 1;
}


/* Worker process body: repeatedly report the last exit status to the
 * parent over sSocket, wait for the next command line, and run it with
 * system(). nName is argv[0] and nNumber the 1-based worker index, both
 * used only for logging. Returns 0 when the parent closes the connection,
 * 1 on setup failure. */
static int process_loop(const char *nName, int sSocket, unsigned int nNumber)
{
	int returned = 0;

	FILE *parent_file = fdopen(sSocket, "a+");
	if (!parent_file)
	{
	fprintf(stderr, "%s[%u]: stream error: %s\n", nName, nNumber, strerror(errno));
	close(sSocket);
	return 1;
	}

	/* NOTE(review): sysconf returns long; assigning to int assumes the
	 * page size fits in an int -- true on common platforms, but verify */
	const int buffer_size = sysconf(_SC_PAGESIZE);

	char *buffer = malloc(buffer_size);
	if (!buffer)
	{
	fprintf(stderr, "%s[%u]: allocation error: %s\n", nName, nNumber, strerror(errno));
	fclose(parent_file);
	return 1;
	}

	/* handshake: writing "<status>\n" tells the parent this worker is
	 * ready for work; fgets then blocks until the parent answers with a
	 * command line (or EOF, which ends the loop) */
	while (fprintf(parent_file, "%i\n", returned) && fgets(buffer, buffer_size, parent_file))
	{
	fprintf(stderr, "%s[%u]: executing: %s", nName, nNumber, buffer);
	fflush(stderr);
	returned = system(buffer);
	}

	/* NOTE(review): parent_file is not fclosed here; process exit
	 * releases it, but an explicit fclose would be tidier */
	free(buffer);
	return 0;
}


/* Parent dispatch loop: for each line read from standard input, wait (via
 * select) for a worker to announce readiness, consume its status line,
 * and hand it the command line. Stops when stdin is exhausted or no live
 * workers remain. Frees sSockets before returning. */
static int select_loop(const char *nName, usable_socket *sSockets, unsigned int cCount)
{
	if (!sSockets) return 1;

	const int buffer_size = sysconf(_SC_PAGESIZE);
	char return_buffer[32];

	char *buffer = malloc(buffer_size);
	if (!buffer)
	{
	fprintf(stderr, "%s: allocation error: %s\n", nName, strerror(errno));
	return 1;
	}

	while (fgets(buffer, buffer_size, stdin))
	{
	int added = 0, I;

	fd_set input_sockets;
	FD_ZERO(&input_sockets);

	/* build the set of workers that still have an open stream */
	for (I = 0; I < cCount; I++)
	/*NOTE: standard input isn't a valid socket here*/
	if (sSockets[I].file)
	 {
	FD_SET(sSockets[I].socket, &input_sockets);
	++added;
	 }

	/* every worker has gone away: nothing left to dispatch to */
	if (!added) break;

	/* NOTE(review): when select succeeds it does not reset errno, so
	 * the errno == EBADF tests below read a stale value and may trigger
	 * spuriously -- confirm this cleanup path is intended */
	if (select(FD_SETSIZE, &input_sockets, NULL, NULL, NULL) >= 0 || errno == EBADF)
	for (I = 0; I < cCount; I++)
	if (sSockets[I].file && FD_ISSET(sSockets[I].socket, &input_sockets))
	 {
	if (errno == EBADF)
	  {
	/* a closed/invalid descriptor: drop this worker */
	fclose(sSockets[I].file);
	sSockets[I].socket = 0;
	sSockets[I].file   = NULL;
	  }
	else if (fgets(return_buffer, sizeof return_buffer, sSockets[I].file))
	  {
	/*TODO: do something with the return?*/
	/* worker reported its last exit status; send it the next line */
	fprintf(sSockets[I].file, "%s", buffer);
	if (fflush(sSockets[I].file) != 0)
	   {
	/* write side failed (worker likely died): drop this worker */
	fclose(sSockets[I].file);
	sSockets[I].socket = 0;
	sSockets[I].file   = NULL;
	   }
	/* line handed off -- go read the next stdin line */
	break;
	  }
	 }
	}

	free(buffer);
	free(sSockets);
	return 0;
}
Kevin Barry
 
Old 09-11-2009, 04:27 PM   #18
ta0kira
Senior Member
 
Registered: Sep 2004
Distribution: FreeBSD 9.1, Kubuntu 12.10
Posts: 3,078

Original Poster
Rep: Reputation: Disabled
This works too well! I managed to get 16 MATLAB sessions running, each requiring >1GB of memory and 100% CPU time, which crashed sshd and nut on my server! I'm like a child waving a gun around. It's probably best for me to scale it back to 8 or so concurrent processes, just in case. Beware that you might get exactly what you asked for with this program!
Kevin Barry
 
Old 09-12-2009, 09:03 AM   #19
gnashley
Amigo developer
 
Registered: Dec 2003
Location: Germany
Distribution: Slackware
Posts: 4,928

Rep: Reputation: 612Reputation: 612Reputation: 612Reputation: 612Reputation: 612Reputation: 612
When you use 'cat', split lines are automatically made into a single line. Compare what happens when you use cat compared to using shell redirection:
Code:
cat somefile |while read line ; do
echo "$line"
done
Code:
while read line ; do
echo "$line"
done<somefile

Using printf instead of echo may behave some other way... Be sure to compare what happens when using $line without quotes.
 
Old 09-12-2009, 09:56 AM   #20
catkin
LQ 5k Club
 
Registered: Dec 2008
Location: Tamil Nadu, India
Distribution: Debian
Posts: 8,578
Blog Entries: 31

Rep: Reputation: 1208Reputation: 1208Reputation: 1208Reputation: 1208Reputation: 1208Reputation: 1208Reputation: 1208Reputation: 1208Reputation: 1208
Quote:
Originally Posted by gnashley View Post
When you use 'cat', split lines are automatically made into a single line.
Can you explain some more? I understood that cat simply copies its input to its output verbatim, without making any changes.
 
Old 09-12-2009, 10:17 AM   #21
catkin
LQ 5k Club
 
Registered: Dec 2008
Location: Tamil Nadu, India
Distribution: Debian
Posts: 8,578
Blog Entries: 31

Rep: Reputation: 1208Reputation: 1208Reputation: 1208Reputation: 1208Reputation: 1208Reputation: 1208Reputation: 1208Reputation: 1208Reputation: 1208
Quote:
Originally Posted by ta0kira View Post
This works too well! I managed to get 16 MATLAB sessions running, each requiring >1GB of memory and 100% CPU time, which crashed sshd and nut on my server! I'm like a child waving a gun around. It's probably best for me to scale it back to 8 or so concurrent processes, just in case. Beware that you might get exactly what you asked for with this program!
Kevin Barry
A good stress testing program then! Rather than scale back the number of sub-processes, how about them looping until there is spare capacity before launching their command? Going a step ahead, it would then be useful to precede the command with a characterisation of the command's estimated resource needs -- disk IO, CPU, memory, network etc., so the sub-processes could assess the relevant available capacity ...
 
Old 09-12-2009, 11:57 AM   #22
ta0kira
Senior Member
 
Registered: Sep 2004
Distribution: FreeBSD 9.1, Kubuntu 12.10
Posts: 3,078

Original Poster
Rep: Reputation: Disabled
That's a very interesting idea. Maybe it should be in the form of a comment at the end of the line (e.g. #<1.5GB>), which would allow it to be optional. Really the CPU usage is assumed to be 100% for all lines. It would just be a matter of estimating memory usage.
Kevin Barry
 
Old 09-12-2009, 12:03 PM   #23
ta0kira
Senior Member
 
Registered: Sep 2004
Distribution: FreeBSD 9.1, Kubuntu 12.10
Posts: 3,078

Original Poster
Rep: Reputation: Disabled
Quote:
Originally Posted by gnashley View Post
When you use 'cat', split lines are automatically made into a single line. Compare what happens when you use cat compared to using shell redirection:
Code:
cat somefile |while read line ; do
echo "$line"
done
Code:
while read line ; do
echo "$line"
done<somefile

Using printf instead of echo may behave some other way... Be sure to compare what happens when using $line without quotes.
Output from both cat and read aren't really in question, though; it's an issue of what happens when cat and read read from a file descriptor. Here is a simulation of what I'd expect cat and read to do internally:
Code:
#!/bin/bash


# Simulation of how cat might buffer internally: read fixed-size (32-byte)
# chunks from the pipe with dd, show each raw chunk on stderr, and pass
# the bytes on to a reader that re-assembles them into lines.
ls -l | { { while buffer="$( dd bs=32 count=1 2> /dev/null )" && [ -n "$buffer" ]; do
  # show the raw chunk exactly as it came off the descriptor
  echo "~~~~~buffered~~~~~ '$buffer'" 1>&2
  echo 1>&2
  echo -n "$buffer"
done; echo; } | while read line; do
  # show each newline-delimited line recovered from the chunk stream
  echo "*****line***** '$line'" 1>&2
  echo 1>&2
done } 2>&1
You can replace ls with something else if you have some other line input you want to feed it. What happens is it reads fixed-sized chunks of input into a buffer, then it actually parses the buffered data for newlines. The buffer size of 32 is just for illustrative purposes; I'd expect it to be a multiple of the page size or of the st_blksize of the device being read from. In this example, my use of read is merely coincidence; I'm not demonstrating the observed behavior of read. The simulated behavior in this example is contrary to actual observation. If it helps illustrate my point, treat the script as the "black box" of cat; the observed output of the script is the only thing of relevance.
Kevin Barry

Last edited by ta0kira; 09-12-2009 at 12:31 PM.
 
Old 09-13-2009, 03:32 AM   #24
colucix
LQ Guru
 
Registered: Sep 2003
Location: Bologna
Distribution: CentOS 6.5 OpenSuSE 12.3
Posts: 10,509

Rep: Reputation: 1983Reputation: 1983Reputation: 1983Reputation: 1983Reputation: 1983Reputation: 1983Reputation: 1983Reputation: 1983Reputation: 1983Reputation: 1983Reputation: 1983
Hi. I'd return for a moment to the original question to confirm that ta0kira's worries are well-founded. I created a test file using this command:
Code:
$ for i in $(seq 1 10000); do printf "line%05d\n" $i >> testfile; done
$ head -3 testfile
line00001
line00002
line00003
$ tail  -3 testfile
line09998
line09999
line10000
$ wc -l testfile
10000 testfile
in this way I can easily track the results and compare the output with the input file. Then I run a slightly modified version of the ta0kira's script, taking care of preserving the original structure and functionality (modifications are in the block of code inside the inner loop, also I removed unnecessary - for my purposes - code at the beginning):
Code:
#!/bin/bash
# Usage: test.sh <worker count> [file...]
# Deliberately reproduces the line-splitting problem: several background
# readers share one input stream, each through its own buffering cat.
max_count="$1"
shift

cat $* | for I in `seq 1 "$max_count"`; do
  # each worker copies its share of the shared stdin, tagging lines with $I
  cat | while read line; do
    echo $line $I
  done &
done
Now I run this script as
Code:
./test.sh 2 testfile > oufile
the result is that some lines are actually broken, suggesting the buffer problem as per ta0kira's hypothesis. The number of lines in the output file is less than 10000 (again something in the buffer is lost) and if I run the same multiple times, I always get a different result. Here is an example of the broken output (the first number comes from nl):
Code:
     1  410 2
     2  line00411 2
     3  line00412 2
<omitted>
  1542  line01557 2
  1543  line00line08193 1
  1544  line01558 2
  1545  line08194 1
<omitted>
A careful analysis tells me that the strings highlighted in green are part of the same original line: line00410.

Another clue was the need of the cat command before the inner loop. Again I can try to explain with a simplified example:
Code:
# copy standard input to standard output three times in a row
# (each pass ends when Ctrl-D / end-of-input is received)
for i in $(seq 1 3)
do
  cat
done
This accepts standard input from the keyboard three times, until you press Ctrl-D (end of standard input) each time. It also prints out the input string every time you press Enter (line-based input). This means it just serves as an input/output carrier. Indeed, without that, the inner loop in the ta0kira script does not receive any input, since there is no redirection (without the "cat |" part) and most likely it expects standard input from the keyboard. However, since the inner loop runs in the background, standard input is not tied to a tty and this causes the process to stop (and terminate when the parent dies).
 
Old 09-13-2009, 04:53 AM   #25
catkin
LQ 5k Club
 
Registered: Dec 2008
Location: Tamil Nadu, India
Distribution: Debian
Posts: 8,578
Blog Entries: 31

Rep: Reputation: 1208Reputation: 1208Reputation: 1208Reputation: 1208Reputation: 1208Reputation: 1208Reputation: 1208Reputation: 1208Reputation: 1208
Thanks for the careful analysis, colucix

I still do not understand why the cat in the inner loop is necessary or why any characters are lost.

AIUI, cat used this way simply reads from stdin and writes to stdout which is piped to the stdin of read. Because we have one cat per sub-shell and each does buffered reads from the same original stdin, each sub-shell sees a discontinuity in the original stdin when its cat's buffer is refilled and another sub-shell's cat has taken the next chunk of stdin.

But why is the cat in the inner loop necessary? If all it is doing is piping its stdin to "read"'s stdin, why can't "read" read the stdin directly.

And why are any characters lost?
 
Old 09-13-2009, 11:08 AM   #26
ta0kira
Senior Member
 
Registered: Sep 2004
Distribution: FreeBSD 9.1, Kubuntu 12.10
Posts: 3,078

Original Poster
Rep: Reputation: Disabled
Quote:
Originally Posted by colucix View Post
Code:
./test.sh 2 testfile > oufile
This is interesting; however, this could be more of an issue of failure to share the output file and to open the file in "append" mode when writing. I've experienced this same thing when several processes were trying to write complete lines to the same file and I often got broken lines until I implemented advisory locking between the processes. When you used >, this opened it in "truncate" mode (and not necessarily "append"); therefore, try it again with this and see what happens:
Code:
echo -n > outfile
./test.sh 2 testfile >> outfile
When I tried this I only had one bad line when using both > and >>, but I'm testing this on a dual-core machine rather than the 16.
Kevin Barry

PS Better yet, change this:
Code:
    echo $line $I
to this:
Code:
    echo $line $I > outfile$I
then cat those files together.

Last edited by ta0kira; 09-13-2009 at 11:12 AM.
 
Old 09-13-2009, 11:53 AM   #27
colucix
LQ Guru
 
Registered: Sep 2003
Location: Bologna
Distribution: CentOS 6.5 OpenSuSE 12.3
Posts: 10,509

Rep: Reputation: 1983Reputation: 1983Reputation: 1983Reputation: 1983Reputation: 1983Reputation: 1983Reputation: 1983Reputation: 1983Reputation: 1983Reputation: 1983Reputation: 1983
Quote:
Originally Posted by catkin View Post
But why is the cat in the inner loop necessary? If all it is doing is piping its stdin to "read"'s stdin, why can't "read" read the stdin directly.
The reason lies in the definition of standard input itself. Yes, without cat it tries to read from the standard input, but the standard input is the keyboard input, unless redirected. Cat performs this redirection: it takes the standard input redirected from the first cat into the outer loop and flushes its output through the pipe, which becomes the input source for the inner loop.

In other words, if the inner loop is left "alone":
Code:
  while read line; do
    echo $line $I
  done &
the read statement takes (or better, tries to take) standard input from the keyboard, since there is no redirection at all in this block of code!
Quote:
And why are any characters lost?
I think the reason is that explained by ta0kira in the previous post. I didn't think about two sub-processes writing to the same output. I will perform the test suggested and let you know!

Last edited by colucix; 09-13-2009 at 11:54 AM.
 
Old 09-13-2009, 11:54 AM   #28
catkin
LQ 5k Club
 
Registered: Dec 2008
Location: Tamil Nadu, India
Distribution: Debian
Posts: 8,578
Blog Entries: 31

Rep: Reputation: 1208Reputation: 1208Reputation: 1208Reputation: 1208Reputation: 1208Reputation: 1208Reputation: 1208Reputation: 1208Reputation: 1208
Quote:
Originally Posted by ta0kira View Post
This is interesting; however, this could be more of an issue of failure to share the output file and to open the file in "append" mode when writing. I've experienced this same thing when several processes were trying to write complete lines to the same file and I often got broken lines until I implemented advisory locking between the processes. When you used >, this opened it in "truncate" mode (an not necessarily "append"); therefore, try it again with this and see what happens:
Code:
echo -n > outfile
./test.sh 2 testfile >> outfile
When I tried this I only had one bad line when using both > and >>, but I'm testing this on a dual-core machine rather than the 16.
Kevin Barry

PS Better yet, change this:
Code:
    echo $line $I
to this:
Code:
    echo $line $I > outfile$I
then cat those files together.
Good insight! Doing exactly that (except removing $I from the echo), then sorting the combined output and diffing it with the input, there were no differences. Phew!

Which leaves the question of why the inner loop cat is necessary to make it work when, AIUI, "read" itself should be able to read from stdin just as well as cat can... ?

Last edited by catkin; 09-13-2009 at 11:55 AM. Reason: More comprehensible!
 
Old 09-13-2009, 12:18 PM   #29
colucix
LQ Guru
 
Registered: Sep 2003
Location: Bologna
Distribution: CentOS 6.5 OpenSuSE 12.3
Posts: 10,509

Rep: Reputation: 1983Reputation: 1983Reputation: 1983Reputation: 1983Reputation: 1983Reputation: 1983Reputation: 1983Reputation: 1983Reputation: 1983Reputation: 1983Reputation: 1983
Quote:
Originally Posted by catkin View Post
Which leaves the question of why the inner loop cat is necessary to make it work when, AIUI, "read" itself should be able to read from stdin just as well as cat can... ?
Maybe you missed my last post...
 
Old 09-13-2009, 12:41 PM   #30
gnashley
Amigo developer
 
Registered: Dec 2003
Location: Germany
Distribution: Slackware
Posts: 4,928

Rep: Reputation: 612Reputation: 612Reputation: 612Reputation: 612Reputation: 612Reputation: 612
I get this:
line00236 1
line00237 1
line00238 1
line00239 1
ne00820 2
line00821 2
line00822 2
and this:
line03275 2
line03276 2
line032703687 2
line03688 2
line03689 2

This seems to work, by using read in raw mode:
Code:
$ cat try.sh
#!/bin/bash

# Usage: try.sh <line count> <file>
# Copies the first <line count> lines of <file>, re-emitting each line one
# character at a time with read -r -n 1 (raw mode, single character), to
# avoid the word-splitting behavior of a plain read.
max_count="$1"
shift
actual_count=0
OLD_IFS=$IFS
IFS=$'\n'
cat $1 | while read line ; do
    ((actual_count++))
    [[ $actual_count -gt $max_count ]] && break
    IFS=''
    echo "$line" | while read -r -n 1 char ; do
       echo -n $char
       # NOTE(review): this shift has no positional parameters left to
       # consume here -- it appears to be a leftover and to do nothing
       shift
    done
    echo ""
    IFS=$'\n'
done
IFS=$OLD_IFS
(sh ./try.sh 10000 testfile > oufile)
It's very slow since it reads each line a char at a time. You could also redirect and use read -u? to read input from a specified FD.

Here's a version that is a bit faster and free from external calls to cat:
Code:
#!/bin/bash
# Usage: <script> <line count> <file>
# Echo the first <line count> lines of <file>, one character at a time,
# without any external calls to cat.
max_count="$1"
filename="$2"
actual_count=0
OLD_IFS=$IFS
IFS=$'\n'
while read line ; do
    ((actual_count++))
    [[ $actual_count -gt $max_count ]] && break
    IFS=''
    linelen=${#line}
    char_offset=0
    # fix: the loop condition tested $char_count, which is never set (and
    # so evaluated as 0), while the body incremented char_offset -- the
    # loop therefore never terminated; test char_offset instead
    while [[ $char_offset -lt $linelen ]] ; do
	echo -n ${line:$char_offset:1}
	(( char_offset++ ))
    done
    echo ""
    IFS=$'\n'
done <"$filename"

IFS=$OLD_IFS
 
  


Reply



Posting Rules
You may not post new threads
You may not post replies
You may not post attachments
You may not edit your posts

BB code is On
Smilies are On
[IMG] code is Off
HTML code is Off



Similar Threads
Thread Thread Starter Forum Replies Last Post
LXer: Reading Multiple Files with Bash LXer Syndicated Linux News 0 08-22-2009 12:00 AM
multiple processes read from a device jiankunli Linux - Kernel 3 05-09-2008 05:15 PM
bash: read multiple lines in a file 300zxkyle Programming 7 07-29-2007 04:38 AM
Bash: How to read tab character when reading input new_to_bash Programming 7 12-09-2006 07:31 PM
bash: create a descriptor reading/writing from/to different files? Thinking Programming 0 04-19-2006 03:28 PM

LinuxQuestions.org > Forums > Non-*NIX Forums > Programming

All times are GMT -5. The time now is 12:01 AM.

Main Menu
Advertisement
My LQ
Write for LQ
LinuxQuestions.org is looking for people interested in writing Editorials, Articles, Reviews, and more. If you'd like to contribute content, let us know.
Main Menu
Syndicate
RSS1  Latest Threads
RSS1  LQ News
Twitter: @linuxquestions
Open Source Consulting | Domain Registration