Hi.
I finally seem to have got it working. Here is an awk script:
Code:
# process.awk -- split SDF files into compounds shared between input files
# and compounds that occur in only one input file.
#
# Usage:  awk -f process.awk FILE_1.sdf FILE_2.sdf ...
#
# Output:
#   stdout                 the list of common compound names
#   common.sdf             one copy of every common compound record
#   <name>_unmatched.sdf   records of <name>.sdf that are not common
#
# A compound is identified by the first whitespace-separated token of its
# record.  Needs an awk that accepts a regular expression as RS (for
# example gawk or mawk).

BEGIN {
    # Every SDF record is terminated by a line consisting of "$$$$".
    RS = "[$][$][$][$]\n"
}

# Pass 1 (normal awk input loop): count in how many input files each
# compound appears.  A compound is counted at most once per file, so a
# duplicate inside a single file does not make it "common"; records
# without a name are ignored.
$1 != "" && !((FILENAME, $1) in counted) {
    counted[FILENAME, $1] = 1
    compounds[$1]++
}

END {
    # A compound is common when it was found in more than one file.
    for (c in compounds)
        if (compounds[c] > 1)
            common[c] = 1

    print "Common compounds:"
    for (c in common)
        print c

    # Pass 2: re-read every input file in command-line order (a "for in"
    # loop over ARGV has no defined order) and distribute its records.
    for (i = 1; i < ARGC; i++) {
        file = ARGV[i]
        if (file == "")
            continue

        # FILE.sdf -> FILE_unmatched.sdf
        unmatched = file
        sub(/\.sdf$/, "", unmatched)
        unmatched = unmatched "_unmatched.sdf"

        while ((getline < file) > 0) {
            # Compare against "" rather than testing truthiness, so a
            # compound whose name is "0" is not skipped.
            if ($1 == "")
                continue

            if ($1 in common) {
                # Only the first copy encountered goes to common.sdf.
                if (!($1 in seen)) {
                    print $0 "$$$$" > "common.sdf"
                    seen[$1] = 1
                }
            } else
                print $0 "$$$$" > unmatched
        }

        # Release both the input and the per-file output; close() must be
        # given exactly the name that was used to open the file.
        close(file)
        close(unmatched)
    }
    close("common.sdf")
}
Save it to a file, for example "process.awk".
To run it, type in the console
Code:
awk -f process.awk FILE_*.sdf
The script prints the common compounds to standard output and writes them to "common.sdf". For each input file that contains unmatched compounds, it also creates a corresponding "FILE_*_unmatched.sdf" file.
Hope that helps.