Hi,
my input looks like this:
Code:
PASS|A(1/2);B(3/4);C(5/6)
PASS|A(7/8);C(9/10)
FAIL|B(11/12)
I am trying to isolate the individual ";"-separated strings from $2 in identical fields based on the first letter.
example from record 1 of input above:
letter1=A
letter1_val1=1
letter1_val2=2
letter2=B
letter2_val1=3
letter2_val2=4
letter3=C
letter3_val1=5
letter3_val2=6
Output would be:
Code:
Test|A_val1|A_val2|B_val1|B_val2|C_val1|C_val2
PASS|1|2|3|4|5|6
PASS|7|8|-|-|9|10
FAIL|-|-|11|12|-|-
Note that I know in advance how the final header (here in red) will look like (i.e. here I know I have 3 different letters in $2).
So far I managed to split the different letters and their values but I am struggling to order them.
Code:
awk '
BEGIN{
FS=OFS="|"
# The output header is known in advance
header = "Test|A_val1|A_val2|B_val1|B_val2|C_val1|C_val2"
a=split(header,b,"|")
# val1 record indexes
for(i=2; i<=a; i+=2){
letter_val1 = b[i]
gsub(/_val1/,"",letter_val1)
c[letter_val1]=i
}
# val2 record indexes
for(j=3; j<= a; j+=2){
letter_val2 = b[j]
gsub(/_val2/,"",letter_val2)
d[letter_val2]=j
}
print header
}
{
# isolate ";"-separated string from $2 and their respective 'letter', 'val1' and 'val2'
e=split($2,f,";")
for(k=1; k<=e; k++){
split(f[k],g,"[\(/\)]")
letter=g[1]
val1=g[2]
val2=g[3]
# match letter and val1 with proper record number
if(letter in c){
$(c[letter])=val1
}
# match letter and val2 with proper record number
if(letter in d){
$(d[letter])=val2
}
for(l=1; l<=NF; l++){
if($l ~ /^$/){
$l="-"
}
}
print $0
} input > output
The code above produces this output:
Code:
Test|A_val1|A_val2|B_val1|B_val2|C_val1|C_val2
PASS|1|2|3|4|5|6
PASS|7|8|-|-|9|10
FAIL|B(11/12)|-|11|12
EDIT: Find a way to fill the blanks, but the problem now is when $2 does not begin by letter 'A' (see record 3 above).