Awk 对于字段中的每个不同引用,打印具有关联最大值的行
我有 我想打印Awk 对于字段中的每个不同引用,打印具有关联最大值的行,awk,Awk,我有 我想打印$2中的每个元素与$3中的最高值关联的行 ID=exon-XM_030285750.2 LOC100221041 7895 ID=exon-XM_030285760.2 LOC100221041 8757 ID=exon-XM_030285720.2 LOC100221041 8656 ID=exon-XM_030285738.2 LOC100221041 8183 ID=exon-XM_030285728.2 LOC100221041 84
$2
中的每个元素与$3
中的最高值关联的行
ID=exon-XM_030285750.2 LOC100221041 7895
ID=exon-XM_030285760.2 LOC100221041 8757
ID=exon-XM_030285720.2 LOC100221041 8656
ID=exon-XM_030285738.2 LOC100221041 8183
ID=exon-XM_030285728.2 LOC100221041 8402
ID=exon-XM_030285733.2 LOC100221041 7398
ID=exon-XM_030285715.2 LOC100221041 8780
ID=exon-XM_030285707.2 LOC100221041 8963
ID=exon-XM_030285694.2 DCBLD2 5838
ID=exon-XM_030285774.2 CMSS1 1440
ID=exon-XM_012570107.3 CMSS1 1502
ID=exon-XM_012570104.3 FILIP1L 6371
ID=exon-XM_030285654.2 FILIP1L 6456
ID=exon-XM_030285647.2 FILIP1L 6488
ID=exon-XM_032751000.1 FILIP1L 5886
ID=exon-XM_030285671.2 FILIP1L 5622
ID=exon-XM_030285682.2 FILIP1L 5395
ID=exon-XR_004369230.1 LOC116808959 2289
我试过这个
ID=exon-XM_030285707.2 LOC100221041 8963
ID=exon-XM_030285694.2 DCBLD2 5838
ID=exon-XM_012570107.3 CMSS1 1502
ID=exon-XM_030285647.2 FILIP1L 6488
ID=exon-XR_004369230.1 LOC116808959 2289
从这里
但我也希望在输出中保留$1
,并保持与输入中相同的顺序
答案是什么
显示了如何构建一个保持原始顺序的数组,但我想将两者结合起来。
您可以使用此
awk
:
awk '!($2 in max) || $3 > max[$2] {
   if (!($2 in max))
      ord[++n] = $2
   max[$2] = $3
   rec[$2] = $0
}
END {
   for (i=1; i<=n; i++)
      print rec[ord[i]]
}' file
您能否尝试使用GNUawk
中显示的样本编写并测试以下内容
# Second awk stage: for each distinct value of column 2, keep the largest
# column 3 seen and print "<key><TAB><max>" once all input is consumed.
# Column 1 is never stored, and `for (key in arr)` visits keys in an
# unspecified order, so the output carries neither $1 nor the input order.
awk -f avg.sh test |
  awk '
    BEGIN { OFS = "\t" }
    {
      # Seed the slot while it still compares equal to 0 (unset counts as 0).
      if (arr[$2] == 0) arr[$2] = $3
      # Raise the stored value whenever the current row is larger.
      if ($3 > arr[$2]) arr[$2] = $3
    }
    END {
      for (key in arr)
        print key, arr[key]
    }
  '
要进入制表符分隔的表单,请尝试以下操作
ID=exon-XM_030285707.2 1 8963
ID=exon-XM_030285694.2 2 5838
ID=exon-XM_012570107.3 3 1502
ID=exon-XM_030285647.2 4 6488
ID=exon-XR_004369230.1 5 2289
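awk -v OFS="\t" '
!arr1[$2]++{
  found[++count]=$2
}
{
  arr[$2]=(arr[$2]>$3?arr[$2]:$3)
  val[$2 OFS $3]=$1
}
END{
  for(i=1;i<=count;i++){
    print val[found[i] OFS arr[found[i]]],found[i],arr[found[i]]
  }
}' Input_file
您可以使用sort和awk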
如果订购是可选的
# For every distinct key in column 2, print the row (ID, key, value) whose
# column 3 is the largest, keeping keys in order of first appearance.
# Output is tab-separated and then aligned by column(1).
#
# The maximum is seeded from the first row of each key rather than from an
# implicit 0, so groups whose values are all negative are reported correctly.
# Only one ID and one value are kept per key.
awk -v OFS="\t" '
!($2 in max) {              # first row for this key: record order, seed max
  order[++count] = $2
  max[$2] = $3
  id[$2] = $1
  next
}
$3 >= max[$2] {             # ">=": on equal values the latest row wins
  max[$2] = $3
  id[$2] = $1
}
END {
  for (i = 1; i <= count; i++) {
    key = order[i]
    print id[key], key, max[key]
  }
}' Input_file |
  column -t -s $'\t'
为了保持顺序,您可以引入序号并在最后删除它们
# Sort by key (column 2), then by value (column 3) numerically descending, and
# keep only the first row of each key, i.e. the row holding the maximum.
# `!seen[$2]++` prints nothing on empty input and also handles an empty key.
$ sort -k2,2 -k3,3nr madza.txt | awk '!seen[$2]++'
ID=exon-XR_004369230.1 LOC116808959 2289
ID=exon-XM_030285707.2 LOC100221041 8963
ID=exon-XM_030285647.2 FILIP1L 6488
ID=exon-XM_030285694.2 DCBLD2 5838
ID=exon-XM_012570107.3 CMSS1 1502
$
sort -k2,2 -k3,3 -nr
应该是 sort -k2,2 -k3,3nr
@rowboat..是的,对..我也在挣扎..谢谢你指出
# Sort by key (column 2), then by value (column 3) numerically descending, and
# keep only the first row of each key, i.e. the row holding the maximum.
# `!seen[$2]++` prints nothing on empty input and also handles an empty key.
$ sort -k2,2 -k3,3nr madza.txt | awk '!seen[$2]++'
ID=exon-XR_004369230.1 LOC116808959 2289
ID=exon-XM_030285707.2 LOC100221041 8963
ID=exon-XM_030285647.2 FILIP1L 6488
ID=exon-XM_030285694.2 DCBLD2 5838
ID=exon-XM_012570107.3 CMSS1 1502
$
# Same selection, but rows come out in their original input order:
#   1. prefix every row with its line number;
#   2. sort by key (now column 3), value descending (column 4), and line
#      number as an explicit tie-break so the earliest of equal rows wins;
#   3. keep the first row per key;
#   4. sort back by line number and strip the prefix with cut.
# Prefixing and cutting avoids relying on awk rebuilding $0 after NF is
# decremented, and works for rows with any number of trailing columns.
$ awk '{ print NR, $0 }' madza.txt | sort -k3,3 -k4,4nr -k1,1n | awk '!seen[$3]++' | sort -k1,1n | cut -d' ' -f2-
ID=exon-XM_030285707.2 LOC100221041 8963
ID=exon-XM_030285694.2 DCBLD2 5838
ID=exon-XM_012570107.3 CMSS1 1502
ID=exon-XM_030285647.2 FILIP1L 6488
ID=exon-XR_004369230.1 LOC116808959 2289
$