Warning: file_get_contents(/data/phpspider/zhask/data//catemap/5/bash/15.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
使用bash脚本转置特定列_Bash_Awk_Sed_Transpose - Fatal编程技术网

使用bash脚本转置特定列

使用bash脚本转置特定列,bash,awk,sed,transpose,Bash,Awk,Sed,Transpose,我有一个具有以下模式的巨大文件(沿文件重复数千次): 我想捕获第一个块的第一列和第三列(单词和Obs频率)(从只包含#的行开始,到包含“Other”的行结束)并转置它们。在下面的模块中,我只想在第一次换位下对Obs频率进行换位。输出文件应如下所示: AAAA AAAC AAAG AAAT AACA AACC AACG AACT AAGA AAGC AAGG AAGT AATA AATC AATG A

我有一个具有以下模式的巨大文件(沿文件重复数千次):

我想提取第一个块(从只包含 # 的行开始,到包含“Other”的行结束)的第一列和第三列(即单词和 Obs 频率)并将其转置。对于后续的各个块,我只想转置 Obs 频率这一列,并把结果依次放在第一次转置的结果下方。输出文件应如下所示:

    AAAA    AAAC    AAAG    AAAT    AACA    AACC    AACG    AACT    AAGA    AAGC    AAGG    AAGT    AATA    AATC    AATG    AATT    ACAA    ACAC    ACAG    ACAT    ACCA    ACCC    ACCG    ACCT    ACGA    ACGC    ACGG    ACGT    ACTA    ACTC    ACTG    ACTT    AGAA    AGAC    AGAG    AGAT    AGCA    AGCC    AGCG    AGCT    AGGA    AGGC    AGGG    AGGT    AGTA    AGTC    AGTG    AGTT    ATAA    ATAC    ATAG    ATAT    ATCA    ATCC    ATCG    ATCT    ATGA    ATGC    ATGG    ATGT    ATTA    ATTC    ATTG    ATTT    CAAA    CAAC    CAAG    CAAT    CACA    CACC    CACG    CACT    CAGA    CAGC    CAGG    CAGT    CATA    CATC    CATG    CATT    CCAA    CCAC    CCAG    CCAT    CCCA    CCCC    CCCG    CCCT    CCGA    CCGC    CCGG    CCGT    CCTA    CCTC    CCTG    CCTT    CGAA    CGAC    CGAG    CGAT    CGCA    CGCC    CGCG    CGCT    CGGA    CGGC    CGGG    CGGT    CGTA    CGTC    CGTG    CGTT    CTAA    CTAC    CTAG    CTAT    CTCA    CTCC    CTCG    CTCT    CTGA    CTGC    CTGG    CTGT    CTTA    CTTC    CTTG    CTTT    GAAA    GAAC    GAAG    GAAT    GACA    GACC    GACG    GACT    GAGA    GAGC    GAGG    GAGT    GATA    GATC    GATG    GATT    GCAA    GCAC    GCAG    GCAT    GCCA    GCCC    GCCG    GCCT    GCGA    GCGC    GCGG    GCGT    GCTA    GCTC    GCTG    GCTT    GGAA    GGAC    GGAG    GGAT    GGCA    GGCC    GGCG    GGCT    GGGA    GGGC    GGGG    GGGT    GGTA    GGTC    GGTG    GGTT    GTAA    GTAC    GTAG    GTAT    GTCA    GTCC    GTCG    GTCT    GTGA    GTGC    GTGG    GTGT    GTTA    GTTC    GTTG    GTTT    TAAA    TAAC    TAAG    TAAT    TACA    TACC    TACG    TACT    TAGA    TAGC    TAGG    TAGT    TATA    TATC    TATG    TATT    TCAA    TCAC    TCAG    TCAT    TCCA    TCCC    TCCG    TCCT    TCGA    TCGC    TCGG    TCGT    TCTA    TCTC    TCTG    TCTT    TGAA    TGAC    TGAG    TGAT    TGCA    TGCC    TGCG    TGCT    TGGA    TGGC    TGGG    TGGT    TGTA    TGTC    TGTG    TGTT    TTAA    TTAC    TTAG    TTAT    TTCA    TTCC    TTCG    TTCT    TTGA    
TTGC    TTGG    TTGT    TTTA    TTTC    TTTG    TTTT
s21_contig00001 0.0125181   0.0069468   0.0058463   0.0078066   0.0061215   0.0029920   0.0057776   0.0028200   0.0050210   0.0064654   0.0048834   0.0029920   0.0051585   0.0052617   0.0055024   0.0048146   0.0062934   0.0024761   0.0031639   0.0041956   0.0024417   0.0015820   0.0041956   0.0014444   0.0047459   0.0030607   0.0035078   0.0028200   0.0013756   0.0015820   0.0022010   0.0029920   0.0048146   0.0019259   0.0020978   0.0026481   0.0049866   0.0035422   0.0058463   0.0029576   0.0040581   0.0031295   0.0028888   0.0014444   0.0016163   0.0015820   0.0021322   0.0028200   0.0041268   0.0029576   0.0026137   0.0058463   0.0048490   0.0040237   0.0070156   0.0026481   0.0067749   0.0041956   0.0050554   0.0041956   0.0029232   0.0052617   0.0047459   0.0078066   0.0080473   0.0046771   0.0053305   0.0047459   0.0027856   0.0030263   0.0024761   0.0021322   0.0017883   0.0052273   0.0018915   0.0022010   0.0037141   0.0066717   0.0043332   0.0055024   0.0049522   0.0024417   0.0021666   0.0050554   0.0026481   0.0032327   0.0027856   0.0028888   0.0037829   0.0057432   0.0037829   0.0035078   0.0016851   0.0030951   0.0018915   0.0048834   0.0055712   0.0034734   0.0033015   0.0070156   0.0032327   0.0062934   0.0041268   0.0058463   0.0039893   0.0058807   0.0027856   0.0041956   0.0020978   0.0037829   0.0024761   0.0057776   0.0016163   0.0015820   0.0006878   0.0026137   0.0024073   0.0037485   0.0033015   0.0020978   0.0024417   0.0033359   0.0021666   0.0031639   0.0023729   0.0058120   0.0053305   0.0058463   0.0084944   0.0043332   0.0058120   0.0052617   0.0037829   0.0020634   0.0037829   0.0015820   0.0031983   0.0036798   0.0030951   0.0015820   0.0027512   0.0038517   0.0066717   0.0052617   0.0059151   0.0025105   0.0033359   0.0041956   0.0050210   0.0027856   0.0058807   0.0031295   0.0051929   0.0055024   0.0057432   0.0030607   0.0019602   0.0036798   0.0052273   0.0064654   0.0064654   0.0022698   0.0037485   0.0040237   0.0045739   
0.0024073   0.0062934   0.0035422   0.0039549   0.0027856   0.0032327   0.0015820   0.0015820   0.0020634   0.0030263   0.0029920   0.0024073   0.0017883   0.0015820   0.0029576   0.0035422   0.0022698   0.0034734   0.0019259   0.0029920   0.0025105   0.0024417   0.0024761   0.0017539   0.0043332   0.0046771   0.0069468   0.0040581   0.0017539   0.0023729   0.0029232   0.0034390   0.0015820   0.0020978   0.0016163   0.0014788   0.0019602   0.0016851   0.0013756   0.0039205   0.0027512   0.0037141   0.0051585   0.0056400   0.0029920   0.0024417   0.0067749   0.0045051   0.0039549   0.0039893   0.0040581   0.0056400   0.0051929   0.0037829   0.0047459   0.0014788   0.0031983   0.0017883   0.0050210   0.0070500   0.0035422   0.0024073   0.0048490   0.0031639   0.0045739   0.0032327   0.0049866   0.0045051   0.0050210   0.0026481   0.0024417   0.0034390   0.0037829   0.0027856   0.0061215   0.0029576   0.0024073   0.0016163   0.0041268   0.0070500   0.0064654   0.0055712   0.0048146   0.0056400   0.0059151   0.0049522   0.0062934   0.0040581   0.0084944   0.0080473   0.0125181
s21_contig00002 0.0125181   0.0069468   0.0058463   0.0078066   0.0061215   0.0029920   0.0057776   0.0028200   0.0050210   0.0064654   0.0048834   0.0029920   0.0051585   0.0052617   0.0055024   0.0048146   0.0062934   0.0024761   0.0031639   0.0041956   0.0024417   0.0015820   0.0041956   0.0014444   0.0047459   0.0030607   0.0035078   0.0028200   0.0013756   0.0015820   0.0022010   0.0029920   0.0048146   0.0019259   0.0020978   0.0026481   0.0049866   0.0035422   0.0058463   0.0029576   0.0040581   0.0031295   0.0028888   0.0014444   0.0016163   0.0015820   0.0021322   0.0028200   0.0041268   0.0029576   0.0026137   0.0058463   0.0048490   0.0040237   0.0070156   0.0026481   0.0067749   0.0041956   0.0050554   0.0041956   0.0029232   0.0052617   0.0047459   0.0078066   0.0080473   0.0046771   0.0053305   0.0047459   0.0027856   0.0030263   0.0024761   0.0021322   0.0017883   0.0052273   0.0018915   0.0022010   0.0037141   0.0066717   0.0043332   0.0055024   0.0049522   0.0024417   0.0021666   0.0050554   0.0026481   0.0032327   0.0027856   0.0028888   0.0037829   0.0057432   0.0037829   0.0035078   0.0016851   0.0030951   0.0018915   0.0048834   0.0055712   0.0034734   0.0033015   0.0070156   0.0032327   0.0062934   0.0041268   0.0058463   0.0039893   0.0058807   0.0027856   0.0041956   0.0020978   0.0037829   0.0024761   0.0057776   0.0016163   0.0015820   0.0006878   0.0026137   0.0024073   0.0037485   0.0033015   0.0020978   0.0024417   0.0033359   0.0021666   0.0031639   0.0023729   0.0058120   0.0053305   0.0058463   0.0084944   0.0043332   0.0058120   0.0052617   0.0037829   0.0020634   0.0037829   0.0015820   0.0031983   0.0036798   0.0030951   0.0015820   0.0027512   0.0038517   0.0066717   0.0052617   0.0059151   0.0025105   0.0033359   0.0041956   0.0050210   0.0027856   0.0058807   0.0031295   0.0051929   0.0055024   0.0057432   0.0030607   0.0019602   0.0036798   0.0052273   0.0064654   0.0064654   0.0022698   0.0037485   0.0040237   0.0045739   
0.0024073   0.0062934   0.0035422   0.0039549   0.0027856   0.0032327   0.0015820   0.0015820   0.0020634   0.0030263   0.0029920   0.0024073   0.0017883   0.0015820   0.0029576   0.0035422   0.0022698   0.0034734   0.0019259   0.0029920   0.0025105   0.0024417   0.0024761   0.0017539   0.0043332   0.0046771   0.0069468   0.0040581   0.0017539   0.0023729   0.0029232   0.0034390   0.0015820   0.0020978   0.0016163   0.0014788   0.0019602   0.0016851   0.0013756   0.0039205   0.0027512   0.0037141   0.0051585   0.0056400   0.0029920   0.0024417   0.0067749   0.0045051   0.0039549   0.0039893   0.0040581   0.0056400   0.0051929   0.0037829   0.0047459   0.0014788   0.0031983   0.0017883   0.0050210   0.0070500   0.0035422   0.0024073   0.0048490   0.0031639   0.0045739   0.0032327   0.0049866   0.0045051   0.0050210   0.0026481   0.0024417   0.0034390   0.0037829   0.0027856   0.0061215   0.0029576   0.0024073   0.0016163   0.0041268   0.0070500   0.0064654   0.0055712   0.0048146   0.0056400   0.0059151   0.0049522   0.0062934   0.0040581   0.0084944   0.0080473   0.0125181

重要的是,每个块的标识符(位于语句“输入序列是:”之下、符合模式“s21_contig”)应放在第一列中,取代“Obs频率”。

此awk脚本应满足您的需要:

script.awk的内容: 输出:
此awk脚本应满足您的需要:

script.awk的内容: 输出: 这似乎也起作用(将此代码另存为
transpose.awk
):

根据给定数据:

                 AAAA      AAAC      AAAG      AAAT      AACA      AACC      AACG      AACT      AAGA      AAGC      AAGG      AAGT      AATA      AATC      AATG      AATT      ACAA      ACAC      ACAG      ACAT      ACCA      ACCC      ACCG      ACCT      ACGA      ACGC      ACGG      ACGT      ACTA      ACTC      ACTG      ACTT      AGAA      AGAC      AGAG      AGAT      AGCA      AGCC      AGCG      AGCT      AGGA      AGGC      AGGG      AGGT      AGTA      AGTC      AGTG      AGTT      ATAA      ATAC      ATAG      ATAT      ATCA      ATCC      ATCG      ATCT      ATGA      ATGC      ATGG      ATGT      ATTA      ATTC      ATTG      ATTT      CAAA      CAAC      CAAG      CAAT      CACA      CACC      CACG      CACT      CAGA      CAGC      CAGG      CAGT      CATA      CATC      CATG      CATT      CCAA      CCAC      CCAG      CCAT      CCCA      CCCC      CCCG      CCCT      CCGA      CCGC      CCGG      CCGT      CCTA      CCTC      CCTG      CCTT      CGAA      CGAC      CGAG      CGAT      CGCA      CGCC      CGCG      CGCT      CGGA      CGGC      CGGG      CGGT      CGTA      CGTC      CGTG      CGTT      CTAA      CTAC      CTAG      CTAT      CTCA      CTCC      CTCG      CTCT      CTGA      CTGC      CTGG      CTGT      CTTA      CTTC      CTTG      CTTT      GAAA      GAAC      GAAG      GAAT      GACA      GACC      GACG      GACT      GAGA      GAGC      GAGG      GAGT      GATA      GATC      GATG      GATT      GCAA      GCAC      GCAG      GCAT      GCCA      GCCC      GCCG      GCCT      GCGA      GCGC      GCGG      GCGT      GCTA      GCTC      GCTG      GCTT      GGAA      GGAC      GGAG      GGAT      GGCA      GGCC      GGCG      GGCT      GGGA      GGGC      GGGG      GGGT      GGTA      GGTC      GGTG      GGTT      GTAA      GTAC      GTAG      GTAT      GTCA      GTCC      GTCG      GTCT      GTGA      GTGC      GTGG      GTGT      GTTA      GTTC      GTTG      GTTT      TAAA      TAAC      TAAG      TAAT      TACA      TACC      
TACG      TACT      TAGA      TAGC      TAGG      TAGT      TATA      TATC      TATG      TATT      TCAA      TCAC      TCAG      TCAT      TCCA      TCCC      TCCG      TCCT      TCGA      TCGC      TCGG      TCGT      TCTA      TCTC      TCTG      TCTT      TGAA      TGAC      TGAG      TGAT      TGCA      TGCC      TGCG      TGCT      TGGA      TGGC      TGGG      TGGT      TGTA      TGTC      TGTG      TGTT      TTAA      TTAC      TTAG      TTAT      TTCA      TTCC      TTCG      TTCT      TTGA      TTGC      TTGG      TTGT      TTTA      TTTC      TTTG      TTTT
s21_contig00001  0.0115837 0.0060850 0.0061659 0.0069745 0.0045890 0.0022844 0.0064893 0.0022035 0.0044879 0.0068532 0.0039623 0.0034165 0.0026079 0.0045688 0.0057817 0.0039623 0.0042656 0.0018396 0.0020822 0.0033761 0.0016173 0.0014555 0.0043869 0.0010512 0.0065095 0.0040634 0.0050944 0.0040836 0.0007076 0.0015162 0.0017588 0.0034165 0.0031941 0.0018396 0.0020216 0.0016981 0.0046497 0.0037399 0.0070756 0.0044071 0.0029111 0.0029920 0.0022035 0.0010512 0.0012938 0.0017790 0.0021227 0.0022035 0.0027494 0.0020216 0.0012938 0.0031132 0.0048922 0.0034771 0.0087131 0.0016981 0.0062871 0.0046497 0.0043060 0.0033761 0.0022237 0.0033558 0.0043666 0.0069745 0.0079246 0.0041645 0.0054987 0.0043666 0.0016375 0.0026483 0.0028100 0.0021227 0.0011523 0.0061254 0.0013545 0.0017588 0.0025674 0.0065904 0.0036793 0.0057817 0.0043464 0.0017588 0.0020216 0.0043060 0.0021429 0.0027291 0.0042858 0.0022035 0.0055796 0.0077225 0.0059435 0.0050944 0.0007278 0.0030930 0.0013545 0.0039623 0.0066308 0.0064489 0.0048720 0.0087131 0.0049933 0.0094004 0.0072373 0.0070756 0.0057211 0.0099462 0.0042858 0.0043869 0.0027494 0.0077023 0.0028100 0.0064893 0.0008895 0.0008491 0.0002426 0.0012938 0.0026483 0.0032345 0.0048720 0.0020216 0.0028909 0.0033963 0.0020216 0.0020822 0.0012332 0.0058222 0.0054987 0.0061659 0.0080661 0.0042656 0.0058222 0.0033558 0.0038006 0.0026685 0.0077023 0.0017790 0.0023653 0.0058020 0.0030930 0.0015162 0.0027696 0.0048518 0.0065904 0.0045688 0.0069543 0.0030526 0.0033963 0.0046497 0.0052561 0.0037602 0.0099462 0.0029920 0.0074192 0.0095015 0.0077225 0.0040634 0.0010917 0.0058020 0.0061254 0.0068532 0.0059637 0.0027898 0.0032345 0.0034771 0.0050540 0.0037602 0.0094004 0.0037399 0.0034165 0.0037602 0.0027291 0.0014555 0.0009097 0.0026685 0.0026483 0.0022844 0.0018801 0.0017386 0.0008491 0.0020216 0.0048720 0.0027898 0.0064489 0.0018396 0.0025674 0.0030526 0.0017588 0.0018396 0.0010512 0.0042656 0.0041645 0.0060850 0.0032345 0.0010512 0.0012332 0.0022237 0.0015364 0.0009097 
0.0027494 0.0012938 0.0007480 0.0010917 0.0007278 0.0007076 0.0012130 0.0027696 0.0025674 0.0026079 0.0063882 0.0025674 0.0028909 0.0062871 0.0034165 0.0034165 0.0057211 0.0029111 0.0071564 0.0074192 0.0055796 0.0065095 0.0007480 0.0023653 0.0011523 0.0044879 0.0057211 0.0048720 0.0026483 0.0048922 0.0033558 0.0050540 0.0049933 0.0046497 0.0034165 0.0052561 0.0021429 0.0016173 0.0015364 0.0038006 0.0016375 0.0045890 0.0022237 0.0018801 0.0008895 0.0027494 0.0057211 0.0059637 0.0066308 0.0031941 0.0063882 0.0069543 0.0043464 0.0042656 0.0032345 0.0080661 0.0079246 0.0115837
s21_contig00002  0.0125181 0.0069468 0.0058463 0.0078066 0.0061215 0.0029920 0.0057776 0.0028200 0.0050210 0.0064654 0.0048834 0.0029920 0.0051585 0.0052617 0.0055024 0.0048146 0.0062934 0.0024761 0.0031639 0.0041956 0.0024417 0.0015820 0.0041956 0.0014444 0.0047459 0.0030607 0.0035078 0.0028200 0.0013756 0.0015820 0.0022010 0.0029920 0.0048146 0.0019259 0.0020978 0.0026481 0.0049866 0.0035422 0.0058463 0.0029576 0.0040581 0.0031295 0.0028888 0.0014444 0.0016163 0.0015820 0.0021322 0.0028200 0.0041268 0.0029576 0.0026137 0.0058463 0.0048490 0.0040237 0.0070156 0.0026481 0.0067749 0.0041956 0.0050554 0.0041956 0.0029232 0.0052617 0.0047459 0.0078066 0.0080473 0.0046771 0.0053305 0.0047459 0.0027856 0.0030263 0.0024761 0.0021322 0.0017883 0.0052273 0.0018915 0.0022010 0.0037141 0.0066717 0.0043332 0.0055024 0.0049522 0.0024417 0.0021666 0.0050554 0.0026481 0.0032327 0.0027856 0.0028888 0.0037829 0.0057432 0.0037829 0.0035078 0.0016851 0.0030951 0.0018915 0.0048834 0.0055712 0.0034734 0.0033015 0.0070156 0.0032327 0.0062934 0.0041268 0.0058463 0.0039893 0.0058807 0.0027856 0.0041956 0.0020978 0.0037829 0.0024761 0.0057776 0.0016163 0.0015820 0.0006878 0.0026137 0.0024073 0.0037485 0.0033015 0.0020978 0.0024417 0.0033359 0.0021666 0.0031639 0.0023729 0.0058120 0.0053305 0.0058463 0.0084944 0.0043332 0.0058120 0.0052617 0.0037829 0.0020634 0.0037829 0.0015820 0.0031983 0.0036798 0.0030951 0.0015820 0.0027512 0.0038517 0.0066717 0.0052617 0.0059151 0.0025105 0.0033359 0.0041956 0.0050210 0.0027856 0.0058807 0.0031295 0.0051929 0.0055024 0.0057432 0.0030607 0.0019602 0.0036798 0.0052273 0.0064654 0.0064654 0.0022698 0.0037485 0.0040237 0.0045739 0.0024073 0.0062934 0.0035422 0.0039549 0.0027856 0.0032327 0.0015820 0.0015820 0.0020634 0.0030263 0.0029920 0.0024073 0.0017883 0.0015820 0.0029576 0.0035422 0.0022698 0.0034734 0.0019259 0.0029920 0.0025105 0.0024417 0.0024761 0.0017539 0.0043332 0.0046771 0.0069468 0.0040581 0.0017539 0.0023729 0.0029232 0.0034390 0.0015820 
0.0020978 0.0016163 0.0014788 0.0019602 0.0016851 0.0013756 0.0039205 0.0027512 0.0037141 0.0051585 0.0056400 0.0029920 0.0024417 0.0067749 0.0045051 0.0039549 0.0039893 0.0040581 0.0056400 0.0051929 0.0037829 0.0047459 0.0014788 0.0031983 0.0017883 0.0050210 0.0070500 0.0035422 0.0024073 0.0048490 0.0031639 0.0045739 0.0032327 0.0049866 0.0045051 0.0050210 0.0026481 0.0024417 0.0034390 0.0037829 0.0027856 0.0061215 0.0029576 0.0024073 0.0016163 0.0041268 0.0070500 0.0064654 0.0055712 0.0048146 0.0056400 0.0059151 0.0049522 0.0062934 0.0040581 0.0084944 0.0080473 0.0125181
代码按遇到的顺序打印[ACGT]代码序列,并根据需要添加新值。如果某个源中缺少某个值,则该值将在输出中显示为空白字段。标题行对应于第一组数据结束时的[ACGT]代码序列列表;代码不会再次尝试打印标题。

这似乎也可以(将此代码另存为
transpose.awk
):

根据给定数据:

                 AAAA      AAAC      AAAG      AAAT      AACA      AACC      AACG      AACT      AAGA      AAGC      AAGG      AAGT      AATA      AATC      AATG      AATT      ACAA      ACAC      ACAG      ACAT      ACCA      ACCC      ACCG      ACCT      ACGA      ACGC      ACGG      ACGT      ACTA      ACTC      ACTG      ACTT      AGAA      AGAC      AGAG      AGAT      AGCA      AGCC      AGCG      AGCT      AGGA      AGGC      AGGG      AGGT      AGTA      AGTC      AGTG      AGTT      ATAA      ATAC      ATAG      ATAT      ATCA      ATCC      ATCG      ATCT      ATGA      ATGC      ATGG      ATGT      ATTA      ATTC      ATTG      ATTT      CAAA      CAAC      CAAG      CAAT      CACA      CACC      CACG      CACT      CAGA      CAGC      CAGG      CAGT      CATA      CATC      CATG      CATT      CCAA      CCAC      CCAG      CCAT      CCCA      CCCC      CCCG      CCCT      CCGA      CCGC      CCGG      CCGT      CCTA      CCTC      CCTG      CCTT      CGAA      CGAC      CGAG      CGAT      CGCA      CGCC      CGCG      CGCT      CGGA      CGGC      CGGG      CGGT      CGTA      CGTC      CGTG      CGTT      CTAA      CTAC      CTAG      CTAT      CTCA      CTCC      CTCG      CTCT      CTGA      CTGC      CTGG      CTGT      CTTA      CTTC      CTTG      CTTT      GAAA      GAAC      GAAG      GAAT      GACA      GACC      GACG      GACT      GAGA      GAGC      GAGG      GAGT      GATA      GATC      GATG      GATT      GCAA      GCAC      GCAG      GCAT      GCCA      GCCC      GCCG      GCCT      GCGA      GCGC      GCGG      GCGT      GCTA      GCTC      GCTG      GCTT      GGAA      GGAC      GGAG      GGAT      GGCA      GGCC      GGCG      GGCT      GGGA      GGGC      GGGG      GGGT      GGTA      GGTC      GGTG      GGTT      GTAA      GTAC      GTAG      GTAT      GTCA      GTCC      GTCG      GTCT      GTGA      GTGC      GTGG      GTGT      GTTA      GTTC      GTTG      GTTT      TAAA      TAAC      TAAG      TAAT      TACA      TACC      
TACG      TACT      TAGA      TAGC      TAGG      TAGT      TATA      TATC      TATG      TATT      TCAA      TCAC      TCAG      TCAT      TCCA      TCCC      TCCG      TCCT      TCGA      TCGC      TCGG      TCGT      TCTA      TCTC      TCTG      TCTT      TGAA      TGAC      TGAG      TGAT      TGCA      TGCC      TGCG      TGCT      TGGA      TGGC      TGGG      TGGT      TGTA      TGTC      TGTG      TGTT      TTAA      TTAC      TTAG      TTAT      TTCA      TTCC      TTCG      TTCT      TTGA      TTGC      TTGG      TTGT      TTTA      TTTC      TTTG      TTTT
s21_contig00001  0.0115837 0.0060850 0.0061659 0.0069745 0.0045890 0.0022844 0.0064893 0.0022035 0.0044879 0.0068532 0.0039623 0.0034165 0.0026079 0.0045688 0.0057817 0.0039623 0.0042656 0.0018396 0.0020822 0.0033761 0.0016173 0.0014555 0.0043869 0.0010512 0.0065095 0.0040634 0.0050944 0.0040836 0.0007076 0.0015162 0.0017588 0.0034165 0.0031941 0.0018396 0.0020216 0.0016981 0.0046497 0.0037399 0.0070756 0.0044071 0.0029111 0.0029920 0.0022035 0.0010512 0.0012938 0.0017790 0.0021227 0.0022035 0.0027494 0.0020216 0.0012938 0.0031132 0.0048922 0.0034771 0.0087131 0.0016981 0.0062871 0.0046497 0.0043060 0.0033761 0.0022237 0.0033558 0.0043666 0.0069745 0.0079246 0.0041645 0.0054987 0.0043666 0.0016375 0.0026483 0.0028100 0.0021227 0.0011523 0.0061254 0.0013545 0.0017588 0.0025674 0.0065904 0.0036793 0.0057817 0.0043464 0.0017588 0.0020216 0.0043060 0.0021429 0.0027291 0.0042858 0.0022035 0.0055796 0.0077225 0.0059435 0.0050944 0.0007278 0.0030930 0.0013545 0.0039623 0.0066308 0.0064489 0.0048720 0.0087131 0.0049933 0.0094004 0.0072373 0.0070756 0.0057211 0.0099462 0.0042858 0.0043869 0.0027494 0.0077023 0.0028100 0.0064893 0.0008895 0.0008491 0.0002426 0.0012938 0.0026483 0.0032345 0.0048720 0.0020216 0.0028909 0.0033963 0.0020216 0.0020822 0.0012332 0.0058222 0.0054987 0.0061659 0.0080661 0.0042656 0.0058222 0.0033558 0.0038006 0.0026685 0.0077023 0.0017790 0.0023653 0.0058020 0.0030930 0.0015162 0.0027696 0.0048518 0.0065904 0.0045688 0.0069543 0.0030526 0.0033963 0.0046497 0.0052561 0.0037602 0.0099462 0.0029920 0.0074192 0.0095015 0.0077225 0.0040634 0.0010917 0.0058020 0.0061254 0.0068532 0.0059637 0.0027898 0.0032345 0.0034771 0.0050540 0.0037602 0.0094004 0.0037399 0.0034165 0.0037602 0.0027291 0.0014555 0.0009097 0.0026685 0.0026483 0.0022844 0.0018801 0.0017386 0.0008491 0.0020216 0.0048720 0.0027898 0.0064489 0.0018396 0.0025674 0.0030526 0.0017588 0.0018396 0.0010512 0.0042656 0.0041645 0.0060850 0.0032345 0.0010512 0.0012332 0.0022237 0.0015364 0.0009097 
0.0027494 0.0012938 0.0007480 0.0010917 0.0007278 0.0007076 0.0012130 0.0027696 0.0025674 0.0026079 0.0063882 0.0025674 0.0028909 0.0062871 0.0034165 0.0034165 0.0057211 0.0029111 0.0071564 0.0074192 0.0055796 0.0065095 0.0007480 0.0023653 0.0011523 0.0044879 0.0057211 0.0048720 0.0026483 0.0048922 0.0033558 0.0050540 0.0049933 0.0046497 0.0034165 0.0052561 0.0021429 0.0016173 0.0015364 0.0038006 0.0016375 0.0045890 0.0022237 0.0018801 0.0008895 0.0027494 0.0057211 0.0059637 0.0066308 0.0031941 0.0063882 0.0069543 0.0043464 0.0042656 0.0032345 0.0080661 0.0079246 0.0115837
s21_contig00002  0.0125181 0.0069468 0.0058463 0.0078066 0.0061215 0.0029920 0.0057776 0.0028200 0.0050210 0.0064654 0.0048834 0.0029920 0.0051585 0.0052617 0.0055024 0.0048146 0.0062934 0.0024761 0.0031639 0.0041956 0.0024417 0.0015820 0.0041956 0.0014444 0.0047459 0.0030607 0.0035078 0.0028200 0.0013756 0.0015820 0.0022010 0.0029920 0.0048146 0.0019259 0.0020978 0.0026481 0.0049866 0.0035422 0.0058463 0.0029576 0.0040581 0.0031295 0.0028888 0.0014444 0.0016163 0.0015820 0.0021322 0.0028200 0.0041268 0.0029576 0.0026137 0.0058463 0.0048490 0.0040237 0.0070156 0.0026481 0.0067749 0.0041956 0.0050554 0.0041956 0.0029232 0.0052617 0.0047459 0.0078066 0.0080473 0.0046771 0.0053305 0.0047459 0.0027856 0.0030263 0.0024761 0.0021322 0.0017883 0.0052273 0.0018915 0.0022010 0.0037141 0.0066717 0.0043332 0.0055024 0.0049522 0.0024417 0.0021666 0.0050554 0.0026481 0.0032327 0.0027856 0.0028888 0.0037829 0.0057432 0.0037829 0.0035078 0.0016851 0.0030951 0.0018915 0.0048834 0.0055712 0.0034734 0.0033015 0.0070156 0.0032327 0.0062934 0.0041268 0.0058463 0.0039893 0.0058807 0.0027856 0.0041956 0.0020978 0.0037829 0.0024761 0.0057776 0.0016163 0.0015820 0.0006878 0.0026137 0.0024073 0.0037485 0.0033015 0.0020978 0.0024417 0.0033359 0.0021666 0.0031639 0.0023729 0.0058120 0.0053305 0.0058463 0.0084944 0.0043332 0.0058120 0.0052617 0.0037829 0.0020634 0.0037829 0.0015820 0.0031983 0.0036798 0.0030951 0.0015820 0.0027512 0.0038517 0.0066717 0.0052617 0.0059151 0.0025105 0.0033359 0.0041956 0.0050210 0.0027856 0.0058807 0.0031295 0.0051929 0.0055024 0.0057432 0.0030607 0.0019602 0.0036798 0.0052273 0.0064654 0.0064654 0.0022698 0.0037485 0.0040237 0.0045739 0.0024073 0.0062934 0.0035422 0.0039549 0.0027856 0.0032327 0.0015820 0.0015820 0.0020634 0.0030263 0.0029920 0.0024073 0.0017883 0.0015820 0.0029576 0.0035422 0.0022698 0.0034734 0.0019259 0.0029920 0.0025105 0.0024417 0.0024761 0.0017539 0.0043332 0.0046771 0.0069468 0.0040581 0.0017539 0.0023729 0.0029232 0.0034390 0.0015820 
0.0020978 0.0016163 0.0014788 0.0019602 0.0016851 0.0013756 0.0039205 0.0027512 0.0037141 0.0051585 0.0056400 0.0029920 0.0024417 0.0067749 0.0045051 0.0039549 0.0039893 0.0040581 0.0056400 0.0051929 0.0037829 0.0047459 0.0014788 0.0031983 0.0017883 0.0050210 0.0070500 0.0035422 0.0024073 0.0048490 0.0031639 0.0045739 0.0032327 0.0049866 0.0045051 0.0050210 0.0026481 0.0024417 0.0034390 0.0037829 0.0027856 0.0061215 0.0029576 0.0024073 0.0016163 0.0041268 0.0070500 0.0064654 0.0055712 0.0048146 0.0056400 0.0059151 0.0049522 0.0062934 0.0040581 0.0084944 0.0080473 0.0125181

代码按遇到的顺序打印[ACGT]代码序列,并根据需要添加新值。如果某个源中缺少某个值,则该值将在输出中显示为空白字段。标题行对应于第一组数据结束时的[ACGT]代码序列列表;代码从不尝试再次打印标题。

只打印一次标题是一项挑战。这个脚本实现了这一点。输出的格式设置为与提供的示例一致,但可以轻松调整。脚本中的内联注释解释了它的工作方式:

#!/bin/bash
#
# Transpose the "Obs Frequency" column of a report made of repeated blocks.
#
# Usage: <script> datafile [key]
#   datafile  file to read; lines are split on whitespace into
#             word, obs count, obs frequency, (remaining fields ignored)
#   key       prefix that marks a sequence identifier when it starts the
#             second field of a line (default "s21")
#
# Output (stdout): one header line holding the words of the first block,
# then one line per sequence: its identifier followed by its frequencies.
# The header is printed only once, so every block must list the same words
# in the same order.
#
# Exit status: 0 on success, 1 when datafile is missing.

if [[ ! -f "$1" ]]; then
    # name the missing data file (not the script) and report on stderr
    printf "\n  Error: insufficient input, file '%s' not found.\n\n" "$1" >&2
    exit 1
fi

key="${2:-s21}"             # identifier prefix, default "s21"
inputseq=""                 # identifier of the sequence being collected

declare -i hdrdone=0        # becomes 1 once the header line is terminated
declare -i seqcnt=0         # number of sequence identifiers seen so far
declare -a obsfarray=()     # Obs Frequency values of the current sequence

## print the collected row as one newline-terminated line, then reset it
flush_row() {
    if (( ! hdrdone )); then
        printf '\n'         # close the header line built up word by word
        hdrdone=1
    fi
    printf '%s' "$inputseq"
    if (( ${#obsfarray[@]} > 0 )); then
        # fixed format string: values are data, never interpreted as a format
        printf '  %s' "${obsfarray[@]}"
    fi
    printf '\n'
    obsfarray=()
}

## make single pass through data file
# "|| [[ -n $word ]]" keeps a last line that lacks a trailing newline
while read -r word obscnt obsfreq _ || [[ -n "$word" ]]; do

    ## a second field starting with the key announces a new sequence
    if [[ "$obscnt" == "$key"* ]]; then
        # from the second identifier on, the previous sequence is complete
        if (( seqcnt > 0 )); then
            flush_row
        fi
        inputseq="$obscnt"
        seqcnt+=1
    fi

    ## data row: the word consists only of A, C, G, T
    # (this also rejects non-data lines such as 'Total ...')
    if [[ "$word" =~ ^[ACGT]+$ ]]; then
        # words double as header cells until the first row has been flushed
        if (( ! hdrdone )); then
            printf '    %s' "$word"
        fi
        obsfarray+=( "$obsfreq" )
    fi

done <"$1"

# print final sequence; stay silent when the file held nothing usable
if (( seqcnt > 0 || ${#obsfarray[@]} > 0 )); then
    flush_row
fi

exit 0
#/bin/bash
[-f“$1”]|{
printf“\n错误:输入不足,找不到文件“%s”。\n\n”“${0/*\/}”
出口1
}
##此脚本要求每个序列的标题行相等
key=“${2:-s21}”#标识序列(3个字符)的键默认为“s21”
currentseq=”“#变量保持序列
declare-i needhdr=0#控制打印头的标志
declare-i seqcnt=0#序列计数
声明-用于保存Obs频率的obsfarray数组
##对数据文件进行单次传递
读时-r单词obscnt obsrefq expfreq oefreq | | |[-n“$word”];做
##从obscnt捕获inputseq
如果[“z${obscnt:0:3}”=“z${key}”];然后
#如果序列计数>0,则已打印标题并准备打印数据
如果[$seqcnt-gt 0];然后
needhdr=1#将需要标头设置为false
printf“\n%s”“$inputseq”#打印换行符,后跟输入序列
对于${obsfarray[@]}中的i;是否打印Obs频率值
printf“$i”
完成
取消设置obsfarray#为下一个序列取消设置数组
fi
inputseq=“${obscnt}”#设置obscnt中的inputseq值
((seqcnt++)#增加seqcnt
fi
##打印标题,捕获obsfreq值
#测试第一个字符是否为CGT
如果[“z${word:0:1}”=“zA”]| |[“z${word:0:1}”=“zC”]| |
[“z${word:0:1}”=“zG”]| |[“z${word:0:1}”=“zT”];然后
如果[“z${word:1:1}”!=“zo”];然后#去掉讨厌的“Total”
[$needhdr-eq 0]&&printf“%s”$word#打印头
obsfarray+=(“$obsfreq”)#填充Obs频率阵列
fi
fi
currentseq=“$inputseq”#保持当前seq以测试新值

只打印一次标题是一项挑战。这个脚本实现了这一点。输出的格式设置为与提供的示例一致,但可以轻松调整。脚本中的内联注释解释了它的工作方式:

#!/bin/bash
#
# Transpose the "Obs Frequency" column of a report made of repeated blocks.
#
# Usage: <script> datafile [key]
#   datafile  file to read; lines are split on whitespace into
#             word, obs count, obs frequency, (remaining fields ignored)
#   key       prefix that marks a sequence identifier when it starts the
#             second field of a line (default "s21")
#
# Output (stdout): one header line holding the words of the first block,
# then one line per sequence: its identifier followed by its frequencies.
# The header is printed only once, so every block must list the same words
# in the same order.
#
# Exit status: 0 on success, 1 when datafile is missing.

if [[ ! -f "$1" ]]; then
    # name the missing data file (not the script) and report on stderr
    printf "\n  Error: insufficient input, file '%s' not found.\n\n" "$1" >&2
    exit 1
fi

key="${2:-s21}"             # identifier prefix, default "s21"
inputseq=""                 # identifier of the sequence being collected

declare -i hdrdone=0        # becomes 1 once the header line is terminated
declare -i seqcnt=0         # number of sequence identifiers seen so far
declare -a obsfarray=()     # Obs Frequency values of the current sequence

## print the collected row as one newline-terminated line, then reset it
flush_row() {
    if (( ! hdrdone )); then
        printf '\n'         # close the header line built up word by word
        hdrdone=1
    fi
    printf '%s' "$inputseq"
    if (( ${#obsfarray[@]} > 0 )); then
        # fixed format string: values are data, never interpreted as a format
        printf '  %s' "${obsfarray[@]}"
    fi
    printf '\n'
    obsfarray=()
}

## make single pass through data file
# "|| [[ -n $word ]]" keeps a last line that lacks a trailing newline
while read -r word obscnt obsfreq _ || [[ -n "$word" ]]; do

    ## a second field starting with the key announces a new sequence
    if [[ "$obscnt" == "$key"* ]]; then
        # from the second identifier on, the previous sequence is complete
        if (( seqcnt > 0 )); then
            flush_row
        fi
        inputseq="$obscnt"
        seqcnt+=1
    fi

    ## data row: the word consists only of A, C, G, T
    # (this also rejects non-data lines such as 'Total ...')
    if [[ "$word" =~ ^[ACGT]+$ ]]; then
        # words double as header cells until the first row has been flushed
        if (( ! hdrdone )); then
            printf '    %s' "$word"
        fi
        obsfarray+=( "$obsfreq" )
    fi

done <"$1"

# print final sequence; stay silent when the file held nothing usable
if (( seqcnt > 0 || ${#obsfarray[@]} > 0 )); then
    flush_row
fi

exit 0
#/bin/bash
[-f“$1”]|{
printf“\n错误:输入不足,找不到文件“%s”。\n\n”“${0/*\/}”
出口1
}
##此脚本要求每个序列的标题行相等
key=“${2:-s21}”#标识序列(3个字符)的键默认为“s21”
currentseq=”“#变量保持序列
declare-i needhdr=0#控制打印头的标志
declare-i seqcnt=0#序列计数
声明-用于保存Obs频率的obsfarray数组
##对数据文件进行单次传递
读时-r单词obscnt obsrefq expfreq oefreq | | |[-n“$word”];做
##从obscnt捕获inputseq
如果[“z${obscnt:0:3}”=“z${key}”];然后
#如果序列计数>0,则已打印标题并准备打印数据
如果[$seqcnt-gt 0];然后
needhdr=1#将需要标头设置为false
printf“\n%s”“$inputseq”#打印换行符,后跟输入序列
对于${obsfarray[@]}中的i;是否打印Obs频率值
printf“$i”
完成
取消设置obsfarray#为下一个序列取消设置数组
fi
inputseq=“${obscnt}”#设置obscnt中的inputseq值
((seqcnt++)#增加seqcnt
fi
##打印标题,捕获obsfreq值
#测试第一个字符是否为CGT
如果[“z${word:0:1}”=“zA”]| |[“z${word:0:1}”=“zC”]| |
[“z${word:0:1}”=“zG”]| |[“z${word:0:1}”=“zT”];然后
如果[“z${word:1:1}”!=“zo”];然后#去掉讨厌的“Total”
[$needhdr-eq 0]&&printf“%s”$word#打印头
obsfarray+=(“$obsfreq”)#填充Obs频率阵列
fi
fi
currentseq=“$input
# Sequence identifier line ("# s21_contigNNNNN"): print the block collected
# so far, then remember the new identifier as the label of the next row.
# Blanks or tabs are accepted between "#" and the identifier.
/^#[ \t]+s21_contig[0-9]+/ {
        if (source) print_results()
        source = $2
}
# Data row: first field made only of A/C/G/T, followed by a blank or tab.
/^[ACGT]+[ \t]/ {
        if (!($1 in key))
        {
                # first sighting of this word: append it to the column order
                key[$1] = 1
                seq[++nkeys] = $1
        }
        obs[$1] = $3    # third field is stored as this word's value
}
# Print the last block; print nothing when no identifier and no data row
# was ever seen (otherwise an empty header and an empty row would appear).
END { if (source != "" || nkeys > 0) print_results() }
function print_results(     i)
{
        if (printed_header == 0)
        {
                pad = "                 "
                for (i = 1; i <= nkeys; i++)
                {
                        printf "%s%s", pad, seq[i]
                        pad = "      "
                }
                printf "\n"
                printed_header++
        }
        printf "%s ", source
        for (i = 1; i <= nkeys; i++)
                printf " %-9s", obs[seq[i]]
        printf "\n"
        delete obs
}
awk -f transpose.awk data
                 AAAA      AAAC      AAAG      AAAT      AACA      AACC      AACG      AACT      AAGA      AAGC      AAGG      AAGT      AATA      AATC      AATG      AATT      ACAA      ACAC      ACAG      ACAT      ACCA      ACCC      ACCG      ACCT      ACGA      ACGC      ACGG      ACGT      ACTA      ACTC      ACTG      ACTT      AGAA      AGAC      AGAG      AGAT      AGCA      AGCC      AGCG      AGCT      AGGA      AGGC      AGGG      AGGT      AGTA      AGTC      AGTG      AGTT      ATAA      ATAC      ATAG      ATAT      ATCA      ATCC      ATCG      ATCT      ATGA      ATGC      ATGG      ATGT      ATTA      ATTC      ATTG      ATTT      CAAA      CAAC      CAAG      CAAT      CACA      CACC      CACG      CACT      CAGA      CAGC      CAGG      CAGT      CATA      CATC      CATG      CATT      CCAA      CCAC      CCAG      CCAT      CCCA      CCCC      CCCG      CCCT      CCGA      CCGC      CCGG      CCGT      CCTA      CCTC      CCTG      CCTT      CGAA      CGAC      CGAG      CGAT      CGCA      CGCC      CGCG      CGCT      CGGA      CGGC      CGGG      CGGT      CGTA      CGTC      CGTG      CGTT      CTAA      CTAC      CTAG      CTAT      CTCA      CTCC      CTCG      CTCT      CTGA      CTGC      CTGG      CTGT      CTTA      CTTC      CTTG      CTTT      GAAA      GAAC      GAAG      GAAT      GACA      GACC      GACG      GACT      GAGA      GAGC      GAGG      GAGT      GATA      GATC      GATG      GATT      GCAA      GCAC      GCAG      GCAT      GCCA      GCCC      GCCG      GCCT      GCGA      GCGC      GCGG      GCGT      GCTA      GCTC      GCTG      GCTT      GGAA      GGAC      GGAG      GGAT      GGCA      GGCC      GGCG      GGCT      GGGA      GGGC      GGGG      GGGT      GGTA      GGTC      GGTG      GGTT      GTAA      GTAC      GTAG      GTAT      GTCA      GTCC      GTCG      GTCT      GTGA      GTGC      GTGG      GTGT      GTTA      GTTC      GTTG      GTTT      TAAA      TAAC      TAAG      TAAT      TACA      TACC      
TACG      TACT      TAGA      TAGC      TAGG      TAGT      TATA      TATC      TATG      TATT      TCAA      TCAC      TCAG      TCAT      TCCA      TCCC      TCCG      TCCT      TCGA      TCGC      TCGG      TCGT      TCTA      TCTC      TCTG      TCTT      TGAA      TGAC      TGAG      TGAT      TGCA      TGCC      TGCG      TGCT      TGGA      TGGC      TGGG      TGGT      TGTA      TGTC      TGTG      TGTT      TTAA      TTAC      TTAG      TTAT      TTCA      TTCC      TTCG      TTCT      TTGA      TTGC      TTGG      TTGT      TTTA      TTTC      TTTG      TTTT
s21_contig00001  0.0115837 0.0060850 0.0061659 0.0069745 0.0045890 0.0022844 0.0064893 0.0022035 0.0044879 0.0068532 0.0039623 0.0034165 0.0026079 0.0045688 0.0057817 0.0039623 0.0042656 0.0018396 0.0020822 0.0033761 0.0016173 0.0014555 0.0043869 0.0010512 0.0065095 0.0040634 0.0050944 0.0040836 0.0007076 0.0015162 0.0017588 0.0034165 0.0031941 0.0018396 0.0020216 0.0016981 0.0046497 0.0037399 0.0070756 0.0044071 0.0029111 0.0029920 0.0022035 0.0010512 0.0012938 0.0017790 0.0021227 0.0022035 0.0027494 0.0020216 0.0012938 0.0031132 0.0048922 0.0034771 0.0087131 0.0016981 0.0062871 0.0046497 0.0043060 0.0033761 0.0022237 0.0033558 0.0043666 0.0069745 0.0079246 0.0041645 0.0054987 0.0043666 0.0016375 0.0026483 0.0028100 0.0021227 0.0011523 0.0061254 0.0013545 0.0017588 0.0025674 0.0065904 0.0036793 0.0057817 0.0043464 0.0017588 0.0020216 0.0043060 0.0021429 0.0027291 0.0042858 0.0022035 0.0055796 0.0077225 0.0059435 0.0050944 0.0007278 0.0030930 0.0013545 0.0039623 0.0066308 0.0064489 0.0048720 0.0087131 0.0049933 0.0094004 0.0072373 0.0070756 0.0057211 0.0099462 0.0042858 0.0043869 0.0027494 0.0077023 0.0028100 0.0064893 0.0008895 0.0008491 0.0002426 0.0012938 0.0026483 0.0032345 0.0048720 0.0020216 0.0028909 0.0033963 0.0020216 0.0020822 0.0012332 0.0058222 0.0054987 0.0061659 0.0080661 0.0042656 0.0058222 0.0033558 0.0038006 0.0026685 0.0077023 0.0017790 0.0023653 0.0058020 0.0030930 0.0015162 0.0027696 0.0048518 0.0065904 0.0045688 0.0069543 0.0030526 0.0033963 0.0046497 0.0052561 0.0037602 0.0099462 0.0029920 0.0074192 0.0095015 0.0077225 0.0040634 0.0010917 0.0058020 0.0061254 0.0068532 0.0059637 0.0027898 0.0032345 0.0034771 0.0050540 0.0037602 0.0094004 0.0037399 0.0034165 0.0037602 0.0027291 0.0014555 0.0009097 0.0026685 0.0026483 0.0022844 0.0018801 0.0017386 0.0008491 0.0020216 0.0048720 0.0027898 0.0064489 0.0018396 0.0025674 0.0030526 0.0017588 0.0018396 0.0010512 0.0042656 0.0041645 0.0060850 0.0032345 0.0010512 0.0012332 0.0022237 0.0015364 0.0009097 
0.0027494 0.0012938 0.0007480 0.0010917 0.0007278 0.0007076 0.0012130 0.0027696 0.0025674 0.0026079 0.0063882 0.0025674 0.0028909 0.0062871 0.0034165 0.0034165 0.0057211 0.0029111 0.0071564 0.0074192 0.0055796 0.0065095 0.0007480 0.0023653 0.0011523 0.0044879 0.0057211 0.0048720 0.0026483 0.0048922 0.0033558 0.0050540 0.0049933 0.0046497 0.0034165 0.0052561 0.0021429 0.0016173 0.0015364 0.0038006 0.0016375 0.0045890 0.0022237 0.0018801 0.0008895 0.0027494 0.0057211 0.0059637 0.0066308 0.0031941 0.0063882 0.0069543 0.0043464 0.0042656 0.0032345 0.0080661 0.0079246 0.0115837
s21_contig00002  0.0125181 0.0069468 0.0058463 0.0078066 0.0061215 0.0029920 0.0057776 0.0028200 0.0050210 0.0064654 0.0048834 0.0029920 0.0051585 0.0052617 0.0055024 0.0048146 0.0062934 0.0024761 0.0031639 0.0041956 0.0024417 0.0015820 0.0041956 0.0014444 0.0047459 0.0030607 0.0035078 0.0028200 0.0013756 0.0015820 0.0022010 0.0029920 0.0048146 0.0019259 0.0020978 0.0026481 0.0049866 0.0035422 0.0058463 0.0029576 0.0040581 0.0031295 0.0028888 0.0014444 0.0016163 0.0015820 0.0021322 0.0028200 0.0041268 0.0029576 0.0026137 0.0058463 0.0048490 0.0040237 0.0070156 0.0026481 0.0067749 0.0041956 0.0050554 0.0041956 0.0029232 0.0052617 0.0047459 0.0078066 0.0080473 0.0046771 0.0053305 0.0047459 0.0027856 0.0030263 0.0024761 0.0021322 0.0017883 0.0052273 0.0018915 0.0022010 0.0037141 0.0066717 0.0043332 0.0055024 0.0049522 0.0024417 0.0021666 0.0050554 0.0026481 0.0032327 0.0027856 0.0028888 0.0037829 0.0057432 0.0037829 0.0035078 0.0016851 0.0030951 0.0018915 0.0048834 0.0055712 0.0034734 0.0033015 0.0070156 0.0032327 0.0062934 0.0041268 0.0058463 0.0039893 0.0058807 0.0027856 0.0041956 0.0020978 0.0037829 0.0024761 0.0057776 0.0016163 0.0015820 0.0006878 0.0026137 0.0024073 0.0037485 0.0033015 0.0020978 0.0024417 0.0033359 0.0021666 0.0031639 0.0023729 0.0058120 0.0053305 0.0058463 0.0084944 0.0043332 0.0058120 0.0052617 0.0037829 0.0020634 0.0037829 0.0015820 0.0031983 0.0036798 0.0030951 0.0015820 0.0027512 0.0038517 0.0066717 0.0052617 0.0059151 0.0025105 0.0033359 0.0041956 0.0050210 0.0027856 0.0058807 0.0031295 0.0051929 0.0055024 0.0057432 0.0030607 0.0019602 0.0036798 0.0052273 0.0064654 0.0064654 0.0022698 0.0037485 0.0040237 0.0045739 0.0024073 0.0062934 0.0035422 0.0039549 0.0027856 0.0032327 0.0015820 0.0015820 0.0020634 0.0030263 0.0029920 0.0024073 0.0017883 0.0015820 0.0029576 0.0035422 0.0022698 0.0034734 0.0019259 0.0029920 0.0025105 0.0024417 0.0024761 0.0017539 0.0043332 0.0046771 0.0069468 0.0040581 0.0017539 0.0023729 0.0029232 0.0034390 0.0015820 
0.0020978 0.0016163 0.0014788 0.0019602 0.0016851 0.0013756 0.0039205 0.0027512 0.0037141 0.0051585 0.0056400 0.0029920 0.0024417 0.0067749 0.0045051 0.0039549 0.0039893 0.0040581 0.0056400 0.0051929 0.0037829 0.0047459 0.0014788 0.0031983 0.0017883 0.0050210 0.0070500 0.0035422 0.0024073 0.0048490 0.0031639 0.0045739 0.0032327 0.0049866 0.0045051 0.0050210 0.0026481 0.0024417 0.0034390 0.0037829 0.0027856 0.0061215 0.0029576 0.0024073 0.0016163 0.0041268 0.0070500 0.0064654 0.0055712 0.0048146 0.0056400 0.0059151 0.0049522 0.0062934 0.0040581 0.0084944 0.0080473 0.0125181
#!/bin/bash
#
# Transpose the "Obs Frequency" column of every sequence block in a word
# frequency report into one output row per sequence.
#
# Usage: dna.sh datafile [key]
#   datafile  report to read; each line is split on whitespace into the
#             fields: word  obscnt  obsfreq  (anything after is ignored)
#   key       prefix that identifies a sequence name in the 2nd field of a
#             sequence marker line (default "s21"); any length is accepted
#
# Output (stdout): a header row built from the words of the first sequence,
# followed by one row per sequence: its name, then its Obs Frequency values.
# The words are only printed for the first sequence, so every sequence must
# list the same words in the same order for the columns to line up.
#
# Exit status: 0 on success, 1 when datafile is missing or not a regular file.

infile="${1:-}"

[ -f "$infile" ] || {
    # report the file that was looked for (not the script name), on stderr
    printf "\n  Error: insufficient input, file '%s' not found.\n\n" "$infile" >&2
    exit 1
}

key="${2:-s21}"         # prefix identifying a sequence name, default "s21"
inputseq=""             # name of the sequence currently being collected

declare -i hdr_done=0   # becomes 1 once the header row is complete
declare -i seqcnt=0     # number of sequence markers seen so far
declare -a obsfarray=() # Obs Frequency values of the current sequence

# Print one output row: a newline, the sequence name ($1), then every
# remaining argument as a value preceded by two spaces.
print_row() {
    local name=$1
    shift
    printf "\n%s" "$name"
    # values are passed as arguments, never as the format string, so '%' or
    # backslashes in the data are printed literally; skip when there are none
    # because printf would otherwise still emit the bare separator once
    if (( $# > 0 )); then
        printf "  %s" "$@"
    fi
}

## make a single pass through the data file
## ('|| [ -n "$word" ]' keeps a final line that lacks a trailing newline)
while read -r word obscnt obsfreq _ || [ -n "$word" ]; do

    ## a 2nd field starting with the key marks the start of a new sequence
    if [[ $obscnt == "$key"* ]]; then
        # from the second marker on, the header is complete and the
        # previous sequence is ready to be written out
        if (( seqcnt > 0 )); then
            hdr_done=1
            print_row "$inputseq" "${obsfarray[@]}"
            obsfarray=()                    # start empty for the next sequence
        fi
        inputseq=$obscnt
        seqcnt+=1                           # integer attribute => arithmetic add
    fi

    ## data line: word starts with A, C, G or T; a second character 'o'
    ## is rejected to drop the 'Total' summary line
    if [[ $word == [ACGT]* && ${word:1:1} != o ]]; then
        (( hdr_done )) || printf "    %s" "$word"   # header: first sequence only
        obsfarray+=( "$obsfreq" )
    fi

done <"$infile"

# the last sequence has no marker after it, so write it out here
print_row "$inputseq" "${obsfarray[@]}"

exit 0
$ ./dna.sh dat/dna.dat
    AAAA    AAAC    AAAG    AAAT    AACA <snip>
s21_contig00001  0.0115837  0.0060850  0.0061659  0.0069745  0.0045890 <snip>
s21_contig00002  0.0125181  0.0069468  0.0058463  0.0078066  0.0061215 <snip>