使用bash脚本转置特定列
我有一个具有以下模式的巨大文件(沿文件重复数千次): 我想捕获第一个块的第一列和第三列(单词和Obs频率)(从只包含#的行开始,到包含“Other”的行结束)并转置它们。在下面的模块中,我只想在第一次换位下对Obs频率进行换位。输出文件应如下所示:使用bash脚本转置特定列,bash,awk,sed,transpose,Bash,Awk,Sed,Transpose,我有一个具有以下模式的巨大文件(沿文件重复数千次): 我想捕获第一个块的第一列和第三列(单词和Obs频率)(从只包含#的行开始,到包含“Other”的行结束)并转置它们。在下面的模块中,我只想在第一次换位下对Obs频率进行换位。输出文件应如下所示: AAAA AAAC AAAG AAAT AACA AACC AACG AACT AAGA AAGC AAGG AAGT AATA AATC AATG A
AAAA AAAC AAAG AAAT AACA AACC AACG AACT AAGA AAGC AAGG AAGT AATA AATC AATG AATT ACAA ACAC ACAG ACAT ACCA ACCC ACCG ACCT ACGA ACGC ACGG ACGT ACTA ACTC ACTG ACTT AGAA AGAC AGAG AGAT AGCA AGCC AGCG AGCT AGGA AGGC AGGG AGGT AGTA AGTC AGTG AGTT ATAA ATAC ATAG ATAT ATCA ATCC ATCG ATCT ATGA ATGC ATGG ATGT ATTA ATTC ATTG ATTT CAAA CAAC CAAG CAAT CACA CACC CACG CACT CAGA CAGC CAGG CAGT CATA CATC CATG CATT CCAA CCAC CCAG CCAT CCCA CCCC CCCG CCCT CCGA CCGC CCGG CCGT CCTA CCTC CCTG CCTT CGAA CGAC CGAG CGAT CGCA CGCC CGCG CGCT CGGA CGGC CGGG CGGT CGTA CGTC CGTG CGTT CTAA CTAC CTAG CTAT CTCA CTCC CTCG CTCT CTGA CTGC CTGG CTGT CTTA CTTC CTTG CTTT GAAA GAAC GAAG GAAT GACA GACC GACG GACT GAGA GAGC GAGG GAGT GATA GATC GATG GATT GCAA GCAC GCAG GCAT GCCA GCCC GCCG GCCT GCGA GCGC GCGG GCGT GCTA GCTC GCTG GCTT GGAA GGAC GGAG GGAT GGCA GGCC GGCG GGCT GGGA GGGC GGGG GGGT GGTA GGTC GGTG GGTT GTAA GTAC GTAG GTAT GTCA GTCC GTCG GTCT GTGA GTGC GTGG GTGT GTTA GTTC GTTG GTTT TAAA TAAC TAAG TAAT TACA TACC TACG TACT TAGA TAGC TAGG TAGT TATA TATC TATG TATT TCAA TCAC TCAG TCAT TCCA TCCC TCCG TCCT TCGA TCGC TCGG TCGT TCTA TCTC TCTG TCTT TGAA TGAC TGAG TGAT TGCA TGCC TGCG TGCT TGGA TGGC TGGG TGGT TGTA TGTC TGTG TGTT TTAA TTAC TTAG TTAT TTCA TTCC TTCG TTCT TTGA TTGC TTGG TTGT TTTA TTTC TTTG TTTT
s21_contig00001 0.0125181 0.0069468 0.0058463 0.0078066 0.0061215 0.0029920 0.0057776 0.0028200 0.0050210 0.0064654 0.0048834 0.0029920 0.0051585 0.0052617 0.0055024 0.0048146 0.0062934 0.0024761 0.0031639 0.0041956 0.0024417 0.0015820 0.0041956 0.0014444 0.0047459 0.0030607 0.0035078 0.0028200 0.0013756 0.0015820 0.0022010 0.0029920 0.0048146 0.0019259 0.0020978 0.0026481 0.0049866 0.0035422 0.0058463 0.0029576 0.0040581 0.0031295 0.0028888 0.0014444 0.0016163 0.0015820 0.0021322 0.0028200 0.0041268 0.0029576 0.0026137 0.0058463 0.0048490 0.0040237 0.0070156 0.0026481 0.0067749 0.0041956 0.0050554 0.0041956 0.0029232 0.0052617 0.0047459 0.0078066 0.0080473 0.0046771 0.0053305 0.0047459 0.0027856 0.0030263 0.0024761 0.0021322 0.0017883 0.0052273 0.0018915 0.0022010 0.0037141 0.0066717 0.0043332 0.0055024 0.0049522 0.0024417 0.0021666 0.0050554 0.0026481 0.0032327 0.0027856 0.0028888 0.0037829 0.0057432 0.0037829 0.0035078 0.0016851 0.0030951 0.0018915 0.0048834 0.0055712 0.0034734 0.0033015 0.0070156 0.0032327 0.0062934 0.0041268 0.0058463 0.0039893 0.0058807 0.0027856 0.0041956 0.0020978 0.0037829 0.0024761 0.0057776 0.0016163 0.0015820 0.0006878 0.0026137 0.0024073 0.0037485 0.0033015 0.0020978 0.0024417 0.0033359 0.0021666 0.0031639 0.0023729 0.0058120 0.0053305 0.0058463 0.0084944 0.0043332 0.0058120 0.0052617 0.0037829 0.0020634 0.0037829 0.0015820 0.0031983 0.0036798 0.0030951 0.0015820 0.0027512 0.0038517 0.0066717 0.0052617 0.0059151 0.0025105 0.0033359 0.0041956 0.0050210 0.0027856 0.0058807 0.0031295 0.0051929 0.0055024 0.0057432 0.0030607 0.0019602 0.0036798 0.0052273 0.0064654 0.0064654 0.0022698 0.0037485 0.0040237 0.0045739 0.0024073 0.0062934 0.0035422 0.0039549 0.0027856 0.0032327 0.0015820 0.0015820 0.0020634 0.0030263 0.0029920 0.0024073 0.0017883 0.0015820 0.0029576 0.0035422 0.0022698 0.0034734 0.0019259 0.0029920 0.0025105 0.0024417 0.0024761 0.0017539 0.0043332 0.0046771 0.0069468 0.0040581 0.0017539 0.0023729 0.0029232 0.0034390 0.0015820 
0.0020978 0.0016163 0.0014788 0.0019602 0.0016851 0.0013756 0.0039205 0.0027512 0.0037141 0.0051585 0.0056400 0.0029920 0.0024417 0.0067749 0.0045051 0.0039549 0.0039893 0.0040581 0.0056400 0.0051929 0.0037829 0.0047459 0.0014788 0.0031983 0.0017883 0.0050210 0.0070500 0.0035422 0.0024073 0.0048490 0.0031639 0.0045739 0.0032327 0.0049866 0.0045051 0.0050210 0.0026481 0.0024417 0.0034390 0.0037829 0.0027856 0.0061215 0.0029576 0.0024073 0.0016163 0.0041268 0.0070500 0.0064654 0.0055712 0.0048146 0.0056400 0.0059151 0.0049522 0.0062934 0.0040581 0.0084944 0.0080473 0.0125181
s21_contig00002 0.0125181 0.0069468 0.0058463 0.0078066 0.0061215 0.0029920 0.0057776 0.0028200 0.0050210 0.0064654 0.0048834 0.0029920 0.0051585 0.0052617 0.0055024 0.0048146 0.0062934 0.0024761 0.0031639 0.0041956 0.0024417 0.0015820 0.0041956 0.0014444 0.0047459 0.0030607 0.0035078 0.0028200 0.0013756 0.0015820 0.0022010 0.0029920 0.0048146 0.0019259 0.0020978 0.0026481 0.0049866 0.0035422 0.0058463 0.0029576 0.0040581 0.0031295 0.0028888 0.0014444 0.0016163 0.0015820 0.0021322 0.0028200 0.0041268 0.0029576 0.0026137 0.0058463 0.0048490 0.0040237 0.0070156 0.0026481 0.0067749 0.0041956 0.0050554 0.0041956 0.0029232 0.0052617 0.0047459 0.0078066 0.0080473 0.0046771 0.0053305 0.0047459 0.0027856 0.0030263 0.0024761 0.0021322 0.0017883 0.0052273 0.0018915 0.0022010 0.0037141 0.0066717 0.0043332 0.0055024 0.0049522 0.0024417 0.0021666 0.0050554 0.0026481 0.0032327 0.0027856 0.0028888 0.0037829 0.0057432 0.0037829 0.0035078 0.0016851 0.0030951 0.0018915 0.0048834 0.0055712 0.0034734 0.0033015 0.0070156 0.0032327 0.0062934 0.0041268 0.0058463 0.0039893 0.0058807 0.0027856 0.0041956 0.0020978 0.0037829 0.0024761 0.0057776 0.0016163 0.0015820 0.0006878 0.0026137 0.0024073 0.0037485 0.0033015 0.0020978 0.0024417 0.0033359 0.0021666 0.0031639 0.0023729 0.0058120 0.0053305 0.0058463 0.0084944 0.0043332 0.0058120 0.0052617 0.0037829 0.0020634 0.0037829 0.0015820 0.0031983 0.0036798 0.0030951 0.0015820 0.0027512 0.0038517 0.0066717 0.0052617 0.0059151 0.0025105 0.0033359 0.0041956 0.0050210 0.0027856 0.0058807 0.0031295 0.0051929 0.0055024 0.0057432 0.0030607 0.0019602 0.0036798 0.0052273 0.0064654 0.0064654 0.0022698 0.0037485 0.0040237 0.0045739 0.0024073 0.0062934 0.0035422 0.0039549 0.0027856 0.0032327 0.0015820 0.0015820 0.0020634 0.0030263 0.0029920 0.0024073 0.0017883 0.0015820 0.0029576 0.0035422 0.0022698 0.0034734 0.0019259 0.0029920 0.0025105 0.0024417 0.0024761 0.0017539 0.0043332 0.0046771 0.0069468 0.0040581 0.0017539 0.0023729 0.0029232 0.0034390 0.0015820 
0.0020978 0.0016163 0.0014788 0.0019602 0.0016851 0.0013756 0.0039205 0.0027512 0.0037141 0.0051585 0.0056400 0.0029920 0.0024417 0.0067749 0.0045051 0.0039549 0.0039893 0.0040581 0.0056400 0.0051929 0.0037829 0.0047459 0.0014788 0.0031983 0.0017883 0.0050210 0.0070500 0.0035422 0.0024073 0.0048490 0.0031639 0.0045739 0.0032327 0.0049866 0.0045051 0.0050210 0.0026481 0.0024417 0.0034390 0.0037829 0.0027856 0.0061215 0.0029576 0.0024073 0.0016163 0.0041268 0.0070500 0.0064654 0.0055712 0.0048146 0.0056400 0.0059151 0.0049522 0.0062934 0.0040581 0.0084944 0.0080473 0.0125181
重要的是,每个块的标识符(即出现在“输入序列是:”这一行中、匹配模式“21_contig”的名称)应放在输出的第一列,取代“Obs频率”这一列名。此awk脚本应该能满足您的需要: script.awk的内容: 输出:
此awk脚本应满足您的需要: script.awk的内容: 输出: 这似乎也起作用(将此代码另存为
transpose.awk
):
根据给定数据:
AAAA AAAC AAAG AAAT AACA AACC AACG AACT AAGA AAGC AAGG AAGT AATA AATC AATG AATT ACAA ACAC ACAG ACAT ACCA ACCC ACCG ACCT ACGA ACGC ACGG ACGT ACTA ACTC ACTG ACTT AGAA AGAC AGAG AGAT AGCA AGCC AGCG AGCT AGGA AGGC AGGG AGGT AGTA AGTC AGTG AGTT ATAA ATAC ATAG ATAT ATCA ATCC ATCG ATCT ATGA ATGC ATGG ATGT ATTA ATTC ATTG ATTT CAAA CAAC CAAG CAAT CACA CACC CACG CACT CAGA CAGC CAGG CAGT CATA CATC CATG CATT CCAA CCAC CCAG CCAT CCCA CCCC CCCG CCCT CCGA CCGC CCGG CCGT CCTA CCTC CCTG CCTT CGAA CGAC CGAG CGAT CGCA CGCC CGCG CGCT CGGA CGGC CGGG CGGT CGTA CGTC CGTG CGTT CTAA CTAC CTAG CTAT CTCA CTCC CTCG CTCT CTGA CTGC CTGG CTGT CTTA CTTC CTTG CTTT GAAA GAAC GAAG GAAT GACA GACC GACG GACT GAGA GAGC GAGG GAGT GATA GATC GATG GATT GCAA GCAC GCAG GCAT GCCA GCCC GCCG GCCT GCGA GCGC GCGG GCGT GCTA GCTC GCTG GCTT GGAA GGAC GGAG GGAT GGCA GGCC GGCG GGCT GGGA GGGC GGGG GGGT GGTA GGTC GGTG GGTT GTAA GTAC GTAG GTAT GTCA GTCC GTCG GTCT GTGA GTGC GTGG GTGT GTTA GTTC GTTG GTTT TAAA TAAC TAAG TAAT TACA TACC TACG TACT TAGA TAGC TAGG TAGT TATA TATC TATG TATT TCAA TCAC TCAG TCAT TCCA TCCC TCCG TCCT TCGA TCGC TCGG TCGT TCTA TCTC TCTG TCTT TGAA TGAC TGAG TGAT TGCA TGCC TGCG TGCT TGGA TGGC TGGG TGGT TGTA TGTC TGTG TGTT TTAA TTAC TTAG TTAT TTCA TTCC TTCG TTCT TTGA TTGC TTGG TTGT TTTA TTTC TTTG TTTT
s21_contig00001 0.0115837 0.0060850 0.0061659 0.0069745 0.0045890 0.0022844 0.0064893 0.0022035 0.0044879 0.0068532 0.0039623 0.0034165 0.0026079 0.0045688 0.0057817 0.0039623 0.0042656 0.0018396 0.0020822 0.0033761 0.0016173 0.0014555 0.0043869 0.0010512 0.0065095 0.0040634 0.0050944 0.0040836 0.0007076 0.0015162 0.0017588 0.0034165 0.0031941 0.0018396 0.0020216 0.0016981 0.0046497 0.0037399 0.0070756 0.0044071 0.0029111 0.0029920 0.0022035 0.0010512 0.0012938 0.0017790 0.0021227 0.0022035 0.0027494 0.0020216 0.0012938 0.0031132 0.0048922 0.0034771 0.0087131 0.0016981 0.0062871 0.0046497 0.0043060 0.0033761 0.0022237 0.0033558 0.0043666 0.0069745 0.0079246 0.0041645 0.0054987 0.0043666 0.0016375 0.0026483 0.0028100 0.0021227 0.0011523 0.0061254 0.0013545 0.0017588 0.0025674 0.0065904 0.0036793 0.0057817 0.0043464 0.0017588 0.0020216 0.0043060 0.0021429 0.0027291 0.0042858 0.0022035 0.0055796 0.0077225 0.0059435 0.0050944 0.0007278 0.0030930 0.0013545 0.0039623 0.0066308 0.0064489 0.0048720 0.0087131 0.0049933 0.0094004 0.0072373 0.0070756 0.0057211 0.0099462 0.0042858 0.0043869 0.0027494 0.0077023 0.0028100 0.0064893 0.0008895 0.0008491 0.0002426 0.0012938 0.0026483 0.0032345 0.0048720 0.0020216 0.0028909 0.0033963 0.0020216 0.0020822 0.0012332 0.0058222 0.0054987 0.0061659 0.0080661 0.0042656 0.0058222 0.0033558 0.0038006 0.0026685 0.0077023 0.0017790 0.0023653 0.0058020 0.0030930 0.0015162 0.0027696 0.0048518 0.0065904 0.0045688 0.0069543 0.0030526 0.0033963 0.0046497 0.0052561 0.0037602 0.0099462 0.0029920 0.0074192 0.0095015 0.0077225 0.0040634 0.0010917 0.0058020 0.0061254 0.0068532 0.0059637 0.0027898 0.0032345 0.0034771 0.0050540 0.0037602 0.0094004 0.0037399 0.0034165 0.0037602 0.0027291 0.0014555 0.0009097 0.0026685 0.0026483 0.0022844 0.0018801 0.0017386 0.0008491 0.0020216 0.0048720 0.0027898 0.0064489 0.0018396 0.0025674 0.0030526 0.0017588 0.0018396 0.0010512 0.0042656 0.0041645 0.0060850 0.0032345 0.0010512 0.0012332 0.0022237 0.0015364 0.0009097 
0.0027494 0.0012938 0.0007480 0.0010917 0.0007278 0.0007076 0.0012130 0.0027696 0.0025674 0.0026079 0.0063882 0.0025674 0.0028909 0.0062871 0.0034165 0.0034165 0.0057211 0.0029111 0.0071564 0.0074192 0.0055796 0.0065095 0.0007480 0.0023653 0.0011523 0.0044879 0.0057211 0.0048720 0.0026483 0.0048922 0.0033558 0.0050540 0.0049933 0.0046497 0.0034165 0.0052561 0.0021429 0.0016173 0.0015364 0.0038006 0.0016375 0.0045890 0.0022237 0.0018801 0.0008895 0.0027494 0.0057211 0.0059637 0.0066308 0.0031941 0.0063882 0.0069543 0.0043464 0.0042656 0.0032345 0.0080661 0.0079246 0.0115837
s21_contig00002 0.0125181 0.0069468 0.0058463 0.0078066 0.0061215 0.0029920 0.0057776 0.0028200 0.0050210 0.0064654 0.0048834 0.0029920 0.0051585 0.0052617 0.0055024 0.0048146 0.0062934 0.0024761 0.0031639 0.0041956 0.0024417 0.0015820 0.0041956 0.0014444 0.0047459 0.0030607 0.0035078 0.0028200 0.0013756 0.0015820 0.0022010 0.0029920 0.0048146 0.0019259 0.0020978 0.0026481 0.0049866 0.0035422 0.0058463 0.0029576 0.0040581 0.0031295 0.0028888 0.0014444 0.0016163 0.0015820 0.0021322 0.0028200 0.0041268 0.0029576 0.0026137 0.0058463 0.0048490 0.0040237 0.0070156 0.0026481 0.0067749 0.0041956 0.0050554 0.0041956 0.0029232 0.0052617 0.0047459 0.0078066 0.0080473 0.0046771 0.0053305 0.0047459 0.0027856 0.0030263 0.0024761 0.0021322 0.0017883 0.0052273 0.0018915 0.0022010 0.0037141 0.0066717 0.0043332 0.0055024 0.0049522 0.0024417 0.0021666 0.0050554 0.0026481 0.0032327 0.0027856 0.0028888 0.0037829 0.0057432 0.0037829 0.0035078 0.0016851 0.0030951 0.0018915 0.0048834 0.0055712 0.0034734 0.0033015 0.0070156 0.0032327 0.0062934 0.0041268 0.0058463 0.0039893 0.0058807 0.0027856 0.0041956 0.0020978 0.0037829 0.0024761 0.0057776 0.0016163 0.0015820 0.0006878 0.0026137 0.0024073 0.0037485 0.0033015 0.0020978 0.0024417 0.0033359 0.0021666 0.0031639 0.0023729 0.0058120 0.0053305 0.0058463 0.0084944 0.0043332 0.0058120 0.0052617 0.0037829 0.0020634 0.0037829 0.0015820 0.0031983 0.0036798 0.0030951 0.0015820 0.0027512 0.0038517 0.0066717 0.0052617 0.0059151 0.0025105 0.0033359 0.0041956 0.0050210 0.0027856 0.0058807 0.0031295 0.0051929 0.0055024 0.0057432 0.0030607 0.0019602 0.0036798 0.0052273 0.0064654 0.0064654 0.0022698 0.0037485 0.0040237 0.0045739 0.0024073 0.0062934 0.0035422 0.0039549 0.0027856 0.0032327 0.0015820 0.0015820 0.0020634 0.0030263 0.0029920 0.0024073 0.0017883 0.0015820 0.0029576 0.0035422 0.0022698 0.0034734 0.0019259 0.0029920 0.0025105 0.0024417 0.0024761 0.0017539 0.0043332 0.0046771 0.0069468 0.0040581 0.0017539 0.0023729 0.0029232 0.0034390 0.0015820 
0.0020978 0.0016163 0.0014788 0.0019602 0.0016851 0.0013756 0.0039205 0.0027512 0.0037141 0.0051585 0.0056400 0.0029920 0.0024417 0.0067749 0.0045051 0.0039549 0.0039893 0.0040581 0.0056400 0.0051929 0.0037829 0.0047459 0.0014788 0.0031983 0.0017883 0.0050210 0.0070500 0.0035422 0.0024073 0.0048490 0.0031639 0.0045739 0.0032327 0.0049866 0.0045051 0.0050210 0.0026481 0.0024417 0.0034390 0.0037829 0.0027856 0.0061215 0.0029576 0.0024073 0.0016163 0.0041268 0.0070500 0.0064654 0.0055712 0.0048146 0.0056400 0.0059151 0.0049522 0.0062934 0.0040581 0.0084944 0.0080473 0.0125181
该脚本按首次出现的顺序输出[ACGT]单词,遇到新的单词时将其追加到列表末尾。如果某个序列中缺少某个单词的值,则输出中对应的位置显示为空白字段。标题行取自第一组数据结束时已收集到的[ACGT]单词列表;此后脚本不会再次打印标题。下面的做法似乎也可行(将此代码另存为transpose.awk
):
根据给定数据:
AAAA AAAC AAAG AAAT AACA AACC AACG AACT AAGA AAGC AAGG AAGT AATA AATC AATG AATT ACAA ACAC ACAG ACAT ACCA ACCC ACCG ACCT ACGA ACGC ACGG ACGT ACTA ACTC ACTG ACTT AGAA AGAC AGAG AGAT AGCA AGCC AGCG AGCT AGGA AGGC AGGG AGGT AGTA AGTC AGTG AGTT ATAA ATAC ATAG ATAT ATCA ATCC ATCG ATCT ATGA ATGC ATGG ATGT ATTA ATTC ATTG ATTT CAAA CAAC CAAG CAAT CACA CACC CACG CACT CAGA CAGC CAGG CAGT CATA CATC CATG CATT CCAA CCAC CCAG CCAT CCCA CCCC CCCG CCCT CCGA CCGC CCGG CCGT CCTA CCTC CCTG CCTT CGAA CGAC CGAG CGAT CGCA CGCC CGCG CGCT CGGA CGGC CGGG CGGT CGTA CGTC CGTG CGTT CTAA CTAC CTAG CTAT CTCA CTCC CTCG CTCT CTGA CTGC CTGG CTGT CTTA CTTC CTTG CTTT GAAA GAAC GAAG GAAT GACA GACC GACG GACT GAGA GAGC GAGG GAGT GATA GATC GATG GATT GCAA GCAC GCAG GCAT GCCA GCCC GCCG GCCT GCGA GCGC GCGG GCGT GCTA GCTC GCTG GCTT GGAA GGAC GGAG GGAT GGCA GGCC GGCG GGCT GGGA GGGC GGGG GGGT GGTA GGTC GGTG GGTT GTAA GTAC GTAG GTAT GTCA GTCC GTCG GTCT GTGA GTGC GTGG GTGT GTTA GTTC GTTG GTTT TAAA TAAC TAAG TAAT TACA TACC TACG TACT TAGA TAGC TAGG TAGT TATA TATC TATG TATT TCAA TCAC TCAG TCAT TCCA TCCC TCCG TCCT TCGA TCGC TCGG TCGT TCTA TCTC TCTG TCTT TGAA TGAC TGAG TGAT TGCA TGCC TGCG TGCT TGGA TGGC TGGG TGGT TGTA TGTC TGTG TGTT TTAA TTAC TTAG TTAT TTCA TTCC TTCG TTCT TTGA TTGC TTGG TTGT TTTA TTTC TTTG TTTT
s21_contig00001 0.0115837 0.0060850 0.0061659 0.0069745 0.0045890 0.0022844 0.0064893 0.0022035 0.0044879 0.0068532 0.0039623 0.0034165 0.0026079 0.0045688 0.0057817 0.0039623 0.0042656 0.0018396 0.0020822 0.0033761 0.0016173 0.0014555 0.0043869 0.0010512 0.0065095 0.0040634 0.0050944 0.0040836 0.0007076 0.0015162 0.0017588 0.0034165 0.0031941 0.0018396 0.0020216 0.0016981 0.0046497 0.0037399 0.0070756 0.0044071 0.0029111 0.0029920 0.0022035 0.0010512 0.0012938 0.0017790 0.0021227 0.0022035 0.0027494 0.0020216 0.0012938 0.0031132 0.0048922 0.0034771 0.0087131 0.0016981 0.0062871 0.0046497 0.0043060 0.0033761 0.0022237 0.0033558 0.0043666 0.0069745 0.0079246 0.0041645 0.0054987 0.0043666 0.0016375 0.0026483 0.0028100 0.0021227 0.0011523 0.0061254 0.0013545 0.0017588 0.0025674 0.0065904 0.0036793 0.0057817 0.0043464 0.0017588 0.0020216 0.0043060 0.0021429 0.0027291 0.0042858 0.0022035 0.0055796 0.0077225 0.0059435 0.0050944 0.0007278 0.0030930 0.0013545 0.0039623 0.0066308 0.0064489 0.0048720 0.0087131 0.0049933 0.0094004 0.0072373 0.0070756 0.0057211 0.0099462 0.0042858 0.0043869 0.0027494 0.0077023 0.0028100 0.0064893 0.0008895 0.0008491 0.0002426 0.0012938 0.0026483 0.0032345 0.0048720 0.0020216 0.0028909 0.0033963 0.0020216 0.0020822 0.0012332 0.0058222 0.0054987 0.0061659 0.0080661 0.0042656 0.0058222 0.0033558 0.0038006 0.0026685 0.0077023 0.0017790 0.0023653 0.0058020 0.0030930 0.0015162 0.0027696 0.0048518 0.0065904 0.0045688 0.0069543 0.0030526 0.0033963 0.0046497 0.0052561 0.0037602 0.0099462 0.0029920 0.0074192 0.0095015 0.0077225 0.0040634 0.0010917 0.0058020 0.0061254 0.0068532 0.0059637 0.0027898 0.0032345 0.0034771 0.0050540 0.0037602 0.0094004 0.0037399 0.0034165 0.0037602 0.0027291 0.0014555 0.0009097 0.0026685 0.0026483 0.0022844 0.0018801 0.0017386 0.0008491 0.0020216 0.0048720 0.0027898 0.0064489 0.0018396 0.0025674 0.0030526 0.0017588 0.0018396 0.0010512 0.0042656 0.0041645 0.0060850 0.0032345 0.0010512 0.0012332 0.0022237 0.0015364 0.0009097 
0.0027494 0.0012938 0.0007480 0.0010917 0.0007278 0.0007076 0.0012130 0.0027696 0.0025674 0.0026079 0.0063882 0.0025674 0.0028909 0.0062871 0.0034165 0.0034165 0.0057211 0.0029111 0.0071564 0.0074192 0.0055796 0.0065095 0.0007480 0.0023653 0.0011523 0.0044879 0.0057211 0.0048720 0.0026483 0.0048922 0.0033558 0.0050540 0.0049933 0.0046497 0.0034165 0.0052561 0.0021429 0.0016173 0.0015364 0.0038006 0.0016375 0.0045890 0.0022237 0.0018801 0.0008895 0.0027494 0.0057211 0.0059637 0.0066308 0.0031941 0.0063882 0.0069543 0.0043464 0.0042656 0.0032345 0.0080661 0.0079246 0.0115837
s21_contig00002 0.0125181 0.0069468 0.0058463 0.0078066 0.0061215 0.0029920 0.0057776 0.0028200 0.0050210 0.0064654 0.0048834 0.0029920 0.0051585 0.0052617 0.0055024 0.0048146 0.0062934 0.0024761 0.0031639 0.0041956 0.0024417 0.0015820 0.0041956 0.0014444 0.0047459 0.0030607 0.0035078 0.0028200 0.0013756 0.0015820 0.0022010 0.0029920 0.0048146 0.0019259 0.0020978 0.0026481 0.0049866 0.0035422 0.0058463 0.0029576 0.0040581 0.0031295 0.0028888 0.0014444 0.0016163 0.0015820 0.0021322 0.0028200 0.0041268 0.0029576 0.0026137 0.0058463 0.0048490 0.0040237 0.0070156 0.0026481 0.0067749 0.0041956 0.0050554 0.0041956 0.0029232 0.0052617 0.0047459 0.0078066 0.0080473 0.0046771 0.0053305 0.0047459 0.0027856 0.0030263 0.0024761 0.0021322 0.0017883 0.0052273 0.0018915 0.0022010 0.0037141 0.0066717 0.0043332 0.0055024 0.0049522 0.0024417 0.0021666 0.0050554 0.0026481 0.0032327 0.0027856 0.0028888 0.0037829 0.0057432 0.0037829 0.0035078 0.0016851 0.0030951 0.0018915 0.0048834 0.0055712 0.0034734 0.0033015 0.0070156 0.0032327 0.0062934 0.0041268 0.0058463 0.0039893 0.0058807 0.0027856 0.0041956 0.0020978 0.0037829 0.0024761 0.0057776 0.0016163 0.0015820 0.0006878 0.0026137 0.0024073 0.0037485 0.0033015 0.0020978 0.0024417 0.0033359 0.0021666 0.0031639 0.0023729 0.0058120 0.0053305 0.0058463 0.0084944 0.0043332 0.0058120 0.0052617 0.0037829 0.0020634 0.0037829 0.0015820 0.0031983 0.0036798 0.0030951 0.0015820 0.0027512 0.0038517 0.0066717 0.0052617 0.0059151 0.0025105 0.0033359 0.0041956 0.0050210 0.0027856 0.0058807 0.0031295 0.0051929 0.0055024 0.0057432 0.0030607 0.0019602 0.0036798 0.0052273 0.0064654 0.0064654 0.0022698 0.0037485 0.0040237 0.0045739 0.0024073 0.0062934 0.0035422 0.0039549 0.0027856 0.0032327 0.0015820 0.0015820 0.0020634 0.0030263 0.0029920 0.0024073 0.0017883 0.0015820 0.0029576 0.0035422 0.0022698 0.0034734 0.0019259 0.0029920 0.0025105 0.0024417 0.0024761 0.0017539 0.0043332 0.0046771 0.0069468 0.0040581 0.0017539 0.0023729 0.0029232 0.0034390 0.0015820 
0.0020978 0.0016163 0.0014788 0.0019602 0.0016851 0.0013756 0.0039205 0.0027512 0.0037141 0.0051585 0.0056400 0.0029920 0.0024417 0.0067749 0.0045051 0.0039549 0.0039893 0.0040581 0.0056400 0.0051929 0.0037829 0.0047459 0.0014788 0.0031983 0.0017883 0.0050210 0.0070500 0.0035422 0.0024073 0.0048490 0.0031639 0.0045739 0.0032327 0.0049866 0.0045051 0.0050210 0.0026481 0.0024417 0.0034390 0.0037829 0.0027856 0.0061215 0.0029576 0.0024073 0.0016163 0.0041268 0.0070500 0.0064654 0.0055712 0.0048146 0.0056400 0.0059151 0.0049522 0.0062934 0.0040581 0.0084944 0.0080473 0.0125181
该脚本按首次出现的顺序输出[ACGT]单词,遇到新的单词时将其追加到列表末尾。如果某个序列中缺少某个单词的值,则输出中对应的位置显示为空白字段。标题行取自第一组数据结束时已收集到的[ACGT]单词列表;此后脚本不会再次打印标题。难点在于标题只能打印一次,下面这个脚本做到了这一点。输出格式与所给示例一致,也可以很容易地调整。脚本中的内联注释解释了各步操作
#!/bin/bash
#
# Transpose the third whitespace-separated field ("Obs Frequency") of every
# sequence block in a report into one output row per sequence.
#
# Usage: <script> datafile [key]
#   datafile  file to read; every line is split on whitespace into
#             word, obscnt, obsfreq and "the rest"
#   key       prefix of the 2nd field that marks a sequence-identifier line
#             (default "s21"); the 2nd field of that line becomes the row label
#
# Output (stdout):
#   - a header row made of the words seen in the first block, each preceded
#     by a single space
#   - one row per sequence: "\n<label>" followed by " <obsfreq>" per word
#   No newline is written after the last row (unchanged from the original
#   behaviour so existing consumers see the same bytes).
#
# This script requires the word list to be the same for each sequence: the
# header is printed only once, while the first block is being read.
#
# Exit status: 1 if datafile is missing or not a regular file, else 0.

#######################################
# Print one output row.
# Arguments: $1 - row label; $2.. - values, one per column
# Outputs:   "\n<label>" then " <value>" for every value
#######################################
print_row() {
  local label=$1
  shift
  printf '\n%s' "$label"
  # printf repeats the format per argument, but with zero arguments it would
  # still emit a lone space, so skip it for an empty row.
  if (( $# > 0 )); then
    printf ' %s' "$@"
  fi
}

main() {
  local datafile=${1-}
  local key=${2:-s21}          # prefix identifying a sequence line
  local inputseq=""            # label of the sequence being collected
  local -i header_done=0       # becomes 1 once the first block is complete
  local -i seqcnt=0            # number of sequence lines seen so far
  local -a obsfarray=()        # Obs Frequency values of the current sequence
  local word obscnt obsfreq _

  [[ -f "$datafile" ]] || {
    # Name the file that was actually looked for, and report on stderr.
    printf "\n Error: insufficient input, file '%s' not found.\n\n" \
      "$datafile" >&2
    exit 1
  }

  # Single pass over the data; the "|| -n" clause keeps a final line that
  # lacks a trailing newline.
  while read -r word obscnt obsfreq _ || [[ -n "$word" ]]; do
    # A line whose 2nd field starts with the key opens a new sequence.
    if [[ "${obscnt:0:${#key}}" == "$key" ]]; then
      # From the second sequence on, the header is complete and the previous
      # sequence's row can be flushed.
      if (( seqcnt > 0 )); then
        header_done=1
        print_row "$inputseq" "${obsfarray[@]}"
        obsfarray=()
      fi
      inputseq=$obscnt
      seqcnt=$(( seqcnt + 1 ))
    fi

    # Data lines start with A, C, G or T; a second character "o" filters out
    # the 'Total' line.
    if [[ "$word" == [ACGT]* && "${word:1:1}" != "o" ]]; then
      (( header_done )) || printf ' %s' "$word"
      obsfarray+=("$obsfreq")
    fi
  done <"$datafile"

  # Flush the last sequence, which has no following sequence line.
  print_row "$inputseq" "${obsfarray[@]}"
}

main "$@"
exit 0
#/bin/bash
[-f“$1”]|{
printf“\n错误:输入不足,找不到文件“%s”。\n\n”“${0/*\/}”
出口1
}
##此脚本要求每个序列的标题行相等
key=“${2:-s21}”#标识序列(3个字符)的键默认为“s21”
currentseq=”“#变量保持序列
declare-i needhdr=0#控制打印头的标志
declare-i seqcnt=0#序列计数
声明-用于保存Obs频率的obsfarray数组
##对数据文件进行单次传递
读时-r单词obscnt obsrefq expfreq oefreq | | |[-n“$word”];做
##从obscnt捕获inputseq
如果[“z${obscnt:0:3}”=“z${key}”];然后
#如果序列计数>0,则已打印标题并准备打印数据
如果[$seqcnt-gt 0];然后
needhdr=1#将需要标头设置为false
printf“\n%s”“$inputseq”#打印换行符,后跟输入序列
对于${obsfarray[@]}中的i;是否打印Obs频率值
printf“$i”
完成
取消设置obsfarray#为下一个序列取消设置数组
fi
inputseq=“${obscnt}”#设置obscnt中的inputseq值
((seqcnt++)#增加seqcnt
fi
##打印标题,捕获obsfreq值
#测试第一个字符是否为CGT
如果[“z${word:0:1}”=“zA”]| |[“z${word:0:1}”=“zC”]| |
[“z${word:0:1}”=“zG”]| |[“z${word:0:1}”=“zT”];然后
如果[“z${word:1:1}”!=“zo”];然后#去掉讨厌的“Total”
[$needhdr-eq 0]&&printf“%s”$word#打印头
obsfarray+=(“$obsfreq”)#填充Obs频率阵列
fi
fi
currentseq=“$inputseq”#保持当前seq以测试新值
难点在于标题只能打印一次,这个脚本做到了这一点。输出格式与所给示例一致,也可以很容易地调整。脚本中的内联注释解释了各步操作
#!/bin/bash
#
# Transpose the third whitespace-separated field ("Obs Frequency") of every
# sequence block in a report into one output row per sequence.
#
# Usage: <script> datafile [key]
#   datafile  file to read; every line is split on whitespace into
#             word, obscnt, obsfreq and "the rest"
#   key       prefix of the 2nd field that marks a sequence-identifier line
#             (default "s21"); the 2nd field of that line becomes the row label
#
# Output (stdout):
#   - a header row made of the words seen in the first block, each preceded
#     by a single space
#   - one row per sequence: "\n<label>" followed by " <obsfreq>" per word
#   No newline is written after the last row (unchanged from the original
#   behaviour so existing consumers see the same bytes).
#
# This script requires the word list to be the same for each sequence: the
# header is printed only once, while the first block is being read.
#
# Exit status: 1 if datafile is missing or not a regular file, else 0.

#######################################
# Print one output row.
# Arguments: $1 - row label; $2.. - values, one per column
# Outputs:   "\n<label>" then " <value>" for every value
#######################################
print_row() {
  local label=$1
  shift
  printf '\n%s' "$label"
  # printf repeats the format per argument, but with zero arguments it would
  # still emit a lone space, so skip it for an empty row.
  if (( $# > 0 )); then
    printf ' %s' "$@"
  fi
}

main() {
  local datafile=${1-}
  local key=${2:-s21}          # prefix identifying a sequence line
  local inputseq=""            # label of the sequence being collected
  local -i header_done=0       # becomes 1 once the first block is complete
  local -i seqcnt=0            # number of sequence lines seen so far
  local -a obsfarray=()        # Obs Frequency values of the current sequence
  local word obscnt obsfreq _

  [[ -f "$datafile" ]] || {
    # Name the file that was actually looked for, and report on stderr.
    printf "\n Error: insufficient input, file '%s' not found.\n\n" \
      "$datafile" >&2
    exit 1
  }

  # Single pass over the data; the "|| -n" clause keeps a final line that
  # lacks a trailing newline.
  while read -r word obscnt obsfreq _ || [[ -n "$word" ]]; do
    # A line whose 2nd field starts with the key opens a new sequence.
    if [[ "${obscnt:0:${#key}}" == "$key" ]]; then
      # From the second sequence on, the header is complete and the previous
      # sequence's row can be flushed.
      if (( seqcnt > 0 )); then
        header_done=1
        print_row "$inputseq" "${obsfarray[@]}"
        obsfarray=()
      fi
      inputseq=$obscnt
      seqcnt=$(( seqcnt + 1 ))
    fi

    # Data lines start with A, C, G or T; a second character "o" filters out
    # the 'Total' line.
    if [[ "$word" == [ACGT]* && "${word:1:1}" != "o" ]]; then
      (( header_done )) || printf ' %s' "$word"
      obsfarray+=("$obsfreq")
    fi
  done <"$datafile"

  # Flush the last sequence, which has no following sequence line.
  print_row "$inputseq" "${obsfarray[@]}"
}

main "$@"
exit 0
#/bin/bash
[-f“$1”]|{
printf“\n错误:输入不足,找不到文件“%s”。\n\n”“${0/*\/}”
出口1
}
##此脚本要求每个序列的标题行相等
key=“${2:-s21}”#标识序列(3个字符)的键默认为“s21”
currentseq=”“#变量保持序列
declare-i needhdr=0#控制打印头的标志
declare-i seqcnt=0#序列计数
声明-用于保存Obs频率的obsfarray数组
##对数据文件进行单次传递
读时-r单词obscnt obsrefq expfreq oefreq | | |[-n“$word”];做
##从obscnt捕获inputseq
如果[“z${obscnt:0:3}”=“z${key}”];然后
#如果序列计数>0,则已打印标题并准备打印数据
如果[$seqcnt-gt 0];然后
needhdr=1#将需要标头设置为false
printf“\n%s”“$inputseq”#打印换行符,后跟输入序列
对于${obsfarray[@]}中的i;是否打印Obs频率值
printf“$i”
完成
取消设置obsfarray#为下一个序列取消设置数组
fi
inputseq=“${obscnt}”#设置obscnt中的inputseq值
((seqcnt++)#增加seqcnt
fi
##打印标题,捕获obsfreq值
#测试第一个字符是否为CGT
如果[“z${word:0:1}”=“zA”]| |[“z${word:0:1}”=“zC”]| |
[“z${word:0:1}”=“zG”]| |[“z${word:0:1}”=“zT”];然后
如果[“z${word:1:1}”!=“zo”];然后#去掉讨厌的“Total”
[$needhdr-eq 0]&&printf“%s”$word#打印头
obsfarray+=(“$obsfreq”)#填充Obs频率阵列
fi
fi
currentseq=“$input
# A comment line naming a contig opens a new sequence block: emit the row
# collected for the previous sequence (if there was one), then remember the
# new sequence name taken from the second field.
/^# +s21_contig[0-9]+/ {
    if (source != "") {
        print_results()
    }
    source = $2
}

# First sighting of an [ACGT] word: register it and record its column
# position, so columns follow the order in which words were encountered.
/^[ACGT]+ / && !($1 in key) {
    key[$1] = 1
    seq[++nkeys] = $1
}

# Every [ACGT] line: store the third field as this word's value for the
# sequence currently being collected.
/^[ACGT]+ / {
    obs[$1] = $3
}

# The last sequence has no following contig line, so flush it at end of input.
END {
    print_results()
}
# print_results(): write the output row for the sequence named in `source`.
#
# On the first call only, a header line listing every word registered so far
# (seq[1..nkeys]) is written first. The row itself is the sequence name
# followed by the stored value of each word in column order; a word with no
# value for this sequence yields an empty, padded field. The per-sequence
# values in obs[] are cleared afterwards.
#
# `i` is a local loop index (awk's extra-parameter idiom).
function print_results( i)
{
    if (!printed_header) {
        pad = " "
        i = 0
        while (++i <= nkeys) {
            printf "%s%s", pad, seq[i]
            pad = " "
        }
        printf "\n"
        printed_header = 1
    }

    printf "%s ", source
    i = 1
    while (i <= nkeys) {
        printf " %-9s", obs[seq[i]]
        i++
    }
    printf "\n"

    # Empty the array for the next sequence.
    split("", obs)
}
awk -f transpose.awk data
AAAA AAAC AAAG AAAT AACA AACC AACG AACT AAGA AAGC AAGG AAGT AATA AATC AATG AATT ACAA ACAC ACAG ACAT ACCA ACCC ACCG ACCT ACGA ACGC ACGG ACGT ACTA ACTC ACTG ACTT AGAA AGAC AGAG AGAT AGCA AGCC AGCG AGCT AGGA AGGC AGGG AGGT AGTA AGTC AGTG AGTT ATAA ATAC ATAG ATAT ATCA ATCC ATCG ATCT ATGA ATGC ATGG ATGT ATTA ATTC ATTG ATTT CAAA CAAC CAAG CAAT CACA CACC CACG CACT CAGA CAGC CAGG CAGT CATA CATC CATG CATT CCAA CCAC CCAG CCAT CCCA CCCC CCCG CCCT CCGA CCGC CCGG CCGT CCTA CCTC CCTG CCTT CGAA CGAC CGAG CGAT CGCA CGCC CGCG CGCT CGGA CGGC CGGG CGGT CGTA CGTC CGTG CGTT CTAA CTAC CTAG CTAT CTCA CTCC CTCG CTCT CTGA CTGC CTGG CTGT CTTA CTTC CTTG CTTT GAAA GAAC GAAG GAAT GACA GACC GACG GACT GAGA GAGC GAGG GAGT GATA GATC GATG GATT GCAA GCAC GCAG GCAT GCCA GCCC GCCG GCCT GCGA GCGC GCGG GCGT GCTA GCTC GCTG GCTT GGAA GGAC GGAG GGAT GGCA GGCC GGCG GGCT GGGA GGGC GGGG GGGT GGTA GGTC GGTG GGTT GTAA GTAC GTAG GTAT GTCA GTCC GTCG GTCT GTGA GTGC GTGG GTGT GTTA GTTC GTTG GTTT TAAA TAAC TAAG TAAT TACA TACC TACG TACT TAGA TAGC TAGG TAGT TATA TATC TATG TATT TCAA TCAC TCAG TCAT TCCA TCCC TCCG TCCT TCGA TCGC TCGG TCGT TCTA TCTC TCTG TCTT TGAA TGAC TGAG TGAT TGCA TGCC TGCG TGCT TGGA TGGC TGGG TGGT TGTA TGTC TGTG TGTT TTAA TTAC TTAG TTAT TTCA TTCC TTCG TTCT TTGA TTGC TTGG TTGT TTTA TTTC TTTG TTTT
s21_contig00001 0.0115837 0.0060850 0.0061659 0.0069745 0.0045890 0.0022844 0.0064893 0.0022035 0.0044879 0.0068532 0.0039623 0.0034165 0.0026079 0.0045688 0.0057817 0.0039623 0.0042656 0.0018396 0.0020822 0.0033761 0.0016173 0.0014555 0.0043869 0.0010512 0.0065095 0.0040634 0.0050944 0.0040836 0.0007076 0.0015162 0.0017588 0.0034165 0.0031941 0.0018396 0.0020216 0.0016981 0.0046497 0.0037399 0.0070756 0.0044071 0.0029111 0.0029920 0.0022035 0.0010512 0.0012938 0.0017790 0.0021227 0.0022035 0.0027494 0.0020216 0.0012938 0.0031132 0.0048922 0.0034771 0.0087131 0.0016981 0.0062871 0.0046497 0.0043060 0.0033761 0.0022237 0.0033558 0.0043666 0.0069745 0.0079246 0.0041645 0.0054987 0.0043666 0.0016375 0.0026483 0.0028100 0.0021227 0.0011523 0.0061254 0.0013545 0.0017588 0.0025674 0.0065904 0.0036793 0.0057817 0.0043464 0.0017588 0.0020216 0.0043060 0.0021429 0.0027291 0.0042858 0.0022035 0.0055796 0.0077225 0.0059435 0.0050944 0.0007278 0.0030930 0.0013545 0.0039623 0.0066308 0.0064489 0.0048720 0.0087131 0.0049933 0.0094004 0.0072373 0.0070756 0.0057211 0.0099462 0.0042858 0.0043869 0.0027494 0.0077023 0.0028100 0.0064893 0.0008895 0.0008491 0.0002426 0.0012938 0.0026483 0.0032345 0.0048720 0.0020216 0.0028909 0.0033963 0.0020216 0.0020822 0.0012332 0.0058222 0.0054987 0.0061659 0.0080661 0.0042656 0.0058222 0.0033558 0.0038006 0.0026685 0.0077023 0.0017790 0.0023653 0.0058020 0.0030930 0.0015162 0.0027696 0.0048518 0.0065904 0.0045688 0.0069543 0.0030526 0.0033963 0.0046497 0.0052561 0.0037602 0.0099462 0.0029920 0.0074192 0.0095015 0.0077225 0.0040634 0.0010917 0.0058020 0.0061254 0.0068532 0.0059637 0.0027898 0.0032345 0.0034771 0.0050540 0.0037602 0.0094004 0.0037399 0.0034165 0.0037602 0.0027291 0.0014555 0.0009097 0.0026685 0.0026483 0.0022844 0.0018801 0.0017386 0.0008491 0.0020216 0.0048720 0.0027898 0.0064489 0.0018396 0.0025674 0.0030526 0.0017588 0.0018396 0.0010512 0.0042656 0.0041645 0.0060850 0.0032345 0.0010512 0.0012332 0.0022237 0.0015364 0.0009097 
0.0027494 0.0012938 0.0007480 0.0010917 0.0007278 0.0007076 0.0012130 0.0027696 0.0025674 0.0026079 0.0063882 0.0025674 0.0028909 0.0062871 0.0034165 0.0034165 0.0057211 0.0029111 0.0071564 0.0074192 0.0055796 0.0065095 0.0007480 0.0023653 0.0011523 0.0044879 0.0057211 0.0048720 0.0026483 0.0048922 0.0033558 0.0050540 0.0049933 0.0046497 0.0034165 0.0052561 0.0021429 0.0016173 0.0015364 0.0038006 0.0016375 0.0045890 0.0022237 0.0018801 0.0008895 0.0027494 0.0057211 0.0059637 0.0066308 0.0031941 0.0063882 0.0069543 0.0043464 0.0042656 0.0032345 0.0080661 0.0079246 0.0115837
s21_contig00002 0.0125181 0.0069468 0.0058463 0.0078066 0.0061215 0.0029920 0.0057776 0.0028200 0.0050210 0.0064654 0.0048834 0.0029920 0.0051585 0.0052617 0.0055024 0.0048146 0.0062934 0.0024761 0.0031639 0.0041956 0.0024417 0.0015820 0.0041956 0.0014444 0.0047459 0.0030607 0.0035078 0.0028200 0.0013756 0.0015820 0.0022010 0.0029920 0.0048146 0.0019259 0.0020978 0.0026481 0.0049866 0.0035422 0.0058463 0.0029576 0.0040581 0.0031295 0.0028888 0.0014444 0.0016163 0.0015820 0.0021322 0.0028200 0.0041268 0.0029576 0.0026137 0.0058463 0.0048490 0.0040237 0.0070156 0.0026481 0.0067749 0.0041956 0.0050554 0.0041956 0.0029232 0.0052617 0.0047459 0.0078066 0.0080473 0.0046771 0.0053305 0.0047459 0.0027856 0.0030263 0.0024761 0.0021322 0.0017883 0.0052273 0.0018915 0.0022010 0.0037141 0.0066717 0.0043332 0.0055024 0.0049522 0.0024417 0.0021666 0.0050554 0.0026481 0.0032327 0.0027856 0.0028888 0.0037829 0.0057432 0.0037829 0.0035078 0.0016851 0.0030951 0.0018915 0.0048834 0.0055712 0.0034734 0.0033015 0.0070156 0.0032327 0.0062934 0.0041268 0.0058463 0.0039893 0.0058807 0.0027856 0.0041956 0.0020978 0.0037829 0.0024761 0.0057776 0.0016163 0.0015820 0.0006878 0.0026137 0.0024073 0.0037485 0.0033015 0.0020978 0.0024417 0.0033359 0.0021666 0.0031639 0.0023729 0.0058120 0.0053305 0.0058463 0.0084944 0.0043332 0.0058120 0.0052617 0.0037829 0.0020634 0.0037829 0.0015820 0.0031983 0.0036798 0.0030951 0.0015820 0.0027512 0.0038517 0.0066717 0.0052617 0.0059151 0.0025105 0.0033359 0.0041956 0.0050210 0.0027856 0.0058807 0.0031295 0.0051929 0.0055024 0.0057432 0.0030607 0.0019602 0.0036798 0.0052273 0.0064654 0.0064654 0.0022698 0.0037485 0.0040237 0.0045739 0.0024073 0.0062934 0.0035422 0.0039549 0.0027856 0.0032327 0.0015820 0.0015820 0.0020634 0.0030263 0.0029920 0.0024073 0.0017883 0.0015820 0.0029576 0.0035422 0.0022698 0.0034734 0.0019259 0.0029920 0.0025105 0.0024417 0.0024761 0.0017539 0.0043332 0.0046771 0.0069468 0.0040581 0.0017539 0.0023729 0.0029232 0.0034390 0.0015820 
0.0020978 0.0016163 0.0014788 0.0019602 0.0016851 0.0013756 0.0039205 0.0027512 0.0037141 0.0051585 0.0056400 0.0029920 0.0024417 0.0067749 0.0045051 0.0039549 0.0039893 0.0040581 0.0056400 0.0051929 0.0037829 0.0047459 0.0014788 0.0031983 0.0017883 0.0050210 0.0070500 0.0035422 0.0024073 0.0048490 0.0031639 0.0045739 0.0032327 0.0049866 0.0045051 0.0050210 0.0026481 0.0024417 0.0034390 0.0037829 0.0027856 0.0061215 0.0029576 0.0024073 0.0016163 0.0041268 0.0070500 0.0064654 0.0055712 0.0048146 0.0056400 0.0059151 0.0049522 0.0062934 0.0040581 0.0084944 0.0080473 0.0125181
#!/bin/bash
#
# Transpose the third whitespace-separated field ("Obs Frequency") of every
# sequence block in a report into one output row per sequence.
#
# Usage: <script> datafile [key]
#   datafile  file to read; every line is split on whitespace into
#             word, obscnt, obsfreq and "the rest"
#   key       prefix of the 2nd field that marks a sequence-identifier line
#             (default "s21"); the 2nd field of that line becomes the row label
#
# Output (stdout):
#   - a header row made of the words seen in the first block, each preceded
#     by a single space
#   - one row per sequence: "\n<label>" followed by " <obsfreq>" per word
#   No newline is written after the last row (unchanged from the original
#   behaviour so existing consumers see the same bytes).
#
# This script requires the word list to be the same for each sequence: the
# header is printed only once, while the first block is being read.
#
# Exit status: 1 if datafile is missing or not a regular file, else 0.

#######################################
# Print one output row.
# Arguments: $1 - row label; $2.. - values, one per column
# Outputs:   "\n<label>" then " <value>" for every value
#######################################
print_row() {
  local label=$1
  shift
  printf '\n%s' "$label"
  # printf repeats the format per argument, but with zero arguments it would
  # still emit a lone space, so skip it for an empty row.
  if (( $# > 0 )); then
    printf ' %s' "$@"
  fi
}

main() {
  local datafile=${1-}
  local key=${2:-s21}          # prefix identifying a sequence line
  local inputseq=""            # label of the sequence being collected
  local -i header_done=0       # becomes 1 once the first block is complete
  local -i seqcnt=0            # number of sequence lines seen so far
  local -a obsfarray=()        # Obs Frequency values of the current sequence
  local word obscnt obsfreq _

  [[ -f "$datafile" ]] || {
    # Name the file that was actually looked for, and report on stderr.
    printf "\n Error: insufficient input, file '%s' not found.\n\n" \
      "$datafile" >&2
    exit 1
  }

  # Single pass over the data; the "|| -n" clause keeps a final line that
  # lacks a trailing newline.
  while read -r word obscnt obsfreq _ || [[ -n "$word" ]]; do
    # A line whose 2nd field starts with the key opens a new sequence.
    if [[ "${obscnt:0:${#key}}" == "$key" ]]; then
      # From the second sequence on, the header is complete and the previous
      # sequence's row can be flushed.
      if (( seqcnt > 0 )); then
        header_done=1
        print_row "$inputseq" "${obsfarray[@]}"
        obsfarray=()
      fi
      inputseq=$obscnt
      seqcnt=$(( seqcnt + 1 ))
    fi

    # Data lines start with A, C, G or T; a second character "o" filters out
    # the 'Total' line.
    if [[ "$word" == [ACGT]* && "${word:1:1}" != "o" ]]; then
      (( header_done )) || printf ' %s' "$word"
      obsfarray+=("$obsfreq")
    fi
  done <"$datafile"

  # Flush the last sequence, which has no following sequence line.
  print_row "$inputseq" "${obsfarray[@]}"
}

main "$@"
exit 0
$ ./dna.sh dat/dna.dat
AAAA AAAC AAAG AAAT AACA <snip>
s21_contig00001 0.0115837 0.0060850 0.0061659 0.0069745 0.0045890 <snip>
s21_contig00002 0.0125181 0.0069468 0.0058463 0.0078066 0.0061215 <snip>