如何找出基因组中编码给定氨基酸序列的所有子串

肽编码问题包括寻找编码给定氨基酸序列的基因组的子串。

遗传密码描述了RNA三联体(密码子)如何被翻译成20种氨基酸之一。下图中,由内向外的前三个圆圈分别代表密码子的第一、第二和第三个核苷酸;第4、第5和第6个圆圈以三种方式表示翻译得到的氨基酸:氨基酸的全名、三字母缩写和单字母缩写。64个RNA密码子中有三个是终止密码子,它们使翻译停止,相当于在氨基酸字母表中隐式地增加了第21个符号——终止符。

在此处输入图像描述

问题的定义如下:

输入:DNA字符串Text和氨基酸字符串Peptide。

输出:Text中所有编码Peptide的子串(如果存在这样的子串)。

Sample Input: ATGGCCATGGCCCCCAGAACTGAGATCAATAGTACCCGTATTAACGGGTGA MA Sample Output: ATGGCC GGCCAT ATGGCC 

结果是按三联体(密码子)读取得到的:ATGGCC直接编码MA;GGCCAT的互补链是CCGGTA,将其反向读取得到ATGGCC(即CCGGTA.reverse),因此GGCCAT同样编码MA。

碱基的互补配对规则是:C-G,G-C,T-A和A-T

为了解决这个问题,我首先定义了下面这些数组:

 using System;
 using System.Collections.Generic;
 using System.Text;
 using System.Text.RegularExpressions;
 using System.Linq;
 using System.IO;
 using System.Collections;

 /// <summary>
 /// Original attempt at the peptide-encoding problem: scan a DNA string for
 /// substrings that translate (directly or via the reverse complement) into a
 /// given peptide.
 /// </summary>
 /// <remarks>
 /// This listing is the asker's attempt; its scanning algorithm is intentionally
 /// left unchanged because the text around it discusses the (incomplete) output
 /// it produces. Only the line structure has been restored so that the inline
 /// <c>//</c> comments no longer comment out the code that follows them.
 /// </remarks>
 class Program
 {
     // DNA codons that encode an amino acid. The stop codons TAA, TAG and TGA
     // are not listed, so codonToAminoAcid returns "X" for them.
     private static string[] CODONS =
     {
         "TTT", "TTC", "TTA", "TTG", "TCT", "TCC", "TCA", "TCG",
         "TAT", "TAC", "TGT", "TGC", "TGG",
         "CTT", "CTC", "CTA", "CTG", "CCT", "CCC", "CCA", "CCG",
         "CAT", "CAC", "CAA", "CAG", "CGT", "CGC", "CGA", "CGG",
         "ATT", "ATC", "ATA", "ATG", "ACT", "ACC", "ACA", "ACG",
         "AAT", "AAC", "AAA", "AAG", "AGT", "AGC", "AGA", "AGG",
         "GTT", "GTC", "GTA", "GTG", "GCT", "GCC", "GCA", "GCG",
         "GAT", "GAC", "GAA", "GAG", "GGT", "GGC", "GGA", "GGG",
     };

     // One-letter amino acid for the codon at the same index in CODONS.
     private static string[] AMINOS_PER_CODON =
     {
         "F", "F", "L", "L", "S", "S", "S", "S",
         "Y", "Y", "C", "C", "W",
         "L", "L", "L", "L", "P", "P", "P", "P",
         "H", "H", "Q", "Q", "R", "R", "R", "R",
         "I", "I", "I", "M", "T", "T", "T", "T",
         "N", "N", "K", "K", "S", "S", "R", "R",
         "V", "V", "V", "V", "A", "A", "A", "A",
         "D", "D", "E", "E", "G", "G", "G", "G",
     };

     // One-letter abbreviations of the 20 amino acids (not used by the search).
     private static string[] AMINO_ABBREVIATIONS =
     {
         "F", "L", "I", "M", "V", "S", "P", "T", "A", "Y",
         "H", "Q", "N", "K", "D", "E", "C", "W", "R", "G"
     };

     // Full names, index-aligned with AMINO_ABBREVIATIONS (not used by the search).
     private static string[] FULL_NAMES =
     {
         "phenylalanine", "leucine", "isoleucine", "methionine", "valine",
         "serine", "proline", "threonine", "alanine", "tyrosine",
         "histidine", "glutamine", "asparagine", "lysine", "aspartic acid",
         "glutamic acid", "cysteine", "tryptophan", "arginine", "glycine"
     };

     /// <summary>
     /// Scans <c>dna</c> window by window and prints every run of codons whose
     /// translation matched the peptide letter by letter.
     /// </summary>
     static void Main()
     {
         // the input DNA goes here
         string dna = "";
         // the given peptide
         string givenPeptide = "KEVFEPHYY";
         char[] givenPeptideArray = givenPeptide.ToCharArray();

         string codon = "";
         string auxCodon = "";
         string convertedCodon = "";
         string clean = "";
         int pivot = 0;
         int foundFlag = 0;
         string cleanReverse = "";
         int foundFlagReverse = 0;
         string convertedCodonReverse = "";

         // Idea: slide over the DNA string, and at each start position read
         // givenPeptide.Length codons (3 letters each). Each codon is translated
         // and compared with the peptide letter at the same index; e.g. if the
         // first peptide letter is 'M', the first codon must be "ATG". A counter
         // is incremented on every match, and the collected codons are printed
         // once the counter reaches the peptide length.
         // However, when testing with a longer string the result is incomplete.

         // NOTE(review): "<" stops one position early, so the window that ends
         // exactly at the end of dna is never examined.
         for (int pos = 0; pos < dna.Length - (3 * givenPeptide.Length); pos++)
         {
             pivot = pos;
             for (int codonPos = 0; codonPos < givenPeptide.Length; codonPos++)
             {
                 codon = dna.Substring(pivot, 3);
                 auxCodon = givenPeptideArray[codonPos].ToString();
                 convertedCodon = codonToAminoAcid(codon);
                 // NOTE(review): this reverse-complements each codon on its own
                 // while still walking the window left to right; reading the
                 // reverse strand would require the reverse complement of the
                 // whole window, i.e. the codons in the opposite order.
                 convertedCodonReverse = codonToAminoAcid(DNAComplement(codon));

                 // forward strand
                 // NOTE(review): foundFlag/clean are not reset when a new window
                 // starts, so a partial match can carry over between windows.
                 if (auxCodon.Equals(convertedCodon))
                 {
                     foundFlag++;
                     clean += codon;
                 }
                 else
                 {
                     foundFlag = 0;
                     clean = "";
                 }
                 if (foundFlag == givenPeptide.Length)
                 {
                     Console.WriteLine(clean);
                     foundFlag = 0;
                     clean = "";
                 }

                 // reverse
                 if (auxCodon.Equals(convertedCodonReverse))
                 {
                     foundFlagReverse++;
                     cleanReverse += codon;
                 }
                 else
                 {
                     foundFlagReverse = 0;
                     cleanReverse = "";
                 }
                 if (foundFlagReverse == givenPeptide.Length)
                 {
                     Console.WriteLine(cleanReverse);
                     foundFlagReverse = 0;
                     cleanReverse = "";
                 }

                 pivot += 3;
             }
         }
     }//end main

     /// <summary>
     /// Returns the reverse complement of <paramref name="dna"/>: A and T are
     /// swapped, C and G are swapped, and the result is reversed. Any other
     /// character is kept as it is.
     /// </summary>
     /// <param name="dna">DNA string to complement.</param>
     /// <returns>The reverse-complemented string.</returns>
     public static string DNAComplement(string dna)
     {
         char[] array = dna.ToCharArray();
         for (int i = 0; i < array.Length; i++)
         {
             char let = array[i];
             if (let == 'A') array[i] = 'T';
             else if (let == 'T') array[i] = 'A';
             else if (let == 'C') array[i] = 'G';
             else if (let == 'G') array[i] = 'C';
         }
         Array.Reverse(array);
         return new string(array);
     }

     /// <summary>
     /// Translates one DNA codon into its one-letter amino acid.
     /// </summary>
     /// <param name="codon">Three-letter codon, upper case.</param>
     /// <returns>
     /// The amino acid letter, or "X" when the codon is not in CODONS
     /// (this includes the stop codons).
     /// </returns>
     public static string codonToAminoAcid(String codon)
     {
         for (int k = 0; k < CODONS.Length; k++)
         {
             if (CODONS[k].Equals(codon))
             {
                 return AMINOS_PER_CODON[k];
             }
         }
         // never reached with a valid (non-stop) codon
         return "X";
     }
 }//end class

我应该得到

 AAGGAAGTATTTGAGCCTCATTATTAC AAAGAGGTGTTTGAACCTCATTACTAT AAGGAGGTATTTGAACCCCACTATTAC AAAGAAGTTTTCGAACCACATTATTAC AAGGAAGTGTTTGAACCTCACTATTAT AAAGAAGTTTTCGAGCCGCACTACTAC AAGGAAGTATTCGAACCACATTACTAT ATAATAATGCGGCTCGAATACTTCCTT GTAGTAATGGGGCTCGAAAACCTCCTT GTAGTAATGAGGTTCAAAAACCTCCTT GTAGTAATGGGGTTCGAAGACTTCCTT ATAATAGTGAGGCTCAAAAACTTCCTT ATAGTAATGGGGTTCGAAGACTTCCTT GTAGTAGTGCGGCTCAAAAACTTCCTT ATAGTAATGAGGTTCGAAAACCTCTTT ATAATAATGTGGCTCGAACACTTCTTT GTAGTAATGGGGCTCAAACACCTCTTT ATAGTAGTGAGGTTCGAAGACTTCCTT GTAATAGTGCGGTTCAAAAACTTCCTT ATAGTAGTGTGGTTCAAATACCTCCTT 

但是我只得到:

 AAGGAAGTATTTGAGCCTCATTATTAC AAAGAGGTGTTTGAACCTCATTACTAT AAGGAAGTGTTTGAACCTCACTATTAT AAAGAAGTTTTCGAGCCGCACTACTAC AAGGAGGTATTTGAACCCCACTATTAC AAAGAAGTTTTCGAACCACATTATTAC 

我一直在思考这个问题,问题似乎在于我没有每次都遍历完整个字符串……

看起来问题有两个方面。首先,你的外层for循环少走了一步,无法覆盖dna字符串末尾的最后一个窗口。其次,逐个密码子求反向互补是行不通的:必须先取出整个长度为(givenPeptide.Length * 3)的子串,再对这个子串整体求反向互补。下面修改后的代码能够输出你在示例中列出的那些字符串,希望它能满足你的需求。此外,它的优点还在于更加简洁。

 using System;
 using System.Collections.Generic;
 using System.Text;
 using System.Text.RegularExpressions;
 using System.Linq;
 using System.IO;
 using System.Collections;

 namespace DNA
 {
     /// <summary>
     /// Solves the peptide-encoding problem: finds every substring of a DNA
     /// string that translates into a given peptide, either directly or through
     /// its reverse complement.
     /// </summary>
     class Program
     {
         // DNA codons that encode an amino acid. The stop codons TAA, TAG and TGA
         // are not listed, so they translate to "X".
         private static string[] CODONS =
         {
             "TTT", "TTC", "TTA", "TTG", "TCT", "TCC", "TCA", "TCG",
             "TAT", "TAC", "TGT", "TGC", "TGG",
             "CTT", "CTC", "CTA", "CTG", "CCT", "CCC", "CCA", "CCG",
             "CAT", "CAC", "CAA", "CAG", "CGT", "CGC", "CGA", "CGG",
             "ATT", "ATC", "ATA", "ATG", "ACT", "ACC", "ACA", "ACG",
             "AAT", "AAC", "AAA", "AAG", "AGT", "AGC", "AGA", "AGG",
             "GTT", "GTC", "GTA", "GTG", "GCT", "GCC", "GCA", "GCG",
             "GAT", "GAC", "GAA", "GAG", "GGT", "GGC", "GGA", "GGG",
         };

         // One-letter amino acid for the codon at the same index in CODONS.
         private static string[] AMINOS_PER_CODON =
         {
             "F", "F", "L", "L", "S", "S", "S", "S",
             "Y", "Y", "C", "C", "W",
             "L", "L", "L", "L", "P", "P", "P", "P",
             "H", "H", "Q", "Q", "R", "R", "R", "R",
             "I", "I", "I", "M", "T", "T", "T", "T",
             "N", "N", "K", "K", "S", "S", "R", "R",
             "V", "V", "V", "V", "A", "A", "A", "A",
             "D", "D", "E", "E", "G", "G", "G", "G",
         };

         // One-letter abbreviations of the 20 amino acids (not used by the search).
         private static string[] AMINO_ABBREVIATIONS =
         {
             "F", "L", "I", "M", "V", "S", "P", "T", "A", "Y",
             "H", "Q", "N", "K", "D", "E", "C", "W", "R", "G"
         };

         // Full names, index-aligned with AMINO_ABBREVIATIONS (not used by the search).
         private static string[] FULL_NAMES =
         {
             "phenylalanine", "leucine", "isoleucine", "methionine", "valine",
             "serine", "proline", "threonine", "alanine", "tyrosine",
             "histidine", "glutamine", "asparagine", "lysine", "aspartic acid",
             "glutamic acid", "cysteine", "tryptophan", "arginine", "glycine"
         };

         // Codon -> amino acid lookup built once from the two parallel arrays
         // above, so translating a codon does not scan the whole array.
         // Declared after the arrays because static initializers run in
         // textual order.
         private static readonly Dictionary<string, string> CodonTable = BuildCodonTable();

         /// <summary>Builds the codon lookup table from CODONS and AMINOS_PER_CODON.</summary>
         private static Dictionary<string, string> BuildCodonTable()
         {
             Dictionary<string, string> table =
                 new Dictionary<string, string>(CODONS.Length, StringComparer.Ordinal);
             for (int k = 0; k < CODONS.Length; k++)
             {
                 table[CODONS[k]] = AMINOS_PER_CODON[k];
             }
             return table;
         }

         /// <summary>
         /// Returns the reverse complement of <paramref name="dna"/>: A and T are
         /// swapped, C and G are swapped, and the result is reversed. Any other
         /// character is kept as it is.
         /// </summary>
         /// <param name="dna">DNA string to complement; must not be null.</param>
         /// <returns>The reverse-complemented string.</returns>
         /// <exception cref="ArgumentNullException"><paramref name="dna"/> is null.</exception>
         public static string DNAComplement(string dna)
         {
             if (dna == null)
             {
                 throw new ArgumentNullException("dna");
             }

             char[] array = dna.ToCharArray();
             for (int i = 0; i < array.Length; i++)
             {
                 switch (array[i])
                 {
                     case 'A': array[i] = 'T'; break;
                     case 'T': array[i] = 'A'; break;
                     case 'C': array[i] = 'G'; break;
                     case 'G': array[i] = 'C'; break;
                 }
             }
             Array.Reverse(array);
             return new string(array);
         }

         /// <summary>
         /// Translates a DNA string codon by codon into one-letter amino acids.
         /// </summary>
         /// <param name="input">DNA string; must not be null.</param>
         /// <returns>
         /// One letter per complete codon. A trailing fragment shorter than three
         /// letters is ignored instead of causing an out-of-range error.
         /// </returns>
         /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
         public static string StringToAminoAcid(String input)
         {
             if (input == null)
             {
                 throw new ArgumentNullException("input");
             }

             StringBuilder result = new StringBuilder(input.Length / 3);
             // "i + 3 <= Length" stops before an incomplete trailing codon.
             for (int i = 0; i + 3 <= input.Length; i += 3)
             {
                 result.Append(codonToAminoAcid(input.Substring(i, 3)));
             }
             return result.ToString();
         }

         /// <summary>
         /// Translates one DNA codon into its one-letter amino acid.
         /// </summary>
         /// <param name="codon">Three-letter codon, upper case.</param>
         /// <returns>
         /// The amino acid letter, or "X" when the codon is null or not in
         /// CODONS (this includes the stop codons).
         /// </returns>
         public static string codonToAminoAcid(String codon)
         {
             string amino;
             if (codon != null && CodonTable.TryGetValue(codon, out amino))
             {
                 return amino;
             }
             // never reached with a valid (non-stop) codon
             return "X";
         }

         /// <summary>
         /// Finds every window of <paramref name="dna"/> that encodes
         /// <paramref name="peptide"/>, on the given strand or on its reverse
         /// complement.
         /// </summary>
         /// <param name="dna">DNA string to search.</param>
         /// <param name="peptide">Peptide in one-letter amino acid codes.</param>
         /// <returns>
         /// Matches in order of position; each pair holds the start position
         /// (Key) and the matching substring of <paramref name="dna"/> (Value).
         /// Empty when either argument is null or empty, or when dna is shorter
         /// than three letters per peptide residue.
         /// </returns>
         public static List<KeyValuePair<int, string>> FindEncodingSubstrings(string dna, string peptide)
         {
             List<KeyValuePair<int, string>> hits = new List<KeyValuePair<int, string>>();
             if (string.IsNullOrEmpty(dna) || string.IsNullOrEmpty(peptide))
             {
                 return hits;
             }

             int candidateLength = peptide.Length * 3;
             // "<=" includes the last window, which ends exactly at the end of dna.
             for (int pos = 0; pos <= dna.Length - candidateLength; pos++)
             {
                 string forward = dna.Substring(pos, candidateLength);
                 // The reverse strand is read from the reverse complement of the
                 // whole window, not codon by codon.
                 if (StringToAminoAcid(forward) == peptide
                     || StringToAminoAcid(DNAComplement(forward)) == peptide)
                 {
                     hits.Add(new KeyValuePair<int, string>(pos, forward));
                 }
             }
             return hits;
         }

         /// <summary>
         /// Prints every substring of the DNA input that encodes the given
         /// peptide, then waits for Enter.
         /// </summary>
         static void Main()
         {
             // the input DNA goes here
             string dna = "";
             // the given peptide
             string givenPeptide = "KEVFEPHYY";

             int resultCount = 0;
             foreach (KeyValuePair<int, string> hit in FindEncodingSubstrings(dna, givenPeptide))
             {
                 resultCount++;
                 Console.WriteLine("Result {0,3} at position {1,6}... {2}", resultCount, hit.Key, hit.Value);
             }

             Console.WriteLine("Done");
             Console.ReadLine();
         }//end main
     }//end class
 }