
Open Reading Frames

Either strand of a DNA double helix can serve as the coding strand for RNA transcription. Hence, a given DNA string implies six total reading frames, or ways in which the same region of DNA can be translated into amino acids: three reading frames result from reading the string itself, whereas three more result from reading its reverse complement.

An open reading frame (ORF) is one which starts with a start codon and ends with a stop codon, without any other stop codons in between. Thus, a candidate protein string is derived by translating an open reading frame into amino acids until a stop codon is reached.

Given: A DNA string s of length at most 1 kbp in FASTA format.
Sample input:


Return: Every distinct candidate protein string that can be translated from ORFs of s. Strings can be returned in any order.
Sample output:




1. 输入核酸序列,并同时获得其反向互补序列。
有关反向互补序列问题可以参考这一篇文章:Rosalind Java| Complementing a Strand of DNA。
2. 遍历序列,检索起始密码子和终止密码子。获取其间序列作为备选的ORF。
3. 翻译备选ORF为蛋白质
有关翻译蛋白的问题可以参考这一篇文章:Rosalind Java| Translating RNA into Protein。




//反向互补序列public static String CompleDNA(String s) {//新建StringBuilder类数据以存储替换后的碱基StringBuilder arr = new StringBuilder();for (int i = 0; i < s.length(); i++) {switch (s.charAt(i)) {//括号中的输出类型为char类型,匹配后面的字符时需要将字符加单引号可匹配。case 'A':case 'a':arr.append('T');break;case 'C':case 'c':arr.append('G');break;case 'G':case 'g':arr.append('C');break;case 'T':case 't':arr.append('A');break;default:System.out.println("第" + (i + 1) + "位不是ACTG");break;}}String str = arr.toString();return str;}public static String reverse(String s) {String ss = "";for (int i = s.length() - 1; i >= 0; i--) {ss += s.charAt(i);}return ss;}



    //翻译RNA to Propublic static String RNA2protein(String line) {StringBuilder arr = new StringBuilder();//2.遍历核酸序列并且每三个字符串为一组提取核苷酸for (int i = 0; i < line.length(); i += 3) {String pro = line.substring(i, i + 3);//两个参数,第一个是待提取文本的起始提取位置,第二个是终止提取位置,区间内左闭右开。switch (pro) {case "AUA":case "AUC":case "AUU":arr.append("I");break;case "AUG":arr.append("M");break;case "ACA":case "ACC":case "ACG":case "ACU":arr.append("T");break;case "AAC":case "AAU":arr.append("N");break;case "AAA":case "AAG":arr.append("K");break;case "AGC":case "AGU":arr.append("S");break;case "AGA":case "AGG":arr.append("R");break;case "CUA":case "CUC":case "CUG":case "CUU":arr.append("L");break;case "CCA":case "CCC":case "CCG":case "CCU":arr.append("P");break;case "CAC":case "CAU":arr.append("H");break;case "CAA":case "CAG":arr.append("Q");break;case "CGA":case "CGC":case "CGG":case "CGU":arr.append("R");break;case "GUA":case "GUC":case "GUG":case "GUU":arr.append("V");break;case "GCA":case "GCC":case "GCG":case "GCU":arr.append("A");break;case "GAC":case "GAU":arr.append("D");break;case "GAA":case "GAG":arr.append("E");break;case "GGA":case "GGC":case "GGG":case "GGU":arr.append("G");break;case "UCA":case "UCC":case "UCG":case "UCU":arr.append("S");break;case "UUC":case "UUU":arr.append("F");break;case "UUA":case "UUG":arr.append("L");break;case "UAC":case "UAU":arr.append("Y");break;case "UGC":case "UGU":arr.append("C");break;case "UAA":case "UAG":case "UGA":break;case "UGG":arr.append("W");break;default:break;}}String str = arr.toString();return str;}







import java.util.Scanner;
import java.util.Set;public class Open_Reading_Frames {public static void main(String[] args) {//1.输入数据Scanner sc = new Scanner(;System.out.println("请输入核酸序列:");String line = sc.nextLine();String revline = CompleDNA(reverse(line));Set<String> subPro = new java.util.HashSet<String>();//2.正链检测ATG,将子序列保存到数组中for (int i = 0; i < line.length() - 3; i += 1) {//遍历时设置line.length() - 3以防止引索越界。String forwindex = line.substring(i, i + 3);if (forwindex.equals("ATG")) {String Substring = line.substring(i);for (int k = 0; k < Substring.length() - 3; k += 3) {//步长设置与检索ATG有差异,此处以3密码子为步长。String revindex = Substring.substring(k, k + 3);if (revindex.equals("TGA") || revindex.equals("TAA") || revindex.equals("TAG")) {String transDNA = line.substring(i, i + k + 3);
//                        System.out.println(transDNA);//3.子序列分别转录,翻译String RNA = transDNA.replace('T', 'U');subPro.add(RNA2protein(RNA));break;}}}}//3.反义链检测ORFfor (int i = 0; i < revline.length() - 3; i += 1) {String forwindex = revline.substring(i, i + 3);if (forwindex.equals("ATG")) {String Substring = revline.substring(i);for (int k = 0; k < Substring.length() - 3; k += 3) {//步长设置与检索ATG有差异,此处以3密码子为步长。String revindex = Substring.substring(k, k + 3);if (revindex.equals("TGA") || revindex.equals("TAA") || revindex.equals("TAG")) {String transDNA = revline.substring(i, i + k + 3);
//                        System.out.println(transDNA);//3.子序列分别转录,翻译String RNA = transDNA.replace('T', 'U');subPro.add(RNA2protein(RNA));break;}}}}for (String s : subPro) {System.out.println(s);}}//翻译RNA to Propublic static String RNA2protein(String line) {StringBuilder arr = new StringBuilder();//2.遍历核酸序列并且每三个字符串为一组提取核苷酸for (int i = 0; i < line.length(); i += 3) {String pro = line.substring(i, i + 3);//两个参数,第一个是待提取文本的起始提取位置,第二个是终止提取位置,区间内左闭右开。switch (pro) {case "AUA":case "AUC":case "AUU":arr.append("I");break;case "AUG":arr.append("M");break;case "ACA":case "ACC":case "ACG":case "ACU":arr.append("T");break;case "AAC":case "AAU":arr.append("N");break;case "AAA":case "AAG":arr.append("K");break;case "AGC":case "AGU":arr.append("S");break;case "AGA":case "AGG":arr.append("R");break;case "CUA":case "CUC":case "CUG":case "CUU":arr.append("L");break;case "CCA":case "CCC":case "CCG":case "CCU":arr.append("P");break;case "CAC":case "CAU":arr.append("H");break;case "CAA":case "CAG":arr.append("Q");break;case "CGA":case "CGC":case "CGG":case "CGU":arr.append("R");break;case "GUA":case "GUC":case "GUG":case "GUU":arr.append("V");break;case "GCA":case "GCC":case "GCG":case "GCU":arr.append("A");break;case "GAC":case "GAU":arr.append("D");break;case "GAA":case "GAG":arr.append("E");break;case "GGA":case "GGC":case "GGG":case "GGU":arr.append("G");break;case "UCA":case "UCC":case "UCG":case "UCU":arr.append("S");break;case "UUC":case "UUU":arr.append("F");break;case "UUA":case "UUG":arr.append("L");break;case "UAC":case "UAU":arr.append("Y");break;case "UGC":case "UGU":arr.append("C");break;case "UAA":case "UAG":case "UGA":break;case "UGG":arr.append("W");break;default:break;}}String str = arr.toString();return str;}//反向互补序列public static String CompleDNA(String s) {//新建StringBuilder类数据以存储替换后的碱基StringBuilder arr = new StringBuilder();for (int i = 0; i < s.length(); i++) {switch (s.charAt(i)) {//括号中的输出类型为char类型,匹配后面的字符时需要将字符加单引号可匹配。case 'A':case 'a':arr.append('T');break;case 
'C':case 'c':arr.append('G');break;case 'G':case 'g':arr.append('C');break;case 'T':case 't':arr.append('A');break;default:System.out.println("第" + (i + 1) + "位不是ACTG");break;}}String str = arr.toString();return str;}public static String reverse(String s) {String ss = "";for (int i = s.length() - 1; i >= 0; i--) {ss += s.charAt(i);}return ss;}




  Rosalind Java|Locating Restriction Sites

    Rosalind编程问题之检索限制性位点. Locating Restriction Sites Problem: A DNA string is a reverse palindrome if it ...

  Rosalind Java|Matching Random Motifs

    Rosalind编程问题之计算随机序列出现并匹配待比对序列的概率. 跟Rosalind Java|Introduction to Random Strings有异曲同工之妙. Matching Ran ...

  Rosalind Java| Computing GC Content

    Rosalind编程问题之计算GC含量. Computing GC Content Problem The GC-content of a DNA string is given by the per ...

  Rosalind Java| Counting Point Mutations

    Rosalind编程问题之计数核酸序列突变数. Counting Point Mutations Problem Given two strings s and t of equal length, ...

  ftdi android,FTDI D2xx android java not reading

    问题 I am currently porting some code I have form C# to Java to run in on an Android system. In my cod ...

  Rosalind Java|Longest Increasing Subsequence动态规划算法

    Rosalind编程问题之计算集合中最长的递增元素子集. Longest Increasing Subsequence Problem: A subsequence of a permutation ...

  Rosalind Java| Finding a Shared Motif

    Rosalind编程问题之寻找共有的motif. Finding a Shared Motif Problem A common substring of a collection of string ...

  Rosalind Java|Overlap Graphs

    Rosalind编程问题之查找重叠区段. Overlap Graphs Problem: A graph whose nodes have all been labeled can be repres ...

  Rosalind Java|Consensus and Profile

    Rosalind编程问题之统计多个序列中profile矩阵和consensus. Consensus and Profile Problem: A matrix is a rectangular ta ...


