This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository giira.

commit 54ea89068c995776063bbfac8dd31e9fe157d1ea
Author: Andreas Tille <[email protected]>
Date:   Mon Jan 9 11:55:59 2017 +0100

    New upstream version 0.0.20140625
---
 src/geneFinder/ExtractGeneCandidates.java       |   3 +
 src/geneFinder/FrameSearch.java                 | 236 ++++++++++++++++++++++++
 src/geneFinder/GeneFinder.java                  |  10 +-
 src/geneFinder/Giira.java                       |   5 +-
 src/geneFinder/ProkaryoteExtraction.java        |   3 +
 src/geneFinder/Prokaryote_Specials.java         | 211 +++++++++++++++++++++
 src/geneFinder/ReadInParameters_GeneFinder.java |  88 ++++++++-
 src/geneFinder/SamParser.java                   |  76 ++++++--
 src/types/Rna.java                              |   2 +-
 9 files changed, 612 insertions(+), 22 deletions(-)

diff --git a/src/geneFinder/ExtractGeneCandidates.java b/src/geneFinder/ExtractGeneCandidates.java
index a663c7c..3bc9bb2 100755
--- a/src/geneFinder/ExtractGeneCandidates.java
+++ b/src/geneFinder/ExtractGeneCandidates.java
@@ -57,6 +57,9 @@ public class ExtractGeneCandidates {
                                        if(line.startsWith(">")){
                                                // test if correct contig
                                                
if(line.substring(1).startsWith(contigName)){
+                                                       
if(!((line.substring(1).startsWith(contigName+" ")) || 
(line.substring(1).length() == contigName.length()))){
+                                                               continue;       
                // as an additional check to avoid picking the wrong contig 
because of name sub-similarities                                                
                                    
+                                                       }
                                                        // found right one, now 
extract sequence
                                                        while(((line = 
br.readLine()) != null) && (line.length() != 0) &&  (!(line.startsWith(">")))){
                                                                String line2 = 
"";
diff --git a/src/geneFinder/FrameSearch.java b/src/geneFinder/FrameSearch.java
index d469eaa..76fdd1f 100755
--- a/src/geneFinder/FrameSearch.java
+++ b/src/geneFinder/FrameSearch.java
@@ -26,6 +26,12 @@ public class FrameSearch {
        
        public static int findPossibleStarts_Forward(Gene cluster, StringBuffer 
contigSeq, int posAr, int tempStop){
                
+               if(!GeneFinder.alternativeCodons.isEmpty()){
+                       if(GeneFinder.alternativeCodons.containsKey("START 
FO")){
+                               return 
FrameSearch.findPossibleStarts_Forward_AlternativeStarts(cluster, contigSeq, 
posAr, tempStop, GeneFinder.alternativeCodons.get("START FO"));
+                       }
+               }
+               
                if(tempStop < 
(int)Math.max(0,(cluster.startPos-GeneFinder.readLength))){
                        return -1;
                }
@@ -69,6 +75,12 @@ public class FrameSearch {
        
        public static int findPossibleStarts_Reverse(Gene cluster, StringBuffer 
contigSeq, int posAr, int tempStop){
                
+               if(!GeneFinder.alternativeCodons.isEmpty()){
+                       if(GeneFinder.alternativeCodons.containsKey("START 
RE")){
+                               return 
FrameSearch.findPossibleStarts_Reverse_AlternativeStarts(cluster, contigSeq, 
posAr, tempStop, GeneFinder.alternativeCodons.get("START RE"));
+                       }
+               }
+               
                int start_RE = -1;
                
                if(tempStop < 
(int)Math.max(0,(cluster.startPos-GeneFinder.readLength))){
@@ -113,6 +125,112 @@ public class FrameSearch {
        }
        
        /*
+        * if alternative start and stop codons are given, perform a more general search also respecting those codons
+        * 
+        */
+       
+       public static int findPossibleStarts_Forward_AlternativeStarts(Gene 
cluster, StringBuffer contigSeq, int posAr, int tempStop,String[] 
alternativeStarts){
+               
+               int start1 = -1;
+               
+               if(tempStop < 
(int)Math.max(0,(cluster.startPos-GeneFinder.readLength))){
+                       return -1;
+               }
+               
+               String startPart = 
contigSeq.substring((int)Math.max(0,(cluster.startPos-GeneFinder.readLength)), 
tempStop);
+               
+               int startSub_alt[] = new int[alternativeStarts.length];
+               
+               for(int i = 0; i<alternativeStarts.length;++i){
+                       startSub_alt[i] = 
startPart.lastIndexOf(alternativeStarts[i]);
+               }
+               
+               java.util.Arrays.sort(startSub_alt);
+               
+               for(int i = startSub_alt.length -1; i>= 0;i--){
+                       if(startSub_alt[i] > -1){ 
+                               start1 = (int) 
Math.max(0,(cluster.startPos-GeneFinder.readLength)) + startSub_alt[i]; 
+                               break;
+                       }
+               }
+               
+               if(start1 == -1){
+                       return start1;
+               }
+               
+               boolean foundSameFrame = false;
+               
+               for(int i = 0; i<posAr;++i){
+                       if((cluster.possibleStarts_Forward[i] - start1) % 3 == 
0){
+                               foundSameFrame = true;
+                               break;
+                       }
+               }
+               
+               if(!foundSameFrame){
+                       cluster.possibleStarts_Forward[posAr++] = start1;
+                       
findPossibleStarts_Forward_AlternativeStarts(cluster,contigSeq,posAr,start1,alternativeStarts);
+               }else{
+                       
findPossibleStarts_Forward_AlternativeStarts(cluster,contigSeq,posAr,start1,alternativeStarts);
+               }
+               
+               return start1;
+       }
+
+       /*
+        * if alternative start and stop codons are given, perform a more general search also respecting those codons
+        * 
+        */
+       
+       public static int findPossibleStarts_Reverse_AlternativeStarts(Gene 
cluster, StringBuffer contigSeq, int posAr, int tempStop,String[] 
alternativeStops){
+       
+       int start_RE = -1;
+       
+       if(tempStop < 
(int)Math.max(0,(cluster.startPos-GeneFinder.readLength))){
+               return -1;
+       }
+       
+       String startPart = 
contigSeq.substring((int)Math.max(0,(cluster.startPos-GeneFinder.readLength)), 
tempStop);
+       
+       int startSub_alt[] = new int[alternativeStops.length];
+       
+       for(int i = 0; i<alternativeStops.length;++i){
+               startSub_alt[i] = startPart.lastIndexOf(alternativeStops[i]);
+       }
+       
+       java.util.Arrays.sort(startSub_alt);
+       
+       for(int i = startSub_alt.length -1; i>= 0;i--){
+               if(startSub_alt[i] > -1){ 
+                       start_RE = (int) 
Math.max(0,(cluster.startPos-GeneFinder.readLength)) + startSub_alt[i]; 
+                       break;
+               }
+       }
+       
+       if(start_RE == -1){
+               return start_RE;
+       }
+       
+       boolean foundSameFrame = false;
+       
+       for(int i = 0; i<posAr;++i){
+               if((cluster.possibleStarts_Reverse[i] - start_RE) % 3 == 0){
+                       foundSameFrame = true;
+                       break;
+               }
+       }
+       
+       if(!foundSameFrame){
+               cluster.possibleStarts_Reverse[posAr++] = start_RE;
+               
findPossibleStarts_Reverse_AlternativeStarts(cluster,contigSeq,posAr,start_RE,alternativeStops);
+       }else{
+               
findPossibleStarts_Reverse_AlternativeStarts(cluster,contigSeq,posAr,start_RE,alternativeStops);
+       }
+       
+       return start_RE;
+}
+       
+       /*
         * new way of gene extraction by remembering all starts that are not in the same frame (maxNum = 3)
         * after that, starts and stops are checked if we find a combination that defines the frame of the cluster
         * 
@@ -122,6 +240,12 @@ public class FrameSearch {
        
        public static int findPossibleStops_Forward(Gene cluster, StringBuffer 
contigSeq, int posAr, int tempStart){
                
+               if(!GeneFinder.alternativeCodons.isEmpty()){
+                       if(GeneFinder.alternativeCodons.containsKey("STOP FO")){
+                               return 
FrameSearch.findPossibleStops_Forward_AlternativeStops(cluster, contigSeq, 
posAr, tempStart, GeneFinder.alternativeCodons.get("STOP FO"));
+                       }
+               }
+               
                int stop_FO = -1;
                
                if(tempStart > (int) 
Math.min(contigSeq.length(),cluster.stopPos-2 + GeneFinder.readLength + 1)){
@@ -174,6 +298,12 @@ public class FrameSearch {
        
        public static int findPossibleStops_Reverse(Gene cluster, StringBuffer 
contigSeq, int posAr, int tempStart){
                
+               if(!GeneFinder.alternativeCodons.isEmpty()){
+                       if(GeneFinder.alternativeCodons.containsKey("STOP RE")){
+                               return 
FrameSearch.findPossibleStops_Reverse_AlternativeStop(cluster, contigSeq, 
posAr, tempStart, GeneFinder.alternativeCodons.get("STOP RE"));
+                       }
+               }
+               
                if(tempStart > (int) 
Math.min(contigSeq.length(),cluster.stopPos-2 + GeneFinder.readLength + 1)){
                        return -1;
                }
@@ -206,6 +336,112 @@ public class FrameSearch {
        }
        
        /*
+        * if alternative start and stop codons are given, perform a more general search also respecting those codons
+        * 
+        */
+       
+       public static int findPossibleStops_Forward_AlternativeStops(Gene 
cluster, StringBuffer contigSeq, int posAr, int tempStart, String[] 
alternativeStops){
+               
+               int stop_FO = -1;
+               
+               if(tempStart > (int) 
Math.min(contigSeq.length(),cluster.stopPos-2 + GeneFinder.readLength + 1)){
+                       return -1;
+               }
+               
+               String stopPart = contigSeq.substring(tempStart, (int) 
Math.min(contigSeq.length(),cluster.stopPos-2 + GeneFinder.readLength + 1));
+               
+               int stopSub_alt[] = new int[alternativeStops.length];
+               
+               for(int i = 0; i<alternativeStops.length;++i){
+                       stopSub_alt[i] = stopPart.indexOf(alternativeStops[i]);
+               }
+               
+               java.util.Arrays.sort(stopSub_alt);
+               
+               for(int i = 0; i < stopSub_alt.length;++i){
+                       if(stopSub_alt[i] > -1){ 
+                               stop_FO = tempStart + stopSub_alt[i]; 
+                               break;
+                       }
+               }
+               
+               if(stop_FO == -1){
+                       return stop_FO;
+               }
+               
+               boolean foundSameFrame = false;
+               
+               for(int i = 0; i<posAr;++i){
+                       if((cluster.possibleStops_Forward[i] - stop_FO) % 3 == 
0){
+                               foundSameFrame = true;
+                               break;
+                       }
+               }
+               
+               if(!foundSameFrame){
+                       cluster.possibleStops_Forward[posAr++] = stop_FO;
+                       
findPossibleStops_Forward_AlternativeStops(cluster,contigSeq,posAr,stop_FO+3,alternativeStops);
+               }else{
+                       
findPossibleStops_Forward_AlternativeStops(cluster,contigSeq,posAr,stop_FO+3,alternativeStops);
+               }
+               
+               return stop_FO;
+       }
+
+       /*
+        * if alternative start and stop codons are given, perform a more general search also respecting those codons
+        * 
+        */
+       
+       public static int findPossibleStops_Reverse_AlternativeStop(Gene 
cluster, StringBuffer contigSeq, int posAr, int tempStart, String[] 
alternativeStarts){
+               
+               int start1 = -1;
+               
+               if(tempStart > (int) 
Math.min(contigSeq.length(),cluster.stopPos-2 + GeneFinder.readLength + 1)){
+                       return -1;
+               }
+               
+               String stopPart = contigSeq.substring(tempStart, (int) 
Math.min(contigSeq.length(),cluster.stopPos-2 + GeneFinder.readLength + 1));
+               
+               int stopSub_alt[] = new int[alternativeStarts.length];
+               
+               for(int i = 0; i<alternativeStarts.length;++i){
+                       stopSub_alt[i] = stopPart.indexOf(alternativeStarts[i]);
+               }
+               
+               java.util.Arrays.sort(stopSub_alt);
+               
+               for(int i = 0; i < stopSub_alt.length;++i){
+                       if(stopSub_alt[i] > -1){ 
+                               start1 = tempStart + stopSub_alt[i]; 
+                               break;
+                       }
+               }
+               
+               if(start1 == -1){
+                       return start1;
+               }
+               
+               boolean foundSameFrame = false;
+               
+               for(int i = 0; i<posAr;++i){
+                       if((cluster.possibleStops_Reverse[i] - start1) % 3 == 
0){
+                               foundSameFrame = true;
+                               break;
+                       }
+               }
+               
+               if(!foundSameFrame){
+                       cluster.possibleStops_Reverse[posAr++] = start1;
+                       
findPossibleStops_Reverse_AlternativeStop(cluster,contigSeq,posAr,start1+3,alternativeStarts);
+               }else{
+                       
findPossibleStops_Reverse_AlternativeStop(cluster,contigSeq,posAr,start1+3,alternativeStarts);
+               }
+               
+               return start1;
+       }
+
+       /*
         * test if there is one of the possible start-stop codon pairs which is 
in frame
         * take the smallest interval possible
         */
diff --git a/src/geneFinder/GeneFinder.java b/src/geneFinder/GeneFinder.java
index 5dba17a..6a81249 100755
--- a/src/geneFinder/GeneFinder.java
+++ b/src/geneFinder/GeneFinder.java
@@ -30,6 +30,8 @@ public class GeneFinder {
        
        public static Map<File,String> genomeFilesWithNames = new 
HashMap<File,String>();
        public static Map<File,String> rnaFilesWithNames = new 
HashMap<File,String>();
+       
+       public static Map<String,String[]> alternativeCodons = new 
HashMap<String,String[]>();
 
        public static boolean useTopHat;                        // indicator 
for mapping tool
        public static String settingMapper;                     // setting for 
the mapping tool, differs slightly depending on which tool was chosen
@@ -80,13 +82,7 @@ public class GeneFinder {
        
        public static Object[] manager(String[] args){
                
-       ReadInParameters_GeneFinder.readIn_GF(args);
-               
-               /*Gene gene = new Gene();
-               gene.startPos = 0;
-               String seq = readInFasta();
-               Prokaryote_Specials.define_OrfsInOperon(seq,gene);
-               System.exit(0);*/
+           ReadInParameters_GeneFinder.readIn_GF(args);
                
                long timeBef = System.currentTimeMillis();
 
diff --git a/src/geneFinder/Giira.java b/src/geneFinder/Giira.java
index 4f264b5..f669535 100755
--- a/src/geneFinder/Giira.java
+++ b/src/geneFinder/Giira.java
@@ -43,7 +43,10 @@ public class Giira {
                
                try {
                        String decodedPath = URLDecoder.decode(path, "UTF-8");
-                       String scriptPath = 
decodedPath.substring(0,decodedPath.length()-9);
+                       String[] pathArr = decodedPath.split("/");
+                       int lengthName = pathArr[pathArr.length-1].length();
+                       String scriptPath = 
decodedPath.substring(0,decodedPath.length()-lengthName);
+                       //String scriptPath = 
decodedPath.substring(0,decodedPath.length()-9);
                        //System.out.println("Path of Giira: " + decodedPath);
                        
                        classPath = "";
diff --git a/src/geneFinder/ProkaryoteExtraction.java b/src/geneFinder/ProkaryoteExtraction.java
index 65c12a4..9e19847 100755
--- a/src/geneFinder/ProkaryoteExtraction.java
+++ b/src/geneFinder/ProkaryoteExtraction.java
@@ -56,6 +56,9 @@ public class ProkaryoteExtraction {
                                        if(line.startsWith(">")){
                                                // test if correct contig
                                                
if(line.substring(1).startsWith(contigName)){
+                                                       
if(!((line.substring(1).startsWith(contigName+" ")) || 
(line.substring(1).length() == contigName.length()))){
+                                                               continue;       
                // as an additional check to avoid picking the wrong contig 
because of name sub-similarities                                                
                                    
+                                                       }
                                                        // found right one, now 
extract sequence
                                                        while(((line = 
br.readLine()) != null) && (line.length() != 0) &&  (!(line.startsWith(">")))){
                                                                String line2 = 
"";
diff --git a/src/geneFinder/Prokaryote_Specials.java b/src/geneFinder/Prokaryote_Specials.java
index ed886ee..bbb3cfc 100755
--- a/src/geneFinder/Prokaryote_Specials.java
+++ b/src/geneFinder/Prokaryote_Specials.java
@@ -234,6 +234,12 @@ public class Prokaryote_Specials {
        
        public static Vector<int[]> searchFO_orfs(String inputSeq){
                
+               if(!GeneFinder.alternativeCodons.isEmpty()){
+                       if(GeneFinder.alternativeCodons.containsKey("START 
FO")){
+                               return 
searchFO_orfs_alternativeCodons(inputSeq, 
GeneFinder.alternativeCodons.get("START FO"), 
GeneFinder.alternativeCodons.get("STOP FO"));
+                       }
+               }
+               
                Vector<int[]> allORFs_FO = new Vector<int[]>();
                
                int foundNewATG = 1;
@@ -315,12 +321,118 @@ public class Prokaryote_Specials {
        }
        
        /*
+        * if alternative start and stop codons are specified, respect this in a more general orf search
+        * 
+        */
+       
+       public static Vector<int[]> searchFO_orfs_alternativeCodons(String 
inputSeq, String[] alternativeStarts_FO, String[] alternativeStops_FO){
+               
+               Vector<int[]> allORFs_FO = new Vector<int[]>();
+               
+               int foundNewATG = 1;
+               int posLastATG = 0;
+               
+               do{
+                       
+                       int startPos = -1;
+                       
+                       String startPart_alt = inputSeq.substring(posLastATG);
+                       
+                       int startSub_alt[] = new 
int[alternativeStarts_FO.length];
+                       
+                       for(int i = 0; i<alternativeStarts_FO.length;++i){
+                               startSub_alt[i] = 
startPart_alt.indexOf(alternativeStarts_FO[i]);
+                       }
+                       
+                       java.util.Arrays.sort(startSub_alt);
+                       
+                       for(int i = 0; i < startSub_alt.length;++i){
+                               if(startSub_alt[i] > -1){ 
+                                       startPos = startSub_alt[i]; 
+                                       break;
+                               }
+                       }
+                                       
+                       int stopPos = -1;
+                       
+                       int posLastStart = -1;
+                       
+                       if(startPos == -1){
+                               foundNewATG = 0;
+                               break;
+                       }else{
+                               startPos = startPos  + posLastATG;
+                               posLastATG = startPos + 3;
+                               posLastStart = startPos + 3;
+                       }
+
+                       int goOn = 0;
+                       
+                       do{
+                               goOn = 0;
+                               
+                               String stopPart = 
inputSeq.substring(posLastStart);
+                               
+                               int stopSub[] = new 
int[alternativeStops_FO.length];
+                               
+                               for(int i = 0; 
i<alternativeStops_FO.length;++i){
+                                       stopSub[i] = 
stopPart.indexOf(alternativeStops_FO[i]);
+                               }
+                               
+                               java.util.Arrays.sort(stopSub);
+                               
+                               for(int i = 0; i < stopSub.length;++i){
+                                       if(stopSub[i] > -1){ 
+                                               if(((((posLastStart + 
stopSub[i])-startPos) % 3) == 0)){
+                                                       stopPos = posLastStart 
+ stopSub[i]; 
+                                               }else{
+                                                       posLastStart = 
posLastStart + stopSub[i]+1;
+                                                       goOn = 1;
+                                               }
+                                               break;
+                                       }
+                               }
+                               
+                               if(stopPos != -1){
+                                       
+                                       if(stopPos-startPos > 30){
+                                               
if(!checkIfORFcovered(allORFs_FO,new int[]{startPos,(stopPos+2)})){
+                                                       allORFs_FO.add(new 
int[]{startPos,(stopPos+2)});
+                                                       for(int 
i=startPos;i<=stopPos+2;++i){
+                                                               cov[i]++;
+                                                       }
+                                               }else{
+                                                       alreadyCovered++;
+                                               }                               
                
+                                       }else{
+                                               notCounted++;
+                                       }
+                                       
+                                       break;
+                               }
+                               
+                       }while(goOn == 1);
+                       
+                       
+               }while(foundNewATG == 1);
+               
+               
+               return allORFs_FO;
+       }
+       
+       /*
         * searches all ORFs assuming reverse direction
         * note: no length limit is set, ORFs too short should be penalized in the BIC scoring
         */
        
        public static Vector<int[]> searchRE_orfs(String inputSeq){
                
+               if(!GeneFinder.alternativeCodons.isEmpty()){
+                       if(GeneFinder.alternativeCodons.containsKey("START 
RE")){
+                               return 
searchRE_orfs_alternativeCodons(inputSeq, 
GeneFinder.alternativeCodons.get("STOP RE"), 
GeneFinder.alternativeCodons.get("START RE")); // are stored the other way 
around so start is stop and vice versa
+                       }
+               }
+               
                Vector<int[]> allORFs_RE= new Vector<int[]>();
                
                int foundNewCAT = 1;
@@ -402,6 +514,105 @@ public class Prokaryote_Specials {
        }
        
        /*
+        * if alternative start and stop codons are specified, respect this in a more general orf search
+        * 
+        */
+       
+       public static Vector<int[]> searchRE_orfs_alternativeCodons(String 
inputSeq, String[] alternativeStarts_RE, String[] alternativeStops_RE){
+               
+               Vector<int[]> allORFs_RE= new Vector<int[]>();
+               
+               int foundNewCAT = 1;
+               int posLastCAT = inputSeq.length();
+               
+               do{
+                       int startPos = -1;
+                       
+                       String startPart_alt = inputSeq.substring(0,posLastCAT);
+                       
+                       int startSub_alt[] = new 
int[alternativeStarts_RE.length];
+                       
+                       for(int i = 0; i<alternativeStarts_RE.length;++i){
+                               startSub_alt[i] = 
startPart_alt.lastIndexOf(alternativeStarts_RE[i]);
+                       }
+                       
+                       java.util.Arrays.sort(startSub_alt);
+                       
+                       for(int i = startSub_alt.length -1; i>= 0;i--){
+                               if(startSub_alt[i] > -1){ 
+                                       startPos = startSub_alt[i]; 
+                                       break;
+                               }
+                       }
+                       
+                       int stopPos = -1;
+                       
+                       int posLastStop = -1;
+                       
+                       if(startPos == -1){
+                               foundNewCAT = 0;
+                               break;
+                       }else{
+                               posLastCAT = startPos;
+                               posLastStop = startPos;
+                       }
+                       
+                       int goOn = 0;
+                       
+                       do{
+                               goOn = 0;
+                               
+                               String stopPart = 
inputSeq.substring(0,posLastStop);
+                       
+                               int stopSub[] = new 
int[alternativeStops_RE.length];
+                               
+                               for(int i = 0; 
i<alternativeStops_RE.length;++i){
+                                       stopSub[i] = 
stopPart.lastIndexOf(alternativeStops_RE[i]);
+                               }
+                               
+                               java.util.Arrays.sort(stopSub);
+                               
+                               for(int i = stopSub.length -1; i>= 0;i--){
+                                       if(stopSub[i] > -1){ 
+                                               if(((startPos-stopSub[i]) % 3) 
== 0){
+                                                       stopPos = stopSub[i];
+                                               }else{
+                                                       posLastStop = 
stopSub[i]+2;
+                                                       goOn = 1;
+                                               }                               
                        
+                                               break;
+                                       }
+                               }
+                               
+                               if(stopPos != -1){
+                                       
+                                       if(startPos-stopPos > 30){
+                                               
if(!checkIfORFcovered(allORFs_RE,new int[]{stopPos,(startPos+2)})){
+                                                       allORFs_RE.add(new 
int[]{stopPos,(startPos+2)});
+                                                       for(int 
i=stopPos;i<=startPos+2;++i){
+                                                               cov[i]++;
+                                                       }
+                                               }else{
+                                                       alreadyCovered++;
+                                               }                               
        
+                                       }else{
+                                               notCounted++;
+                                       }
+                                       
+                                       
+                                       break;
+                               }
+                               
+                       }while(goOn == 1);
+                       
+                       
+               }while(foundNewCAT == 1);
+               
+               
+               return allORFs_RE;
+       }
+       
+       /*
         * filter out all orfs that are completely included in bigger ones
         */
        
diff --git a/src/geneFinder/ReadInParameters_GeneFinder.java b/src/geneFinder/ReadInParameters_GeneFinder.java
index 9313070..3d0fd75 100755
--- a/src/geneFinder/ReadInParameters_GeneFinder.java
+++ b/src/geneFinder/ReadInParameters_GeneFinder.java
@@ -58,6 +58,7 @@ public class ReadInParameters_GeneFinder {
                boolean foundProkaryote = false;
                boolean foundSequential = false;
                boolean foundInprogea = false;
+               boolean foundAlternativeCodons = false;
                
                
                if(!parameter.isEmpty() && args.length > 0){
@@ -230,6 +231,12 @@ public class ReadInParameters_GeneFinder {
                                                inputText += "minimal interval 
length: " + GeneFinder.interval + "\n";  
                                        }
                                                
+                               } else if(arg.equals("-altCodon")){  // 
alternative start and stop codons
+                                       String pathToAlternative = args[i+1];
+                                       
readInAlternativeStartsStops(pathToAlternative);
+                                       foundAlternativeCodons = true;
+                                       inputText += "Alternative Starts and 
stops provided \n";
+                               
                                } else if(arg.equals("-noAmbiOpti")){  // turn 
on or off the optimization of ambiguous reads
                                        foundAmbiOpti = true;                   
                                        GeneFinder.noAmbiOpti = true;
@@ -321,6 +328,21 @@ public class ReadInParameters_GeneFinder {
                
                if(!havePathOut){
                        GeneFinder.pathOut = "";
+               }else{
+                       // check if directory exists, if not, create it
+                       File f = new File(GeneFinder.pathOut);
+                       if(!f.exists()){
+                               Runtime rtAlign = Runtime.getRuntime();
+                               try {
+                                       String exe = "mkdir " + 
GeneFinder.pathOut;
+                                       Process pc = rtAlign.exec(exe);
+                                       pc.waitFor();                   
+                               } catch (IOException e) {
+                                       e.printStackTrace();
+                               } catch (InterruptedException e) {
+                                       e.printStackTrace();
+                               }
+                       }
                }
                if(!foundGenome){
                        System.out.println("No genome file specified. Use 
\"-h\" to print usage options. ");
@@ -410,6 +432,7 @@ public class ReadInParameters_GeneFinder {
                        GeneFinder.inprogeaCall = false;
                }
                
+               
                GeneFinder.logFile = new File(GeneFinder.pathOut+"log_it" + 
GeneFinder.iteration + ".txt");
                if(!GeneFinder.secondPart){
                        System.out.println(inputText);
@@ -438,6 +461,66 @@ public class ReadInParameters_GeneFinder {
        }       
        
        /*
+        * reads in the alternative start and stop codons from a given input 
file
+        * one line per codon type, with codons tab separated
+        */
+       
+       public static void readInAlternativeStartsStops(String altFile) {
+               
+               Map<String,String[]> altCodons = new HashMap<String,String[]>();
+                       
+               try {
+                       
+                       BufferedReader br = new BufferedReader(new 
FileReader(altFile));
+                       
+                       String line = "";
+                       
+                       while((line = br.readLine()) != null){
+                               
+                               String[] lineArr = line.split("\t");
+                               String[] temp = new String[lineArr.length-1];
+                               
+                               if(line.startsWith("START FO")){
+
+                                       for(int i = 1; i<lineArr.length;++i){
+                                               temp[i-1] = lineArr[i];
+                                       }
+                                       
+                                       altCodons.put("START FO",temp);
+                               }
+                               if(line.startsWith("START RE")){
+                                       for(int i = 1; i<lineArr.length;++i){
+                                               temp[i-1] = lineArr[i];
+                                       }
+                                       
+                                       altCodons.put("STOP RE",temp);  // for 
GIIRA start and stops are switched for the reverse direction
+                               }
+                               if(line.startsWith("STOP FO")){
+                                       for(int i = 1; i<lineArr.length;++i){
+                                               temp[i-1] = lineArr[i];
+                                       }
+                                       
+                                       altCodons.put("STOP FO",temp);
+                               }
+                               if(line.startsWith("STOP RE")){
+                                       for(int i = 1; i<lineArr.length;++i){
+                                               temp[i-1] = lineArr[i];
+                                       }
+                                       
+                                       altCodons.put("START RE",temp); // for 
GIIRA start and stops are switched for the reverse direction
+                               }
+                       }
+                       
+               } catch (FileNotFoundException e) {
+                       e.printStackTrace();
+               } catch (IOException e) {
+                       e.printStackTrace();
+               }
+               
+               GeneFinder.alternativeCodons = altCodons;
+       }
+       
+       /*
         * print the help text to screen
         */
        
@@ -457,7 +540,7 @@ public class ReadInParameters_GeneFinder {
                                " \n -iG [pathToGenomes] : specify path to 
directory with genome files in fasta format \n" +
                                " \n -iR [pathToRna] : specify path to 
directory with rna read files in fastq format \n" +
                                " \n -scripts [absolutePath] : specify the 
absolute path to the directory containing the required helper scripts, DEFAULT: 
directory of GIIRA.jar \n" +
-                               " \n -out [pathToResults] : specify the 
directory that shall contain the results files \n" +
+                               " \n -out [pathToResults] : specify the 
absolute pyth to the directory that shall contain the results files \n" +
                                " \n -outName [outputName] : specify desired 
name for output files, DEFAULT: genes \n" +
                                " \n -haveSam [samfileName]: if a sam file 
already exists, provide the name, else a mapping is performed. NOTE: the sam 
file has to be sorted according to read names! \n" +
                                " \n -nT [numberThreads] : specify the maximal 
number of threads that are allowed to be used, DEFAULT: 1 \n" +
@@ -468,7 +551,7 @@ public class ReadInParameters_GeneFinder {
                                //" \n -splitRunAndOpti [y/n] : indicates if 
the optimization and giira shall be run separately, to reduce the memory 
consumption (y), DEFAULT: n" +
                                " \n -mem [int] : specify the amount of memory 
that cplex is allowed to use \n" +
                                " \n -maxReportedHits [int] : if using BWA as 
mapping tool, specify the maximal number of reported hits, DEFAULT: 2 \n" +
-                               " \n -prokaryote : if specified, genome is 
treated as prokaryotic, no spliced reads are accepted, and structural genes are 
resolved. DEFAULT: n \n" +
+                               " \n -prokaryote : if specified, genome is 
treated as prokaryotic, no spliced reads are accepted, and structural genes are 
resolved. DEFAULT: False \n" +
                                " \n -minCov [double] : specify the minimum 
required coverage of the gene candidate extraction, DEFAULT: -1 (is estimated 
from mapping) \n" +
                                " \n -maxCov [double] : optional maximal 
coverage threshold, can also be estimated from mapping (DEFAULT) \n" +
                                " \n -endCov [double] : if the coverage falls 
below this value, the currently open candidate gene is closed. This value can 
be estimated from the minimum coverage (-1); DEFAULT: -1 \n" +
@@ -476,6 +559,7 @@ public class ReadInParameters_GeneFinder {
                                " \n -interval [int] : specify the minimal size 
of an interval between near candidate genes, if \"-1\" it equals the read 
length. DEFAULT: -1 \n " +
                                " \n -splLim [double] : specify the minimal 
coverage that is required to accept a splice site, if (-1) the threshold is 
equal to minCov, DEFAULT: -1 \n" +
                                " \n -rL [int] : specify read length, otherwise 
this information is extracted from SAM file (DEFAULT) \n" +
+                               " \n -altCodon [pathToAlternativeCodons] : 
specify path to txt file with alternative start and stop codons, see example 
file in scripts folder \n" +
                                " \n -samForSequential [pathToSamFile] : if it 
is desired to analyse chromosomes in a sequential manner, provide a chromosome 
sorted sam file in addition to the one sorted by read names, DEFAULT: 
noSequential \n" +
                                " \n -noAmbiOpti : if specified, ambiguous hits 
are not included in the analysis \n" +                                  
                                " \n -settingMapper [(list of parameters)] : A 
comma-separated list of the desired parameters for TopHat or BWA. Please 
provide \n" +
diff --git a/src/geneFinder/SamParser.java b/src/geneFinder/SamParser.java
index 291befe..76377be 100755
--- a/src/geneFinder/SamParser.java
+++ b/src/geneFinder/SamParser.java
@@ -138,7 +138,21 @@ public class SamParser {
 
                                                        totalHitCount++;
                                                        
-                                                       
if(!parts[0].equals(currentReadID)){  // now we have proceeded to a new read
+                                                       String adaptedName = "";
+                                                       
+                                                       
if(parts[0].contains(":")){
+                                                               String[] 
nameParts = parts[0].split(":");
+                                                               for(int 
i=0;i<nameParts.length;++i){
+                                                                       
adaptedName += nameParts[i] + ";;;";   // necessary to avoid cplex or glpk 
errors
+                                                               }
+                                                               
+                                                               adaptedName = 
adaptedName.substring(0,(adaptedName.length()-3));
+                                                       }else{
+                                                               adaptedName = 
parts[0];
+                                                       }
+                                                       
+                                                       
+                                                       
if(!adaptedName.equals(currentReadID)){  // now we have proceeded to a new read
 
                                                                
if(GeneFinder.iteration == 2 && currentRead != null && currentRead.isMulti == 
1){
                                                                        
@@ -147,7 +161,20 @@ public class SamParser {
                                                                                
do{
                                                                                
        String[] partsReaSam = lineReaSam.split("       ");                     
                                                        
                                                                                
        
-                                                                               
        if(currentRead.rnaID.equals(partsReaSam[0])){
+                                                                               
        String adaptedNameReaSam = "";
+                                                                               
        
+                                                                               
        if(partsReaSam[0].contains(":")){
+                                                                               
                String[] nameParts = partsReaSam[0].split(":");
+                                                                               
                for(int i=0;i<nameParts.length;++i){
+                                                                               
                        adaptedNameReaSam += nameParts[i] + ";;;";   // 
necessary to avoid cplex or glpk errors
+                                                                               
                }
+                                                                               
                
+                                                                               
                adaptedNameReaSam = 
adaptedNameReaSam.substring(0,(adaptedNameReaSam.length()-3));
+                                                                               
        }else{
+                                                                               
                adaptedNameReaSam = partsReaSam[0];
+                                                                               
        }
+                                                                               
        
+                                                                               
        if(currentRead.rnaID.equals(adaptedNameReaSam)){
                                                                                
                
allReassigned.put(Integer.parseInt(partsReaSam[3]),partsReaSam[2]);
                                                                                
        }else{
                                                                                
                break;
@@ -165,12 +192,12 @@ public class SamParser {
                                                                        
                                                                }
 
-                                                               currentReadID = 
parts[0];                               
+                                                               currentReadID = 
adaptedName;                            
 
                                                                // set up new 
rna node
 
                                                                Rna newRna = 
new Rna();
-                                                               newRna.rnaID = 
parts[0];                
+                                                               newRna.rnaID = 
adaptedName;             
                                                                newRna.isMulti 
= 0;
                                                                newRna.hitNum = 
1;
                                                                
newRna.assignedNum = 0;
@@ -479,9 +506,22 @@ public class SamParser {
                                                        do{
                                                                String[] 
partsReaSam = lineReaSam.split("       ");
                                                                
-                                                               
if(partsReaSam[0].compareTo(currentRead.rnaID) > 0){
+                                                               String 
adaptedNameReaSam = "";
+                                                               
+                                                               
if(partsReaSam[0].contains(":")){
+                                                                       
String[] nameParts = partsReaSam[0].split(":");
+                                                                       for(int 
i=0;i<nameParts.length;++i){
+                                                                               
adaptedNameReaSam += nameParts[i] + ";;;";   // necessary to avoid cplex or 
glpk errors
+                                                                       }
+                                                                       
+                                                                       
adaptedNameReaSam = 
adaptedNameReaSam.substring(0,(adaptedNameReaSam.length()-3));
+                                                               }else{
+                                                                       
adaptedNameReaSam = partsReaSam[0];
+                                                               }
+                                                               
+                                                               
if(adaptedNameReaSam.compareTo(currentRead.rnaID) > 0){
                                                                        break;  
// we exceeded this read, so stop
-                                                               }else 
if(currentRead.rnaID.equals(partsReaSam[0])){
+                                                               }else 
if(currentRead.rnaID.equals(adaptedNameReaSam)){
                                                                        
allReassigned.put(Integer.parseInt(partsReaSam[3]),partsReaSam[2]);
                                                                }
                                                        }while((lineReaSam = 
br.readLine()) != null);
@@ -953,9 +993,23 @@ public class SamParser {
                                                        
                                                        Rna read;
                                                        
-                                                       
if(seenReads.keySet().contains(parts[0])){
+                                                       String adaptedName = "";
+                                                       
+                                                       
if(parts[0].contains(":")){
+                                                               String[] 
nameParts = parts[0].split(":");
+                                                               for(int 
i=0;i<nameParts.length;++i){
+                                                                       
adaptedName += nameParts[i] + ";;;";   // necessary to avoid cplex or glpk 
errors
+                                                               }
+                                                               
+                                                               adaptedName = 
adaptedName.substring(0,(adaptedName.length()-3));
+                                                       }else{
+                                                               adaptedName = 
parts[0];
+                                                       }
+                                                       
+                                                       
+                                                       
if(seenReads.keySet().contains(adaptedName)){
                                                                
-                                                               Vector<Object> 
temp = seenReads.get(parts[0]);
+                                                               Vector<Object> 
temp = seenReads.get(adaptedName);
                
                                                                
if(((Integer)temp.get(0)) != 0){
                                                                        
@@ -973,7 +1027,7 @@ public class SamParser {
                                                                                
        
                                                                                
        temp.clear();
                                                                                
        temp.add(0);
-                                                                               
        seenReads.put(parts[0],temp);
+                                                                               
        seenReads.put(adaptedName,temp);
                                                                                
        
                                                                                
        if(totalHitCount % 100000 == 0){
                
@@ -1005,7 +1059,7 @@ public class SamParser {
                                                                                
interChromoTotalCount++;
                                                                                
Vector<Object> temp = new Vector<Object>();
                                                                                
temp.add(0);                                                                    
-                                                                               
seenReads.put(parts[0],temp);
+                                                                               
seenReads.put(adaptedName,temp);
                                                                                
interChromoTotalCount++;
                                                                                
break;
                                                                        }
@@ -1049,7 +1103,7 @@ public class SamParser {
                                                                        
Vector<Object> temp = new Vector<Object>();
                                                                        
temp.add(1);
                                                                        
temp.add(read);
-                                                                       
seenReads.put(parts[0],temp);   
+                                                                       
seenReads.put(adaptedName,temp);        
                                                                }
                                                        }
 
diff --git a/src/types/Rna.java b/src/types/Rna.java
index 590f31d..8e8e123 100755
--- a/src/types/Rna.java
+++ b/src/types/Rna.java
@@ -17,7 +17,7 @@ public class Rna {
        
        public double quality;          
 
-       public Vector<Object[]> contigsMappedOn = new Vector<Object[]>(); // 
contains several Arrays á: [contig, alignPos, cigarString, 
mapQual,spliceInfo,mismatchInfo,direcInfo] (one for each hit)
+       public Vector<Object[]> contigsMappedOn = new Vector<Object[]>(); // 
contains several Arrays ala: [contig, alignPos, cigarString, 
mapQual,spliceInfo,mismatchInfo,direcInfo] (one for each hit)
        
        public int isMulti;             // indicator if this read is an 
ambiguous read
        

-- 
Alioth's /usr/local/bin/git-commit-notice on 
/srv/git.debian.org/git/debian-med/giira.git

_______________________________________________
debian-med-commit mailing list
[email protected]
http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit

Reply via email to