Revision: 21644 http://sourceforge.net/p/jmol/code/21644 Author: hansonr Date: 2017-06-28 19:02:01 +0000 (Wed, 28 Jun 2017) Log Message: ----------- CIP rewrite for 4b/5
Modified Paths: -------------- trunk/Jmol/src/org/jmol/symmetry/CIPChirality.java Modified: trunk/Jmol/src/org/jmol/symmetry/CIPChirality.java =================================================================== --- trunk/Jmol/src/org/jmol/symmetry/CIPChirality.java 2017-06-27 07:36:29 UTC (rev 21643) +++ trunk/Jmol/src/org/jmol/symmetry/CIPChirality.java 2017-06-28 19:02:01 UTC (rev 21644) @@ -9,6 +9,7 @@ import javajs.util.P3; import javajs.util.P4; import javajs.util.PT; +import javajs.util.SB; import javajs.util.V3; import org.jmol.java.BS; @@ -108,6 +109,8 @@ * * code history: * + * 6/28/17 Jmol 14.19.2 major rewrite of Rule 4b + * * 6/25/17 Jmol 14.19.1 minor fixes for Rule 4b and 5 for BH64_012-015; better atropisomer check * * 6/12/2017 Jmol 14.18.2 tested for Rule 1b sphere (AY236.53, 163, 173, 192); 957 lines @@ -366,6 +369,9 @@ static final int RULE_4c = 7; static final int RULE_5 = 8; + static String prefixString = ".........."; + static Integer zero = Integer.valueOf(0); + public String getRuleName() { return JC.getCIPRuleName(currentRule); } @@ -1001,7 +1007,7 @@ Logger.info("-Rule " + getRuleName() + " CIPChirality for " + cipAtom + "-----"); // Logger if (currentRule == RULE_4a) - cipAtom.createAuxiliaryRule4Data(null, null); + cipAtom.createRule4AuxiliaryData(null, "", null); isChiral = cipAtom.sortSubstituents(0); if (Logger.debugging) { Logger.info(currentRule + ">>>>" + cipAtom); @@ -1178,84 +1184,112 @@ private class CIPAtom implements Comparable<CIPAtom>, Cloneable { /** - * the associated Jmol (or otherwise). Use of Node allows us to implement - * this in SMILES or Jmol + * unique ID for this CIPAtom for debugging only * */ - SimpleNode atom; + private int id; /** - * unique ID for this CIPAtom for debugging only + * bond distance from the root atom to this atom + */ + int sphere; + + /** + * Rule 1b measure: Distance from root of the corresponding nonduplicated + * atom (duplicate nodes only). * + * AMENDED HERE for duplicate nodes associated with a double-bond in a + * 6-membered-ring benzenoid (benzene, naphthalene, pyridine, pyrazoline, + * etc.) "Kekule-ambiguous" system to be its sphere. + * */ - private int id; + private int rootDistance; + /** - * direct ancestor of this atom + * current priority of this atom, 0-3 + */ + + private int priority; + + /** + * a flag to prevent finalization of an atom node more than once * */ - CIPAtom parent; + private boolean isSet; /** - * first atom in this atom's root branch + * a flag to indicate atom that is a duplicate of another, either due to + * ring closure or multiple bonding -- element number and mass, but no + * substituents; slightly lower in priority than standard atoms. + * */ - private CIPAtom rootSubstituent; + boolean isDuplicate = true; /** - * Rule 1 nominal element number; may be fractional for Kekule issues + * a flag to indicate an atom that has no substituents; a branch end point; + * typically H or a halogen (F, Cl, Br, I) + * */ - float elemNo; + boolean isTerminal; /** - * Rule 2 nominal atomic mass; may be a rounded value so that 12C is the - * same as C + * at atom such as N, P, or S with three non-coplanar bonds + * */ - int massNo; + boolean isTrigonalPyramidal; /** - * bond distance from the root atom to this atom + * is one atom of a double bond */ - int sphere; + boolean isAlkene; + /** - * the array of indices of the associated atoms in the path to this atom - * node from the root; used to keep track of the path to this atom in order - * to prevent infinite cycling; the last atom in the path when cyclic is a - * duplicate atom. + * the associated Jmol (or otherwise) atom; use of the SimpleNode interface + * allows us to implement this in SMILES or Jmol as well as providing an + * interface other programs could use if implementing this code * */ - BS bsPath; + SimpleNode atom; /** - * String path, for debugging only + * the application-assigned unique atom index for this atom; used in + * updating lstSmallRings * */ - private String myPath = ""; + private int atomIndex; /** - * Rule 1b measure: Distance from root of the corresponding nonduplicated - * atom (duplicate nodes only). - * - * AMENDED HERE for duplicate nodes associated with a double-bond in a - * 6-membered-ring benzenoid (benzene, naphthalene, pyridine, pyrazoline, - * etc.) "Kekule-ambiguous" system to be its sphere. - * + * true atom covalent bond count; cached for better performance */ + int bondCount; - private int rootDistance; + /** + * Rule 1a nominal element number; may be fractional for Kekule issues + */ + float elemNo; /** - * number of substituent atoms (non-null atoms[] entries) + * Rule 2 nominal atomic mass; may be a rounded value so that 12C is the + * same as C */ - private int nAtoms; + int massNo; + ///// SUBSTITUENTS //// + /** - * the number of distinct priorities determined for this atom for the - * current rule + * direct ancestor of this atom + * */ - private int nPriorities; + CIPAtom parent; /** + * sphere-1 node in this atom's root branch + */ + private CIPAtom rootSubstituent; + + /** * a count of how many 1H atoms we have found on this atom; used to halt * further processing of this atom */ @@ -1262,38 +1296,60 @@ private int h1Count; /** - * auxiliary chirality as determined in createAuxiliaryRule4Data + * the substituents -- up to four supported here at this time + * */ - private String auxChirality = "~"; + CIPAtom[] atoms = new CIPAtom[4]; /** - * a flag to prevent finalization of an atom node more than once - * + * number of substituent atoms (non-null atoms[] entries) */ - private boolean isSet; + private int nAtoms; /** - * a flag to indicate atom that is a duplicate of another, either due to - * ring closure or multiple bonding -- element number and mass, but no - * substituents; slightly lower in priority than standard atoms. + * bitset of indices of the associated atoms in the path to this atom + * node from the root; used to keep track of the path to this atom in order + * to prevent infinite cycling; the last atom in the path when cyclic is a + * duplicate atom. * */ - boolean isDuplicate = true; + BS bsPath; /** - * a flag to indicate an atom that has no substituents; a branch end point; - * typically H or a halogen (F, Cl, Br, I) + * String path, for debugging * */ - boolean isTerminal; + private String myPath = ""; /** - * is one atom of a double bond + * priorities associated with each subsituent from high (0) to low (3); due + * to equivaliencies at a given rule level, these numbers may duplicate and + * have gaps - for example, [0 2 0 3] */ + int[] priorities = new int[4]; - boolean isAlkene; + /** + * the number of distinct priorities determined for this atom for the + * current rule; 0-4 for the root atom; 0-3 for all others + */ + private int nPriorities; /** + * pointer to this branch's spiro end atom if it is found to be spiro + */ + + CIPAtom spiroEnd; + + /** + * Rule 1b hash table that maintains distance of the associated + * nonduplicated atom node + * + */ + Map<Integer, Integer> htPathPoints; + + /////// double and triple bonds /////// + + /** * first atom of an alkene or cumulene atom */ @@ -1306,12 +1362,6 @@ CIPAtom alkeneChild; /** - * last atom of a root-spiro - */ - - CIPAtom spiroEnd; - - /** * a flag used in Rule 3 to indicate the second carbon of a double bond */ @@ -1318,56 +1368,48 @@ private boolean isAlkeneAtom2; /** - * temporary check for pseudochirality + * is an atom that is involved in more than one Kekule form */ - boolean doCheckPseudo; + boolean isKekuleAmbiguous; /** - * permanent check for pseudochirality + * first =X= atom in a string of =X=X=X=... */ - boolean isPseudo; + private CIPAtom nextSP2; /** - * Force achiral condition due to two identical ligands after Rule 4 check. + * potentially useful information that this duplicate is from an double- or + * triple-bond, not a ring closure */ - boolean achiral; + boolean sp2Duplicate; /** - * true atom covalent bond count + * alkene or even cumulene, so chirality will be EZ, not MP */ - int bondCount; + private boolean isEvenEne = true; - /** - * the substituents -- up to four supported here at this time - * - */ - CIPAtom[] atoms = new CIPAtom[4]; + //// AUXILIARY CHIRALITY for Rules 4 and 5 ///// /** - * priorities associated with each subsituent from high (0) to low (3); due - * to equivaliencies at a given rule level, these numbers may duplicate and - * have gaps - for example, [0 2 0 3] + * already-determined auxiliary chirality (E/Z, R/S, etc) for this atom + * node; this value must be cleared after Rule 3 if continuing */ - int[] priorities = new int[4]; + private int auxEZ = STEREO_UNDETERMINED; /** - * a list that tracks stereochemical paths for Mata analysis - * + * temporary check for pseudochirality */ - private String[] rule4List; + boolean doCheckPseudo; /** - * the application-assigned unique atom index for this atom; used in - * updating lstSmallRings - * + * permanent check for pseudochirality */ - private int atomIndex; + boolean isPseudo; /** - * already-determined auxiliary chirality (E/Z, R/S, etc) for this atom - * node; this value must be cleared after Rule 3 if continuing + * Force achiral condition due to two identical ligands after Rule 4 check. */ - private int auxEZ = STEREO_UNDETERMINED; + boolean achiral; /** * a flag set false in Mata analysis @@ -1385,9 +1427,11 @@ boolean isAxialRoot; /** - * first =X= atom in a string of =X=X=X=... + * auxiliary chirality as determined in createAuxiliaryRule4Data; + * possibilities include R/S, r/s, M/P, m/p, C/T (but not c/t), or ~ (ASCII + * 126, no stereochemistry); for sorting purposes C=M=R < p=r=s < ~ */ - private CIPAtom nextSP2; + private String auxChirality = "~"; /** * points to next branching point that has two or more chiral branches @@ -1397,38 +1441,27 @@ /** * [sphere, nR, nS] -- tracks the number of R and S centers for the lowest sphere */ - private int[] rule4Count; + private Object[] rule4Count; /** + * a list that tracks stereochemical paths for this branch section for Mata + * analysis in the form of pAAAAA where p is 0-3, the priority up through + * Rule 4a, and A is one of R, S, M, P, C, T, r, s, m, p, where C = seqCis; + * T = seqTrans; seqcis and seqtrans are irrelevant) * */ - private int priority; + private String[] rule4List; /** - * Rule 1b hash table that maintains distance of the associated - * nonduplicated atom node + * a list of string buffers that tracks full-length stereochemical paths for + * the branch's root atom only for Mata analysis in the form of + * p1XXXXXp2YYYYp3ZZZZZ where pn is 0-3, the priority up through Rule 4a; + * used for the final flattening of the ligand path for like/unlike analysis * */ - Map<Integer, Integer> htPathPoints; - - boolean isTrigonalPyramidal; - - /** - * is an atom that is involved in more than one Kekule form - */ - boolean isKekuleAmbiguous; - - /** - * potentially useful information that this duplicate is from an double- or - * triple-bond, not a ring closure - */ - boolean sp2Duplicate; - - /** - * alkene or even cumulene, so chirality will be EZ, not MP - */ - private boolean isEvenEne = true; - + + private Lst<Object[]> rule4Paths; + CIPAtom() { // had a problem in JavaScript that the constructor of an inner function cannot // access this.b$ yet. That assignment is made after construction. @@ -2265,59 +2298,61 @@ private int checkRule4And5(int i, int j) { return (rule4List[i] == null && rule4List[j] == null ? TIED : rule4List[j] == null ? A_WINS : rule4List[i] == null ? B_WINS - : compareMataPair(i, j)); + : compareMataPair(atoms[i], atoms[j])); } /** - * Chapter 9 Rules 4b and 5: like vs. unlike + * Chapter 9 Rules 4b and 5: like vs. unlike; for root substituents only. * - * Compare two strings such as RSSSR and SRSRR for like and unlike and find - * a winner. Return IGNORE if they are identical; return A_WINS or B_WINS if - * there is a winner, and set this.doCheckPseudo if they are opposites with - * reference atom R or S (but not r or s). + * (1) Generate full set of branching stereochemical paths (rule4Paths) for + * each ligand. * - * @param ia - * @param ib + * (2) Determine the reference descriptor (R, S, or both) for each ligand. + * + * (3) Flatten each path to one string by traversing the sorted list sphere + * by sphere. + * + * (4) Compare paths using like/unlike criteria. + * + * @param a + * @param b * @return 0 (TIED), -1 (A_WINS), or 1 (B_WINS), or Integer.MIN_VALUE * (IGNORE) */ - private int compareMataPair(int ia, int ib) { - // note that opposites will need to generate "R" or "S" keys, which will be - // resolved as "r" or "s" - // but generally we will want to process this as "R" and "S" - // note that this analysis cannot be done ahead of time - - String aStr = rule4List[ia].substring(1), bStr = rule4List[ib] - .substring(1); - boolean haveRSOptions = false, isRule5 = (currentRule == RULE_5); - String sa = "", sb = ""; - if (atoms[ia].nextChiralBranch != null) { - sa = atoms[ia].nextChiralBranch.getMataList(getFirstRef(aStr), isRule5); - haveRSOptions = (sa.indexOf("|") >= 0); - } - if (atoms[ib].nextChiralBranch != null) { - sb = atoms[ib].nextChiralBranch.getMataList(getFirstRef(bStr), isRule5); - if (haveRSOptions != (sb.indexOf("|") >= 0)) { - // P-92.5.2.1.i check -- one ref beats two refs - // this is a convenience check only, actually, as the - // ligand with two refs is guaranteed to lose to the one with one ref. - return (haveRSOptions ? B_WINS : A_WINS); - } - } - if (haveRSOptions) { - aStr += "|" + sa; - bStr += "|" + sb; - } else { - aStr += sa; - bStr += sb; - } + private int compareMataPair(CIPAtom a, CIPAtom b) { + // Step 1: Generate paths by building a list of all paths from the root substituent out. + // For example, say we have the following, where (n) is a priority, if necessary: + // + // S(1) + // / + // []--S--r + // \ + // R(1) + // + // then we build ["SrS" and "SrR"] + // - if (true || Logger.debugging) - Logger.info(dots() + this + " comparing " + atoms[ia] + " " + aStr - + " to " + atoms[ib] + " " + bStr); // Logger - if (isRule5 || !haveRSOptions && aStr.length() != bStr.length()) { - // note that these two strings can be different lengths - // if we have sXX and ~ + a.generateRule4Paths(); + b.generateRule4Paths(); + // + boolean isRule5 = (currentRule == RULE_5); + String aref = (isRule5 ? "R" : a.getRule4ReferenceDescriptor()); + String bref = (isRule5 ? "R" : b.getRule4ReferenceDescriptor()); + boolean haveRSOptions = (aref == "RS"); + if (aref.length() != bref.length()) + return (haveRSOptions ? B_WINS : A_WINS); + String aStr = (haveRSOptions ? a.flattenRule4Paths('R', isRule5) + "|" + + a.flattenRule4Paths('S', isRule5) : a.flattenRule4Paths( + aref.charAt(0), isRule5)); + String bStr = (haveRSOptions ? b.flattenRule4Paths('R', isRule5) + "|" + + b.flattenRule4Paths('S', isRule5) : b.flattenRule4Paths( + bref.charAt(0), isRule5)); + + if (Logger.debugging) + Logger.info(dots() + this + " comparing " + a + " " + aStr + " to " + b + + " " + bStr); // Logger + if (isRule5) { + // note that these two strings cannot be different lengths return sign(aStr.compareTo(bStr)); } aStr = cleanRule4Str(aStr); @@ -2331,22 +2366,18 @@ // Solution is to SUM all winners. If that is 0, then they are the same String[] aList = PT.split(aStr, "|"), bList = PT.split(bStr, "|"); int minScore = Integer.MAX_VALUE, sumScore = 0; - aStr = aList[0] + aList[1]; - bStr = bList[0] + bList[1]; - for (int i = aList.length; --i >= 1;) { - for (int j = bList.length; --j >= 1;) { - int score = compareRule4PairStr(aList[0] + aList[i], bList[0] - + bList[j], true); + for (int i = aList.length; --i >= 0;) { + for (int j = bList.length; --j >= 0;) { + int score = compareRule4PairStr(aList[i], bList[j], true); sumScore += score; - if (score != TIED && Math.abs(score) <= minScore) { - minScore = Math.abs(score); - aStr = aList[0] + aList[i]; - bStr = bList[0] + bList[j]; + if (score != TIED && Math.abs(score) <= Math.abs(minScore)) { + minScore = score; } } } - if (sumScore == TIED) - return TIED; + minScore = (sumScore == TIED ? TIED : minScore < 0 ? A_WINS : B_WINS); + Logger.info(aStr + (minScore == A_WINS ? " > " : minScore == B_WINS ? " < " : " = ") + bStr); + return minScore; } if (aStr.length() == 1 && "RS".indexOf(aStr) < 0) { int score = checkEnantiomer(aStr, bStr, 0, aStr.length(), " rs"); @@ -2361,122 +2392,176 @@ return compareRule4PairStr(aStr, bStr, false); } - private String cleanRule4Str(String aStr) { - return (aStr.length() > 1 ? PT.replaceAllCharacters(aStr, "sr~", "") - : aStr); - } - /** - * Just get the first R- or S-equivalent in "~~~~xxxxx" - * - * @param aStr - * @return "R", "S", or null + * Combine all subpaths */ - private String getFirstRef(String aStr) { - int r = aStr.indexOf("R"), s = aStr.indexOf("S"); - return (r < 0 && s < 0 ? null : s > 0 && (r < 0 || r > s) ? "S" : "R"); + private void generateRule4Paths() { + rule4Paths = new Lst<Object[]>(); + appendRule4Paths(this, null); + + if (true || Logger.debugging) { + Logger.info("Rule 4b paths for " + this + "=\n"); + for (int i = 0; i < rule4Paths.size(); i++) { // Logger + String s = rule4Paths.get(i)[0].toString(); // Logger + int prefixLen = ((Integer)rule4Paths.get(i)[1]).intValue(); // Logger + while (prefixString.length() < prefixLen) // Logger + prefixString += prefixString; // Logger + Logger.info(prefixString.substring(0, prefixLen) + s.substring(prefixLen)); + } + Logger.info(""); + } } + + private void appendRule4Paths(CIPAtom rootsub, SB path) { + String s0 = (path == null ? auxChirality : path.toString()); + if (path == null) + path = rootsub.addRule4Path(s0, true); + int iFirst = 4; + int lastPriority = -1; + boolean addPriority = false; + for (int i = 4; --i >= 0;) + if (rule4List[i] != null) { + iFirst = i; + if (lastPriority == -1) + lastPriority = atoms[i].priority; + else if (lastPriority != atoms[i].priority) + addPriority = true; + } + for (int i = iFirst; i < 4; i++) + if (rule4List[i] != null) { + if (i != iFirst) + path = rootsub.addRule4Path(s0, false); + if (addPriority) + path.appendI(atoms[i].priority + 1); + path.append(rule4List[i]); + if (atoms[i].nextChiralBranch != null) + atoms[i].nextChiralBranch.appendRule4Paths(rootsub, path); + } + } - /** - * Retrieve the Mata Rule 4b list for a given atom. - * - * @param aref - * @param isRule5 - * @return a String representation of the path through the atoms - * - */ - private String getMataList(String aref, boolean isRule5) { - int n = 0; - for (int i = rule4List.length; --i >= 0;) - if (rule4List[i] != null) - n++; - String[] listA = new String[n]; - for (int j = n, i = rule4List.length; --i >= 0;) - if (rule4List[i] != null) - listA[--j] = rule4List[i]; - if (aref == null) { - aref = getMataRef(isRule5); - } else { - // we need to add the priority business only if this is the first case - for (int i = 0; i < n; i++) - listA[i] = "." + listA[i].substring(1); - } - return (aref.length() == 1 ? getMataSequence(listA, aref, isRule5) - : getMataSequence(listA, "R", false) + "|" - + getMataSequence(listA, "S", false)); + private SB addRule4Path(String s0, boolean doPrefix) { + SB path = new SB(); + path.append(s0); + rule4Paths.addLast(new Object[] {path, new Integer(doPrefix ? 0 : s0.length())}); + return path; } /** - * The reference designation is the most popular of R and S of the highest- - * priority node, or both if there are the same number at highest-priority - * node level + * rule4Count holds in [1] and [2] the number of R and S descriptors, respectively, + * in the highest ranking sphere with stereochemistry. * - * @param isRule5 * @return "R", "S", or "RS" */ - private String getMataRef(boolean isRule5) { - return (isRule5 ? "R" : rule4Count[STEREO_R] > rule4Count[STEREO_S] ? "R" - : rule4Count[STEREO_R] < rule4Count[STEREO_S] ? "S" : "RS"); + private String getRule4ReferenceDescriptor() { + if (rule4Count == null) + return ("RCM".indexOf(auxChirality) >= 0 ? "R" : "S"); + int nR = ((Integer) rule4Count[1]).intValue(); + int nS = ((Integer) rule4Count[2]).intValue(); + return (nR > nS ? "R" : nR < nS ? "S" : "RS"); } /** * This is the key Mata method -- getting the correct sequence of R and S - * from a set of diastereomorphic paths. Given a specific reference - * designation, the task is to sort the paths based on priority (we can't - * change the base priority already determined using Rules 1-3) and - * reference. + * from a set of diastereomorphic paths. * - * We do the sort lexicographically, simply using Array.sort(String[]) with - * our reference atom temporarily given the lowest ASCII characater "A" - * (65). + * Given a specific reference descriptor, the task is to sort the paths + * based on priority and reference. We do the sort lexicographically, simply + * using Array.sort(String[]) with our reference atom temporarily given the + * lowest ASCII characater "A" (65). * - * @param lst - * @param chRef + * @param ref * @param isRule5 - * @return one string, possibly separated by | indicating that the result - * has both an R and S side to it - */ - private String getMataSequence(String[] lst, String chRef, boolean isRule5) { - int n = lst.length, len = 0; - String[] lst1 = new String[n]; - for (int j = n, i = rule4List.length; --i >= 0;) { - if (rule4List[i] != null) { - --j; - lst1[j] = lst[j]; - if (atoms[i].nextChiralBranch != null) - lst1[j] += atoms[i].nextChiralBranch.getMataList(chRef, isRule5); - } + * @return a string that can be compared with another using like/unlike + */ + private String flattenRule4Paths(char ref, boolean isRule5) { + int nPaths = rule4Paths.size(); + String[] paths = new String[nPaths]; + int nMax = 0; + for (int i = 0; i < nPaths; i++) { + // remove all enantiomorphic descriptors + String s = PT.replaceAllCharacters(rule4Paths.get(i)[0].toString(), "srctmp", "~"); + // remove all + s = s.replace(ref, 'A'); + if (s.length() > nMax) + nMax = s.length(); + paths[i] = s; } - String[] sorted = (isRule5 ? lst1 : getMataSortedList(lst1, chRef)); - for (int i = 0; i < n; i++) { - String rs = sorted[i]; - if (rs.length() > len) - len = rs.length(); - } - - // Strip out all non-R/S designations - String mlist = ""; - char ch; - for (int i = 1; i < len; i++) { - for (int j = 0; j < n; j++) { - String rs = sorted[j]; - if (i < rs.length() && (ch = rs.charAt(i)) != '~' && ch != ';') - mlist += ch; + Arrays.sort(paths); + // now remove the + for (int i = 0; i < nPaths; i++) + paths[i] = PT.replaceAllCharacters(paths[i], "1234", ""); + SB sb = new SB(); + String s; + for (int i = 0; i < nMax; i++) { + for (int k = 0; k < nPaths; k++) { + s = paths[k]; + sb.append(i < s.length() ? s.substring(i, i + 1) : "~"); } - if (isRule5) { - // clear out this sphere and resort - for (int j = 0; j < n; j++) { - String rs = sorted[j]; - if (i < rs.length()) - sorted[j] = rs.substring(0, i) + "~" + rs.substring(i + 1); - } - Arrays.sort(sorted); - } - } - return mlist; + } + return sb.toString().replace('A', ref); } /** + * Remove all unnecessary characters prior to R/S comparison. Note that at + * this time all C/T and M/P have been changed to R/S already. + * + * @param aStr + * @return clean RS-only string + */ + private String cleanRule4Str(String aStr) { + return (aStr.length() > 1 ? PT.replaceAllCharacters(aStr, "rsmpct~", "") + : aStr); + } + +// TODO: what about Rule 5?? +// +// * @param lst +// * @param chRef +// * @param isRule5 +// * @return one string, possibly separated by | indicating that the result +// * has both an R and S side to it +// */ +// private String getMataSequence(String[] lst, String chRef, boolean isRule5) { +// int n = lst.length, len = 0; +// String[] lst1 = new String[n]; +// for (int j = n, i = rule4List.length; --i >= 0;) { +// if (rule4List[i] != null) { +// --j; +// lst1[j] = lst[j]; +// if (atoms[i].nextChiralBranch != null) +// lst1[j] += atoms[i].nextChiralBranch.getMataList(chRef, isRule5); +// } +// } +// String[] sorted = (isRule5 ? lst1 : getMataSortedList(lst1, chRef)); +// for (int i = 0; i < n; i++) { +// String rs = sorted[i]; +// if (rs.length() > len) +// len = rs.length(); +// } +// +// // Strip out all non-R/S designations +// String mlist = ""; +// char ch; +// for (int i = 1; i < len; i++) { +// for (int j = 0; j < n; j++) { +// String rs = sorted[j]; +// if (i < rs.length() && (ch = rs.charAt(i)) != '~' && ch != ';') +// mlist += ch; +// } +// if (isRule5) { +// // clear out this sphere and resort +// for (int j = 0; j < n; j++) { +// String rs = sorted[j]; +// if (i < rs.length()) +// sorted[j] = rs.substring(0, i) + "~" + rs.substring(i + 1); +// } +// Arrays.sort(sorted); +// } +// } +// return mlist; +// } + + /** * Comparison of two strings such as RSSR and SRSS for Rule 4b. * * @param aStr @@ -2511,28 +2596,28 @@ return aref < bref ? A_WINS : B_WINS; } - /** - * Sort Mata list of ["RS...", "SR..."] by temporarily assigning the - * reference atom chirality the letter "A" and then sorting - * lexicographically. - * - * @param lst - * @param aref - * @return sorted list - */ - private String[] getMataSortedList(String[] lst, String aref) { - int n = lst.length; - String[] sorted = new String[n]; - for (int i = 0; i < n; i++) - sorted[i] = PT.rep(lst[i], aref, "A"); - Arrays.sort(sorted); - for (int i = 0; i < n; i++) - sorted[i] = PT.rep(sorted[i], "A", aref); - if (Logger.debuggingHigh) - for (int i = 0; i < n; i++) // Logger - Logger.info("Sorted Mata list " + i + " " + aref + ": " + sorted[i]); - return sorted; - } +// /** +// * Sort Mata list of ["RS...", "SR..."] by temporarily assigning the +// * reference atom chirality the letter "A" and then sorting +// * lexicographically. +// * +// * @param lst +// * @param aref +// * @return sorted list +// */ +// private String[] getMataSortedList(String[] lst, String aref) { +// int n = lst.length; +// String[] sorted = new String[n]; +// for (int i = 0; i < n; i++) +// sorted[i] = PT.rep(lst[i], aref, "A"); +// Arrays.sort(sorted); +// for (int i = 0; i < n; i++) +// sorted[i] = PT.rep(sorted[i], "A", aref); +// if (Logger.debuggingHigh) +// for (int i = 0; i < n; i++) // Logger +// Logger.info("Sorted Mata list " + i + " " + aref + ": " + sorted[i]); +// return sorted; +// } /** * This critical method creates a list of downstream (higher-sphere) @@ -2542,12 +2627,13 @@ * * @param node1 * first node; sphere 1 + * @param priorityPath * @param ret * CIPAtom of next stereochemical branching point * * @return collective string, with setting of rule4List */ - String createAuxiliaryRule4Data(CIPAtom node1, CIPAtom[] ret) { + String createRule4AuxiliaryData(CIPAtom node1, String priorityPath, CIPAtom[] ret) { int rs = -1; String subRS = "", s = (node1 == null ? "" : "~"); boolean isBranch = false, noPseudo = false; @@ -2563,14 +2649,14 @@ if (a != null && !a.isDuplicate && !a.isTerminal) { a.priority = priorities[i]; ret1[0] = null; - String ssub = a.createAuxiliaryRule4Data(node1 == null ? a : node1, - ret1); + String ssub = a.createRule4AuxiliaryData(node1 == null ? a : node1, + priorityPath + a.priority, ret1); if (ret1[0] != null) { a.nextChiralBranch = ret1[0]; if (ret != null) ret[0] = ret1[0]; } - rule4List[i] = a.priority + ssub; + rule4List[i] = ssub; if (a.nextChiralBranch != null || isChiralSequence(ssub)) { mataList[nRS] = i; nRS++; @@ -2590,7 +2676,7 @@ case 2: if (node1 != null) { // we want to now if these two are enantiomorphic, identical, or diastereomorphic. - adj = (compareRule4aIsomers(mataList[0], mataList[1])); + adj = (compareRule4Isomers(mataList[0], mataList[1])); switch (adj) { case TIED: // identical @@ -2632,7 +2718,7 @@ adj0 = TIED; for (int i = 0; i < 2; i++) { for (int j = i + 1; j < 3; j++) { - adj0 = (compareRule4aIsomers(mataList[i], mataList[j])); + adj0 = (compareRule4Isomers(mataList[i], mataList[j])); switch (adj0) { case A_WINS: case B_WINS: @@ -2698,7 +2784,7 @@ } if (rs != NO_CHIRALITY) { auxChirality = s; - addMataRef(sphere, priority, rs); + addMataRef(priorityPath, rs); subRS = ""; if (isSeqCT) { nextChiralBranch = alkeneChild; @@ -2725,8 +2811,10 @@ s = (adj == A_WINS ? "s" : "r"); break; } - if (noPseudo) + if (noPseudo) { s = s.toUpperCase(); // Rule 4c or diasteriomers // AY-236.148 + parent.addMataRef(priorityPath, s.equals("R") ? STEREO_R: STEREO_S); + } subRS = ""; //if (ret != null) //ret[0] = null; @@ -2742,7 +2830,7 @@ if (atom1.isPseudo) { s = s.toLowerCase(); } else { - parent.addMataRef(sphere, priority, rs); + parent.addMataRef(priorityPath, rs); } if (Logger.debugging) Logger.info("AUX " + s + " for " + atom1.myPath); @@ -2795,35 +2883,32 @@ } /** - * Accumlate the number of R and S centers at a given sphere+priority level + * Accumlate the number of R and S centers of a given cumlative priority * - * @param sphere - * 1,2,3... - * @param priority - * 1-4 - * @paramPriority + * @param priorityPath * @param rs */ - private void addMataRef(int sphere, int priority, int rs) { + private void addMataRef(String priorityPath, int rs) { if (rule4Count == null) - rule4Count = new int[] { Integer.MAX_VALUE, 0, 0 }; - int n = sphere * 10 + priority; // COUNT_LINE - if (n <= rule4Count[0]) { - if (n < rule4Count[0]) { - rule4Count[0] = n; - rule4Count[STEREO_R] = rule4Count[STEREO_S] = 0; - } - rule4Count[rs]++; + rule4Count = new Object[] { "5", zero, zero }; + switch (sign(priorityPath.compareTo(rule4Count[0].toString()))) { + case -1: + rule4Count[0] = priorityPath; + rule4Count[STEREO_R] = rule4Count[STEREO_S] = zero; + //$FALL-THROUGH$ + case 0: + rule4Count[rs] = Integer.valueOf(((Integer) rule4Count[rs]).intValue() + 1); + break; } if (Logger.debugging) Logger.info(this + " addMata " + sphere + " " + priority + " " + rs + " " + PT.toJSON("rule4Count", rule4Count)); // Logger if (parent != null) - parent.addMataRef(sphere, priority, rs); + parent.addMataRef(priorityPath, rs); } /** - * Check for enantiomeric strings such as SSR/RRS + * Check for strings such as SSR/RRS * * @param i1 * @param i2 @@ -2831,17 +2916,14 @@ * equal, A_WINS for enantiomer Rxxx, B_WINS for Sxxxx, or * DIASTERIOMERIC_A_WINS or DIASTERIOMERIC_B_WINS */ - private int compareRule4aIsomers(int i1, int i2) { + private int compareRule4Isomers(int i1, int i2) { String rs1 = rule4List[i1], rs2 = rule4List[i2]; - if (rs1.charAt(0) != rs2.charAt(0)) + if (priorities[i1] != priorities[i2] || atoms[i1].nextChiralBranch != null) return NOT_RELEVANT; int n = rs1.length(); - if (n != rs2.length()) - return NOT_RELEVANT; +// if (n != rs2.length()) +// return NOT_RELEVANT; // not possible? - if (atoms[i1].nextChiralBranch != null) - return NOT_RELEVANT; - if (rs1.equals(rs2)) { @@ -2855,7 +2937,7 @@ return TIED; } String rs = rs1 + rs2; - //System.out.println("compareRule4aIsomers:" + rs1 + " vs. " + rs2); + //System.out.println("compareRule4Isomers:" + rs1 + " vs. " + rs2); boolean haveRS = (rs.indexOf("R") >= 0 || rs.indexOf("S") >= 0); rs = (haveRS ? "~RS" : "~rs"); if (haveRS) { @@ -2862,9 +2944,9 @@ rs1 = PT.replaceAllCharacters(rs1, "rs", "~"); rs2 = PT.replaceAllCharacters(rs2, "rs", "~"); } - int score = checkEnantiomer(rs1, rs2, 1, n, rs); + int score = checkEnantiomer(rs1, rs2, 0, n, rs); if (score == DIASTEREOMERIC) { - switch (compareMataPair(i1, i2)) { + switch (compareMataPair(atoms[i1], atoms[i2])) { case A_WINS: return DIASTEREOMERIC_A_WINS; case B_WINS: @@ -3025,7 +3107,7 @@ @Override public String toString() { - return (atom == null ? "<null>" : "[" + currentRule + "." + sphere + "," + return (atom == null ? "<null>" : "[" + currentRule + "." + sphere + "." + priority + "," + id + "." + atom.getAtomName() + (isDuplicate ? "*(" + rootDistance + ")": "") + (auxChirality == null ? "" : auxChirality) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. ------------------------------------------------------------------------------ Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot _______________________________________________ Jmol-commits mailing list Jmol-commits@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/jmol-commits