This is an automated email from the git hooks/post-receive script. plessy pushed a commit to branch master in repository picard-tools.
commit ff4fd825cb889fd20ac8cbcec0c288184306c620 Author: Charles Plessy <[email protected]> Date: Mon Oct 28 19:11:31 2013 +0900 Imported Upstream version 1.101 --- build.xml | 2 +- src/java/net/sf/picard/pedigree/PedFile.java | 99 +++++++++++ src/java/net/sf/picard/pedigree/PedTrio.java | 45 +++++ src/java/net/sf/picard/pedigree/Sex.java | 26 +++ src/java/net/sf/picard/util/MathUtil.java | 17 ++ src/java/net/sf/samtools/util/Iso8601Date.java | 2 +- .../net/sf/samtools/util/RelativeIso8601Date.java | 187 ++++++++++++++++++++ .../org/broadinstitute/variant/vcf/VCFCodec.java | 4 +- .../broadinstitute/variant/vcf/VCFFileReader.java | 40 +++-- .../sf/samtools/util/RelativeIso8601DateTest.java | 39 ++++ 10 files changed, 442 insertions(+), 19 deletions(-) diff --git a/build.xml b/build.xml index 5b6c3bd..e5c07bc 100755 --- a/build.xml +++ b/build.xml @@ -43,7 +43,7 @@ <!-- Get SVN revision, if available, otherwise leave it blank. --> <exec executable="svnversion" outputproperty="repository.revision" failifexecutionfails="false"/> <property name="repository.revision" value=""/> - <property name="sam-version" value="1.100"/> + <property name="sam-version" value="1.101"/> <property name="picard-version" value="${sam-version}"/> <property name="tribble-version" value="${sam-version}"/> <property name="variant-version" value="${sam-version}"/> diff --git a/src/java/net/sf/picard/pedigree/PedFile.java b/src/java/net/sf/picard/pedigree/PedFile.java new file mode 100644 index 0000000..1a429d6 --- /dev/null +++ b/src/java/net/sf/picard/pedigree/PedFile.java @@ -0,0 +1,99 @@ +package net.sf.picard.pedigree; + +import net.sf.picard.io.IoUtil; +import net.sf.picard.util.Log; +import net.sf.samtools.util.RuntimeIOException; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.IOException; +import java.util.Iterator; +import java.util.Map; +import java.util.TreeMap; +import java.util.regex.Pattern; + +/** + * Represents a .ped file of family information as documented here: + * http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml + * + * Stores the information in memory as a map of individualId -> Pedigree information for that individual + */ +public class PedFile extends TreeMap<String,PedTrio> { + private static final Log log = Log.getInstance(PedFile.class); + static final Pattern WHITESPACE = Pattern.compile("\\s+"); + + /** Adds a trio to the PedFile keyed by the individual id. */ + public void add(final PedTrio trio) { + put(trio.getIndividualId(), trio); + } + + /** + * Writes a set of pedigrees out to disk. + */ + public void write(final File file) { + IoUtil.assertFileIsWritable(file); + final BufferedWriter out = IoUtil.openFileForBufferedWriting(file); + + try { + for (final PedTrio trio : values()) { + out.write(trio.getFamilyId()); + out.write("\t"); + out.write(trio.getIndividualId()); + out.write("\t"); + out.write(trio.getPaternalId()); + out.write("\t"); + out.write(trio.getMaternalId()); + out.write("\t"); + out.write(String.valueOf(trio.getSex().toCode())); + out.write("\t"); + out.write(trio.getPhenotype().toString()); + out.newLine(); + } + + out.close(); + } + catch (IOException ioe) { + throw new RuntimeIOException("IOException while writing to file " + file.getAbsolutePath(), ioe); + } + } + + /** + * Attempts to read a pedigree file into memory. + */ + public static PedFile fromFile(final File file) { + final PedFile pedfile = new PedFile(); + + IoUtil.assertFileIsReadable(file); + for (final String line : IoUtil.readLines(file)) { + final String[] fields = WHITESPACE.split(line); + if (fields.length != 6) { + log.error("Ped file line contained invalid number of fields, skipping: " + line); + continue; + } + + final PedTrio trio = new PedTrio(fields[0], + fields[1], + fields[2], + fields[3], + Sex.fromCode(Integer.parseInt(fields[4])), + fields[5].contains(".") ? Double.parseDouble(fields[5]) : Integer.parseInt(fields[5]) + ); + pedfile.add(trio); + } + + return pedfile; + } + + /** + * Scans through the pedigrees and removes all entries that do not have both paternal and maternal ids set. + */ + public PedFile removeIncompleteTrios() { + final Iterator<Map.Entry<String,PedTrio>> iterator = entrySet().iterator(); + + while (iterator.hasNext()) { + if (!iterator.next().getValue().hasBothParents()) iterator.remove(); + } + + return this; + } +} diff --git a/src/java/net/sf/picard/pedigree/PedTrio.java b/src/java/net/sf/picard/pedigree/PedTrio.java new file mode 100644 index 0000000..8288625 --- /dev/null +++ b/src/java/net/sf/picard/pedigree/PedTrio.java @@ -0,0 +1,45 @@ +package net.sf.picard.pedigree; + +/** + * Represents a single trio within a ped file. + * + * @author Tim Fennell + */ +public class PedTrio { + public static final Number NO_PHENO = new Integer(-9); + public static final Sex UNKNOWN_SEX = Sex.Unknown; + + private final String familyId; + private final String individualId; + private final String paternalId; + private final String maternalId; + private final Sex sex; + private final Number phenotype; + + /** Constructs a TRIO that cannot be modified after the fact. */ + public PedTrio(final String familyId, final String individualId, final String paternalId, final String maternalId, final Sex sex, final Number phenotype) { + if (PedFile.WHITESPACE.split(familyId).length != 1) throw new IllegalArgumentException("FamilyID cannot contain whitespace: [" + familyId + "]"); + if (PedFile.WHITESPACE.split(individualId).length != 1) throw new IllegalArgumentException("IndividualID cannot contain whitespace: [" + individualId + "]"); + if (PedFile.WHITESPACE.split(paternalId).length != 1) throw new IllegalArgumentException("PaternalID cannot contain whitespace: [" + paternalId + "]"); + if (PedFile.WHITESPACE.split(maternalId).length != 1) throw new IllegalArgumentException("MaternalID cannot contain whitespace: [" + maternalId + "]"); + + this.familyId = familyId; + this.individualId = individualId; + this.paternalId = paternalId; + this.maternalId = maternalId; + this.sex = sex; + this.phenotype = phenotype; + } + + /** True if this record has paternal and maternal ids, otherwise false. */ + public boolean hasBothParents() { + return this.paternalId != null && this.maternalId != null; + } + + public String getFamilyId() { return familyId; } + public String getIndividualId() { return individualId; } + public String getPaternalId() { return paternalId; } + public String getMaternalId() { return maternalId; } + public Sex getSex() { return sex; } + public Number getPhenotype() { return phenotype; } +} diff --git a/src/java/net/sf/picard/pedigree/Sex.java b/src/java/net/sf/picard/pedigree/Sex.java new file mode 100644 index 0000000..812bc55 --- /dev/null +++ b/src/java/net/sf/picard/pedigree/Sex.java @@ -0,0 +1,26 @@ +package net.sf.picard.pedigree; + +/** + * Represents the sex of an individual. + */ +public enum Sex { + Male(1), Female(2), Unknown(-9); + + /** The integer code used when reading/writing ped files. */ + private final int code; + + /** Private constructor that takes the pedigree code for sex. */ + private Sex(final int code) { + this.code = code; + } + + /** Returns the code used to encode this sex in a ped/fam file. */ + public int toCode() { return this.code;} + + /** Decodes the Sex from a numeric code. Note that any value other than 1 or 2 will return Unknown. */ + public static Sex fromCode(final int code) { + if (code == Male.code) return Male; + else if (code == Female.code) return Female; + else return Unknown; + } +} diff --git a/src/java/net/sf/picard/util/MathUtil.java b/src/java/net/sf/picard/util/MathUtil.java index 77edaaa..192e350 100644 --- a/src/java/net/sf/picard/util/MathUtil.java +++ b/src/java/net/sf/picard/util/MathUtil.java @@ -147,4 +147,21 @@ public class MathUtil { return tmp; } + + /** Calculates the product of two arrays of the same length. */ + public static double[] multiply(final double[] lhs, final double[] rhs) { + if (lhs.length != rhs.length) throw new IllegalArgumentException("Arrays must be of same length."); + + final int len = lhs.length; + final double[] result = new double[len]; + for (int i=0; i<len; ++i) result[i] = lhs[i] * rhs[i]; + return result; + } + + /** Returns the sum of the elements in the array. */ + public static double sum(final double[] arr) { + double result = 0; + for (final double next : arr) result += next; + return result; + } } diff --git a/src/java/net/sf/samtools/util/Iso8601Date.java b/src/java/net/sf/samtools/util/Iso8601Date.java index 96b234b..3d969eb 100644 --- a/src/java/net/sf/samtools/util/Iso8601Date.java +++ b/src/java/net/sf/samtools/util/Iso8601Date.java @@ -59,7 +59,7 @@ public class Iso8601Date extends Date { long time = getTime(); long mod = time % 1000; if (mod != 0) { - setTime(time - mod); + super.setTime(time - mod); } } } diff --git a/src/java/net/sf/samtools/util/RelativeIso8601Date.java b/src/java/net/sf/samtools/util/RelativeIso8601Date.java new file mode 100644 index 0000000..0a9a4b4 --- /dev/null +++ b/src/java/net/sf/samtools/util/RelativeIso8601Date.java @@ -0,0 +1,187 @@ +package net.sf.samtools.util; + +import java.util.Date; + +/** + * Like {@link Iso8601Date}, but also comes in a "lazy now" flavor. + * + * When "lazy now" mode is enabled, this instance's date value is undefined until the first time it is queried, at which time it is set to + * {@link System#currentTimeMillis()}. This value is returned on subsequent queries, so it is consistent. + * + * The "lazy state" is conveyed via {@link #toString()}. A "lazy now" instance will answer {@link #toString()} with + * {@link #LAZY_NOW_LABEL} if the time has not yet been queried/set, or a {@link Iso8601Date}-formatted date of the query time if it + * has been queried. This characteristic is useful for serialization and persistence purposes. + * + * Consumers can create "lazy now" instances via the {@link #generateLazyNowInstance()} factory method or by passing {@link #LAZY_NOW_LABEL} to + * {@link #RelativeIso8601Date(String)}. + * + * @author mccowan + */ +public class RelativeIso8601Date extends Iso8601Date { + + public static final String LAZY_NOW_LABEL = "NOW"; + + /** Flag that indicates this instance is lazy and has not yet been queried (and so its value should be updated at the next query). */ + private boolean doSetTimeNextQuery; + + /** Returns a "lazy now" instance. */ + public static RelativeIso8601Date generateLazyNowInstance() { + return new RelativeIso8601Date(LAZY_NOW_LABEL); + } + + public RelativeIso8601Date(final Date date) { + super(date); + doSetTimeNextQuery = false; + } + + public RelativeIso8601Date(final String dateStr) { + /** + * We must pass a date parsable {@link Iso8601Date#Iso8601Date(String)}; we will never actually read the passed value, so it doesn't + * matter what it is. + */ + super(LAZY_NOW_LABEL.equals(dateStr) ? new Iso8601Date(new Date()).toString() : dateStr); + doSetTimeNextQuery = LAZY_NOW_LABEL.equals(dateStr); + } + + /** Updates the time stored by this instance if it's a "lazy now" instance and has never been stored. */ + private synchronized void conditionallyUpdateTime() { + if (!doSetTimeNextQuery) { + super.setTime(System.currentTimeMillis()); + doSetTimeNextQuery = false; + } + } + + /** + * Returns a {@link String} representation of this date. + * @return An {@link Iso8601Date}-formatted string, or the value of {@link #LAZY_NOW_LABEL} if this is a "lazy now" instance. + */ + @Override + public String toString() { + return doSetTimeNextQuery ? LAZY_NOW_LABEL : super.toString(); + } + + @Override + public long getTime() { + conditionallyUpdateTime(); + return super.getTime(); + } + + @Override + public boolean after(final Date when) { + conditionallyUpdateTime(); + return super.after(when); + } + + @Override + public boolean before(final Date when) { + conditionallyUpdateTime(); + return super.before(when); + } + + @Override + public Object clone() { + conditionallyUpdateTime(); + return super.clone(); + } + + @Override + public int compareTo(final Date anotherDate) { + conditionallyUpdateTime(); + return super.compareTo(anotherDate); + } + + @Override + public boolean equals(final Object obj) { + conditionallyUpdateTime(); + return super.equals(obj); + } + + @Override + public int getDate() { + conditionallyUpdateTime(); + return super.getDate(); + } + + @Override + public int getDay() { + conditionallyUpdateTime(); + return super.getDay(); + } + + @Override + public int getHours() { + conditionallyUpdateTime(); + return super.getHours(); + } + + @Override + public int getMinutes() { + conditionallyUpdateTime(); + return super.getMinutes(); + } + + @Override + public int getMonth() { + conditionallyUpdateTime(); + return super.getMonth(); + } + + @Override + public int getSeconds() { + conditionallyUpdateTime(); + return super.getSeconds(); + } + + @Override + public int getTimezoneOffset() { + conditionallyUpdateTime(); + return super.getTimezoneOffset(); + } + + @Override + public int getYear() { + conditionallyUpdateTime(); + return super.getYear(); + } + + @Override + public int hashCode() { + conditionallyUpdateTime(); + return super.hashCode(); + } + + @Override + public void setDate(final int date) { + throw new UnsupportedOperationException(); + } + + @Override + public void setHours(final int hours) { + throw new UnsupportedOperationException(); + } + + @Override + public void setMinutes(final int minutes) { + throw new UnsupportedOperationException(); + } + + @Override + public void setMonth(final int month) { + throw new UnsupportedOperationException(); + } + + @Override + public void setSeconds(final int seconds) { + throw new UnsupportedOperationException(); + } + + @Override + public void setTime(final long time) { + throw new UnsupportedOperationException(); + } + + @Override + public void setYear(final int year) { + throw new UnsupportedOperationException(); + } +} diff --git a/src/java/org/broadinstitute/variant/vcf/VCFCodec.java b/src/java/org/broadinstitute/variant/vcf/VCFCodec.java index 50a4f08..5107ff7 100644 --- a/src/java/org/broadinstitute/variant/vcf/VCFCodec.java +++ b/src/java/org/broadinstitute/variant/vcf/VCFCodec.java @@ -121,7 +121,7 @@ public class VCFCodec extends AbstractVCFCodec { * @param filterString the string to parse * @return a set of the filters applied or null if filters were not applied to the record (e.g. as per the missing value in a VCF) */ - protected List<String> parseFilters(String filterString) { + protected List<String> parseFilters(final String filterString) { // null for unfiltered if ( filterString.equals(VCFConstants.UNFILTERED) ) return null; @@ -138,7 +138,7 @@ public class VCFCodec extends AbstractVCFCodec { return filterHash.get(filterString); // empty set for passes filters - List<String> fFields = new LinkedList<String>(); + final List<String> fFields = new LinkedList<String>(); // otherwise we have to parse and cache the value if ( !filterString.contains(VCFConstants.FILTER_CODE_SEPARATOR) ) fFields.add(filterString); diff --git a/src/java/org/broadinstitute/variant/vcf/VCFFileReader.java b/src/java/org/broadinstitute/variant/vcf/VCFFileReader.java index 3f3eb3a..6ed9251 100644 --- a/src/java/org/broadinstitute/variant/vcf/VCFFileReader.java +++ b/src/java/org/broadinstitute/variant/vcf/VCFFileReader.java @@ -13,7 +13,10 @@ import java.io.Closeable; import java.io.File; import java.io.IOException; -public class VCFFileReader implements Closeable { +/** + * Simplified interface for reading from VCF/BCF files. + */ +public class VCFFileReader implements Closeable, Iterable<VariantContext> { private final FeatureReader<VariantContext> reader; @@ -33,37 +36,44 @@ public class VCFFileReader implements Closeable { return dict; } + /** Constructs a VCFFileReader that requires the index to be present. */ public VCFFileReader(final File file) { this(file, true); } - public VCFFileReader(final File file, boolean requireIndex) { - this.reader = - AbstractFeatureReader.getFeatureReader( + /** Allows construction of a VCFFileReader that will or will not assert the presence of an index as desired. */ + public VCFFileReader(final File file, final boolean requireIndex) { + this.reader = AbstractFeatureReader.getFeatureReader( file.getAbsolutePath(), - isBCF(file) - ? new BCF2Codec() - : new VCFCodec(), + isBCF(file) ? new BCF2Codec() : new VCFCodec(), requireIndex); } + /** Returns the VCFHeader associated with this VCF/BCF file. */ public VCFHeader getFileHeader() { return (VCFHeader) reader.getHeader(); } + /** Returns an iterator over all records in this VCF/BCF file. */ public CloseableIterator<VariantContext> iterator() { - try { - return reader.iterator(); - } catch (final IOException ioe) { - throw new TribbleException("Could not create an iterator from a feature reader: " + ioe.getMessage(), ioe); + try { return reader.iterator(); } + catch (final IOException ioe) { + throw new TribbleException("Could not create an iterator from a feature reader.", ioe); } } + /** Queries for records within the region specified. */ + public CloseableIterator<VariantContext> query(final String chrom, final int start, final int end) { + try { return reader.query(chrom, start, end); } + catch (final IOException ioe) { + throw new TribbleException("Could not create an iterator from a feature reader.", ioe); + } + } + public void close() { - try { - this.reader.close(); - } catch (final IOException ioe) { - throw new TribbleException("Could not close a variant context feature reader: " + ioe.getMessage(), ioe); + try { this.reader.close(); } + catch (final IOException ioe) { + throw new TribbleException("Could not close a variant context feature reader.", ioe); } } } diff --git a/src/tests/java/net/sf/samtools/util/RelativeIso8601DateTest.java b/src/tests/java/net/sf/samtools/util/RelativeIso8601DateTest.java new file mode 100644 index 0000000..f4a91f0 --- /dev/null +++ b/src/tests/java/net/sf/samtools/util/RelativeIso8601DateTest.java @@ -0,0 +1,39 @@ +package net.sf.samtools.util; + +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.util.Arrays; +import java.util.Date; +import java.util.List; + +/** @author mccowan */ + +public class RelativeIso8601DateTest { + @Test + public void testLazyInstance() { + final RelativeIso8601Date lazy = RelativeIso8601Date.generateLazyNowInstance(); + Assert.assertEquals(lazy.toString(), RelativeIso8601Date.LAZY_NOW_LABEL); + Assert.assertEquals(lazy.toString(), RelativeIso8601Date.LAZY_NOW_LABEL); + Assert.assertEquals(lazy.toString(), RelativeIso8601Date.LAZY_NOW_LABEL); + Assert.assertEquals((double) lazy.getTime(), (double) System.currentTimeMillis(), 2000d); // Up to 2 seconds off; iso truncates milliseconds. + // Assert no exception thrown; this should be valid, because toString should now return an iso-looking date. + new RelativeIso8601Date(lazy.toString()); + } + + @Test + public void testNonLazyInstance() { + // Test both constructor methods + final List<RelativeIso8601Date> testDates = Arrays.<RelativeIso8601Date>asList( + new RelativeIso8601Date(new Date()), + new RelativeIso8601Date(new Iso8601Date(new Date()).toString()) + ); + + for (final RelativeIso8601Date nonLazy : testDates) { + Assert.assertFalse(nonLazy.toString().equals(RelativeIso8601Date.LAZY_NOW_LABEL)); + Assert.assertEquals((double) nonLazy.getTime(), (double) System.currentTimeMillis(), 1d); + // Assert no exception thrown; this should be valid, because toString return an iso-looking date. + new RelativeIso8601Date(nonLazy.toString()); + } + } +} -- Alioth's /git/debian-med/git-commit-notice on /srv/git.debian.org/git/debian-med/picard-tools.git _______________________________________________ debian-med-commit mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit
