On Friday, 19 May 2017 at 07:29:44 UTC, biocyberman wrote:
I am solving this problem http://rosalind.info/problems/revc/ as an exercise to learn D. This is my solution:

https://dpaste.dzfl.pl/8aa667f962b7

Is there some D tricks I can use to make the `reverseComplement` function more concise and speedy? Any other comments for improvement of the whole solution are also much appreciated.

Question about your implementation: you assume the input may contain newlines, but don't handle any other non-ACGT characters. The problem definition states 'DNA string' and the sample dataset contains no non-ACGT chars. Is this an oversight my part or yours, or did you just decide to support more than the problem requires?


Here's a few variations I've come up with, and their timings on my machine:

import std.exception;
import std.stdio;
import std.conv;
import std.range;
import std.algorithm;
import std.datetime;
import std.meta;

string randomDna(int length) {
    import std.random;
    auto rnd = Random(unpredictableSeed);
    enum chars = ['A','C','G','T'];
    return iota(length).map!(a=>chars[uniform(0,4, rnd)]).array;
}

unittest {
    auto input = randomDna(2000);

    string previous = null;
foreach (fn; AliasSeq!(revComp0, revComp1, revComp2, revComp3)) {
        auto timing = benchmark!({fn(input);})(10_000);
writeln((&fn).stringof[2..$], ": ", timing[0].to!Duration);
        auto current = fn(input);
        if (previous != null) {
            if (current != previous) {
writeln((&fn).stringof[2..$], " did not give correct results.");
            } else {
                previous = current;
            }
        }
    }
}

// 216 ms, 3 us, and 8 hnsecs
string revComp0(string bps) {
    const N = bps.length;
    char[] result = new char[N];
    for (int i = 0; i < N; ++i) {
        result[i] = {switch(bps[N-i-1]){
            case 'A': return 'T';
            case 'C': return 'G';
            case 'G': return 'C';
            case 'T': return 'A';
            default: return '\0';
            }}();
    }
    return result.assumeUnique;
}

// 2 secs, 752 ms, and 969 us
string revComp1(string bps) {
    return bps.retro.map!((a){switch(a){
            case 'A': return 'T';
            case 'C': return 'G';
            case 'G': return 'C';
            case 'T': return 'A';
            default: assert(false);
            }}).array;
}

// 10 secs, 419 ms, 335 us, and 6 hnsecs
string revComp2(string bps) {
    enum chars = ['A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'];
    auto result = appender!string;
    foreach_reverse (c; bps) {
        result.put(chars[c]);
    }
    return result.data;
}

// 1 sec, 972 ms, 915 us, and 9 hnsecs
string revComp3(string bps) {
    const N = bps.length;
    enum chars = [Repeat!('A'-'\0', '\0'), 'T',
                Repeat!('C'-'A'-1, '\0'), 'G',
                Repeat!('G'-'C'-1, '\0'), 'C',
                Repeat!('T'-'G'-1, '\0'), 'A'];

    char[] result = new char[N];
    for (int i = 0; i < N; ++i) {
        result[i] = chars[bps[N-i-1]];
    }
    return result.assumeUnique;
}

Reply via email to