Re: [Bioc-sig-seq] Resizing a GRanges object with elements on the "*" strand

Leonardo Collado Torres Tue, 21 Sep 2010 09:49:55 -0700

Hello Michal and Ivan,

Thanks for the replies and your input :)


In my use case all elements are from the  "*" strand. In mixed cases as in
Ivan's example, the two workarounds I posted earlier do not work. I say so,
since I'm expecting just what Michael said: to treat "*" elements as from
the "+" strand.

I tried using the method Michael coded, but I do not know how to call a
specific method. This must be pretty basic for you, but what is the way to
do so?

Greetings,
Leonardo

> library(GenomicRanges)

## Ivan's example GRanges
> C <- GRanges(seqnames=c("chr1","chr2","chr19","chrX"),
+             ranges=IRanges(start=c(0,0,5,1),
+                            end=c(150,150,150,400)),
+             strand=c("*","-","*","+"),
+             score=c(10,20,30,90))
> C
GRanges with 4 ranges and 1 elementMetadata value
    seqnames    ranges strand |     score
       <Rle> <IRanges>  <Rle> | <numeric>
[1]     chr1  [0, 150]      * |        10
[2]     chr2  [0, 150]      - |        20
[3]    chr19  [5, 150]      * |        30
[4]     chrX  [1, 400]      + |        90

seqlengths
  chr1 chr19  chr2  chrX
    NA    NA    NA    NA

## The flank + shift workaround does not work, as the element on the "-"
strand is moved to position 152 instead of 150.
> flank(shift(C, 1), 1)
GRanges with 4 ranges and 1 elementMetadata value
    seqnames     ranges strand |     score
       <Rle>  <IRanges>  <Rle> | <numeric>
[1]     chr1 [  0,   0]      * |        10
[2]     chr2 [152, 152]      - |        20
[3]    chr19 [  5,   5]      * |        30
[4]     chrX [  1,   1]      + |        90

seqlengths
  chr1 chr19  chr2  chrX
    NA    NA    NA    NA

## The workaround where you redifine the GRanges produces the same result as
Ivan's workaround. However, I would expect the "-" strand element "start" to
be position 150 and not position 0.
> GRanges( seqnames = seqnames(C), ranges = IRanges( start = start(C),
width=1), strand = strand(C))
GRanges with 4 ranges and 0 elementMetadata values
    seqnames    ranges strand |
       <Rle> <IRanges>  <Rle> |
[1]     chr1    [0, 0]      * |
[2]     chr2    [0, 0]      - |
[3]    chr19    [5, 5]      * |
[4]     chrX    [1, 1]      + |

seqlengths
  chr1 chr19  chr2  chrX
    NA    NA    NA    NA

## Current output of the resize function:
> resize(C, fix="start", width=1)
GRanges with 4 ranges and 1 elementMetadata value
    seqnames     ranges strand |     score
       <Rle>  <IRanges>  <Rle> | <numeric>
[1]     chr1 [ 75,  75]      * |        10
[2]     chr2 [150, 150]      - |        20
[3]    chr19 [ 77,  77]      * |        30
[4]     chrX [  1,   1]      + |        90

seqlengths
  chr1 chr19  chr2  chrX
    NA    NA    NA    NA

## Methods available for "resize"
> showMethods("resize")
Function: resize (package IRanges)
x="CompressedIRangesList"
x="GRanges"
x="IRanges"
    (inherited from: x="Ranges")
x="NormalIRanges"
x="Ranges"
x="RangesList"

## Failed attempt to use Michael's method
> setMethod("resize", "GenomicRanges",
+           function(x, width, fix = "start", use.names = TRUE)
+           {
+             revFix <- c(start = "end", end = "start", center = "center")
+             fix <- ifelse(strand(x) == "-", revFix[fix], fix)
+             ranges <-
+               resize(ranges(x), width = width, fix = fix, use.names =
use.names)
+             if (!IRanges:::anyMissing(seqlengths(x))) {
+               start(x) <- start(ranges)
+               end(x) <- end(ranges)
+             } else {
+               x <- clone(x, ranges = ranges)
+             }
+             x
+           }
+           )
[1] "resize"

## Methods available for resize part II:
> showMethods("resize")
Function: resize (package IRanges)
x="CompressedIRangesList"
x="GenomicRanges"
x="GRanges"
x="IRanges"
    (inherited from: x="Ranges")
x="NormalIRanges"
x="Ranges"
x="RangesList"

## Will it use the GRanges method or the GenomicRanges method for "C"?
> class(C)
[1] "GRanges"
attr(,"package")
[1] "GenomicRanges"

## It produces the same result from above. How can I use the new
"GenomicRanges" method for "resize" with the example GRanges "C"? My guess
is that it uses the "GRanges" method instead of the "GenomicRanges" one.
> resize(C, fix="start", width=1)
GRanges with 4 ranges and 1 elementMetadata value
    seqnames     ranges strand |     score
       <Rle>  <IRanges>  <Rle> | <numeric>
[1]     chr1 [ 75,  75]      * |        10
[2]     chr2 [150, 150]      - |        20
[3]    chr19 [ 77,  77]      * |        30
[4]     chrX [  1,   1]      + |        90

seqlengths
  chr1 chr19  chr2  chrX
    NA    NA    NA    NA


### Manually edited text:
## This is the output I would expect from "resize"
> resize(C, fix="start", width=1)
GRanges with 4 ranges and 1 elementMetadata value
    seqnames     ranges strand |     score
       <Rle>  <IRanges>  <Rle> | <numeric>
[1]     chr1 [  0,   0]      * |        10
[2]     chr2 [150, 150]      - |        20
[3]    chr19 [  5,   5]      * |        30
[4]     chrX [  1,   1]      + |        90
### End of manually edited text

seqlengths
  chr1 chr19  chr2  chrX
    NA    NA    NA    NA

## I updated GenomicRanges and IRanges using biocLite prior to running the
above pieces of code.
> sessionInfo()
R version 2.12.0 Under development (unstable) (2010-09-08 r52880)
Platform: x86_64-unknown-linux-gnu (64-bit)

locale:
 [1] LC_CTYPE=en_US.utf8       LC_NUMERIC=C
 [3] LC_TIME=en_US.utf8        LC_COLLATE=en_US.utf8
 [5] LC_MONETARY=C             LC_MESSAGES=en_US.utf8
 [7] LC_PAPER=en_US.utf8       LC_NAME=C
 [9] LC_ADDRESS=C              LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.utf8 LC_IDENTIFICATION=C

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base

other attached packages:
[1] GenomicRanges_1.1.25 IRanges_1.7.34



On Tue, Sep 14, 2010 at 4:13 PM, Ivan Gregoretti <[email protected]> wrote:

> Hi Michael, since you are asking for opinions...
>
>
> When specifying the 'fix=' argument:
>
> In my view, fix="start" should always resize at the "start" regardless
> of strandedness.
>
>
>
> Default behaviour (when not specifying the 'fix=' argument):
>
> When the strand is "*", I would expect resize() to default to
> fix="center" rather than "start".
>
> When strands are "+" and "-" I would expect resize to default to
> fix="start" and fix="end' respectively.
>
>
>
> Thank you,
>
> Ivan
>
>
>
> On Tue, Sep 14, 2010 at 4:46 PM, Michael Lawrence
> <[email protected]> wrote:
> > I just checked in some changes to IRanges, that make this method work:
> >
> > setMethod("resize", "GenomicRanges",
> >           function(x, width, fix = "start", use.names = TRUE)
> >           {
> >             revFix <- c(start = "end", end = "start", center = "center")
> >             fix <- ifelse(strand(x) == "-", revFix[fix], fix)
> >             ranges <-
> >               resize(ranges(x), width = width, fix = fix, use.names =
> > use.names)
> >             if (!IRanges:::anyMissing(seqlengths(x))) {
> >               start(x) <- start(ranges)
> >               end(x) <- end(ranges)
> >             } else {
> >               x <- clone(x, ranges = ranges)
> >             }
> >             x
> >           }
> >           )
> >
> > That will accept the fix argument, except start and end are reversed for
> > negative strand features. '*' is treated just like '+'. If this is
> > acceptable to the GenomicRanges guys, I will commit this.
> >
> > On Tue, Sep 14, 2010 at 9:36 AM, Ivan Gregoretti <[email protected]>
> wrote:
> >>
> >> Hello Leonardo,
> >>
> >> I believe that the issue here is that resize() does not support the
> >> "fix" argument at all when handling GRanges.
> >>
> >> Actually that would be a nice upgrade of functionality for GRanges.
> >>
> >> I face the same limitation and I currently resize by hand. :(
> >>
> >> This is my work around:
> >>
> >> library(GenomicRanges)
> >>
> >> # a set of genomic features called C
> >> C <- GRanges(seqnames=c("chr1","chr2","chr19","chrX"),
> >>             ranges=IRanges(start=c(0,0,5,1),
> >>                            end=c(150,150,150,400)),
> >>             strand=c("*","-","*","+"),
> >>             score=c(10,20,30,90))
> >>
> >>
> >> # peek at C
> >> C
> >> GRanges with 4 ranges and 1 elementMetadata value
> >>    seqnames    ranges strand |     score
> >>       <Rle> <IRanges>  <Rle> | <numeric>
> >> [1]     chr1  [0, 150]      * |        10
> >> [2]     chr2  [0, 150]      - |        20
> >> [3]    chr19  [5, 150]      * |        30
> >> [4]     chrX  [1, 400]      + |        90
> >>
> >> seqlengths
> >>  chr1 chr19  chr2  chrX
> >>    NA    NA    NA    NA
> >>
> >> # this is the workaround
> >> ranges(C) <- resize(ranges(C),1,fix="start")
> >>
> >> # peek at the resized set C
> >> C
> >> GRanges with 4 ranges and 1 elementMetadata value
> >>    seqnames    ranges strand |     score
> >>       <Rle> <IRanges>  <Rle> | <numeric>
> >> [1]     chr1    [0, 0]      * |        10
> >> [2]     chr2    [0, 0]      - |        20
> >> [3]    chr19    [5, 5]      * |        30
> >> [4]     chrX    [1, 1]      + |        90
> >>
> >> seqlengths
> >>  chr1 chr19  chr2  chrX
> >>    NA    NA    NA    NA
> >>
> >>
> >> Cheers,
> >>
> >> Ivan
> >>
> >>
> >> Ivan Gregoretti, PhD
> >> National Institute of Diabetes and Digestive and Kidney Diseases
> >> National Institutes of Health
> >> 5 Memorial Dr, Building 5, Room 205.
> >> Bethesda, MD 20892. USA.
> >> Phone: 1-301-496-1016 and 1-301-496-1592
> >> Fax: 1-301-496-9878
> >>
> >>
> >>
> >> On Tue, Sep 14, 2010 at 11:36 AM, Leonardo Collado Torres
> >> <[email protected]> wrote:
> >> > Hello,
> >> >
> >> > I have a rather simple question that involves GenomicRanges' design.
> >> >
> >> > Basically, I have a GRanges object where all the elements are from the
> >> > undefined "*" strand. I just want to resize them to get the 1st (from
> >> > left
> >> > to right) base. However, I'm not able to do so with the "resize"
> >> > function
> >> > even when specifying fix = "start" as it uses the fix = "center"
> method.
> >> > Is
> >> > this the desired performance? I have 2 workarounds, but I'm puzzled as
> >> > the
> >> > "flank" function actually uses the start (left to right) when elements
> >> > are
> >> > from the "*" strand. Is there a quicker way to do this or should I
> stick
> >> > to
> >> > the flank + shift workaround?
> >> >
> >> > Thank you and greetings,
> >> > Leonardo
> >> >
> >> >> testGR <- GRanges( seqnames = rep("test", 3), ranges = IRanges (
> start
> >> >> =
> >> > c(10,100,1000), width = c(10, 100, 1000)), strand =
> >> > Rle(strand(c("+","-")),
> >> > c(1,2)) )
> >> >> testGR
> >> > GRanges with 3 ranges and 0 elementMetadata values
> >> >    seqnames       ranges strand |
> >> >       <Rle>    <IRanges>  <Rle> |
> >> > [1]     test [  10,   19]      + |
> >> > [2]     test [ 100,  199]      - |
> >> > [3]     test [1000, 1999]      - |
> >> >
> >> > seqlengths
> >> >  test
> >> >   NA
> >> >> resize(testGR, 1, fix="start")
> >> > GRanges with 3 ranges and 0 elementMetadata values
> >> >    seqnames       ranges strand |
> >> >       <Rle>    <IRanges>  <Rle> |
> >> > [1]     test [  10,   10]      + |
> >> > [2]     test [ 199,  199]      - |
> >> > [3]     test [1999, 1999]      - |
> >> >
> >> > seqlengths
> >> >  test
> >> >   NA
> >> >> testGR2 <- GRanges( seqnames = rep("test", 3), ranges = IRanges (
> start
> >> >> =
> >> > c(10,100,1000), width = c(10, 100, 1000)), strand =
> Rle(strand(c("*")),
> >> > c(3)) )
> >> >> testGR2
> >> > GRanges with 3 ranges and 0 elementMetadata values
> >> >    seqnames       ranges strand |
> >> >       <Rle>    <IRanges>  <Rle> |
> >> > [1]     test [  10,   19]      * |
> >> > [2]     test [ 100,  199]      * |
> >> > [3]     test [1000, 1999]      * |
> >> >
> >> > seqlengths
> >> >  test
> >> >   NA
> >> >> resize(testGR2, 1, fix="start")
> >> > GRanges with 3 ranges and 0 elementMetadata values
> >> >    seqnames       ranges strand |
> >> >       <Rle>    <IRanges>  <Rle> |
> >> > [1]     test [  14,   14]      * |
> >> > [2]     test [ 149,  149]      * |
> >> > [3]     test [1499, 1499]      * |
> >> >
> >> > seqlengths
> >> >  test
> >> >   NA
> >> >
> >> >> testGR3 <- GRanges ( seqnames = seqnames(testGR2), ranges = IRanges(
> >> >> start
> >> > = start(testGR2), width = 1), strand = strand(testGR2) )
> >> >>
> >> >> testGR3
> >> > GRanges with 3 ranges and 0 elementMetadata values
> >> >    seqnames       ranges strand |
> >> >       <Rle>    <IRanges>  <Rle> |
> >> > [1]     test [  10,   10]      * |
> >> > [2]     test [ 100,  100]      * |
> >> > [3]     test [1000, 1000]      * |
> >> >
> >> > seqlengths
> >> >  test
> >> >   NA
> >> >>
> >> >
> >> >> testGR4 <- shift(flank( testGR2, 1), 1)
> >> >> testGR4
> >> > GRanges with 3 ranges and 0 elementMetadata values
> >> >    seqnames       ranges strand |
> >> >       <Rle>    <IRanges>  <Rle> |
> >> > [1]     test [  10,   10]      * |
> >> > [2]     test [ 100,  100]      * |
> >> > [3]     test [1000, 1000]      * |
> >> >
> >> > seqlengths
> >> >  test
> >> >   NA
> >> >
> >> >> sessionInfo()
> >> > R version 2.12.0 Under development (unstable) (2010-09-08 r52880)
> >> > Platform: x86_64-unknown-linux-gnu (64-bit)
> >> >
> >> > locale:
> >> >  [1] LC_CTYPE=en_US.utf8       LC_NUMERIC=C
> >> >  [3] LC_TIME=en_US.utf8        LC_COLLATE=en_US.utf8
> >> >  [5] LC_MONETARY=C             LC_MESSAGES=en_US.utf8
> >> >  [7] LC_PAPER=en_US.utf8       LC_NAME=C
> >> >  [9] LC_ADDRESS=C              LC_TELEPHONE=C
> >> > [11] LC_MEASUREMENT=en_US.utf8 LC_IDENTIFICATION=C
> >> >
> >> > attached base packages:
> >> > [1] stats     graphics  grDevices utils     datasets  methods   base
> >> >
> >> > other attached packages:
> >> > [1] GenomicRanges_1.1.25 IRanges_1.7.33
> >> >
> >> > loaded via a namespace (and not attached):
> >> > [1] tools_2.12.0
> >> >
> >> >        [[alternative HTML version deleted]]
> >> >
> >> > _______________________________________________
> >> > Bioc-sig-sequencing mailing list
> >> > [email protected]
> >> > https://stat.ethz.ch/mailman/listinfo/bioc-sig-sequencing
> >> >
> >>
> >> _______________________________________________
> >> Bioc-sig-sequencing mailing list
> >> [email protected]
> >> https://stat.ethz.ch/mailman/listinfo/bioc-sig-sequencing
> >
> >
>

        [[alternative HTML version deleted]]

_______________________________________________
Bioc-sig-sequencing mailing list
[email protected]
https://stat.ethz.ch/mailman/listinfo/bioc-sig-sequencing

Re: [Bioc-sig-seq] Resizing a GRanges object with elements on the "*" strand

Reply via email to