Hi,
I downloaded a GTF file of RefSeq Genes using the Table Browser.
It seems that some entries have start positions that are larger than
their end positions, which I thought was not meant to happen according
to the GTF specification. Is this a bug, or can you give me an
explanation of why these features are listed this way?
perl -n -a -e'if (@F[3] > @F[4]) { print $_;}' RefSeq_hg19
chr1 hg19_refGene CDS 92852568 92846428 0.000000
+ 1 gene_id
"NM_024813"; transcript_id "NM_024813";
chr1 hg19_refGene CDS 151220339 151220338 0.000000
+ 0 gene_id
"NM_001135636"; transcript_id "NM_001135636";
chr1 hg19_refGene CDS 151220339 151220338 0.000000
+ 0 gene_id
"NM_003557"; transcript_id "NM_003557";
chr1 hg19_refGene CDS 151220339 151220338 0.000000
+ 0 gene_id
"NM_001135637"; transcript_id "NM_001135637";
chr1 hg19_refGene CDS 151220339 151220338 0.000000
+ 0 gene_id
"NM_001135638"; transcript_id "NM_001135638";
chr1 hg19_refGene CDS 228681584 228681583 0.000000
+ 0 gene_id
"NM_001010858"; transcript_id "NM_001010858";
chr2 hg19_refGene CDS 234115352 234115351 0.000000
+ 1 gene_id
"NM_001017915"; transcript_id "NM_001017915";
chr2 hg19_refGene CDS 234115352 234115351 0.000000
+ 1 gene_id
"NM_005541"; transcript_id "NM_005541";
chr2 hg19_refGene CDS 71211741 71211378 0.000000
+ 2 gene_id
"NM_024933"; transcript_id "NM_024933";
chr2 hg19_refGene CDS 86439549 86437734 0.000000
+ 1 gene_id
"NM_145644"; transcript_id "NM_145644";
chr2 hg19_refGene CDS 96907615 96906424 0.000000
+ 1 gene_id
"NM_001037228"; transcript_id "NM_001037228";
chr3 hg19_refGene CDS 77695205 77694054 0.000000
+ 2 gene_id
"NM_002942"; transcript_id "NM_002942";
chr3 hg19_refGene CDS 77695205 77694054 0.000000
+ 1 gene_id
"NM_001128929"; transcript_id "NM_001128929";
chr3 hg19_refGene CDS 109212986 109195607 0.000000
+ 1 gene_id
"NM_001145553"; transcript_id "NM_001145553";
chr4 hg19_refGene CDS 2929898 2928400 0.000000 + 1
gene_id
"NM_176801"; transcript_id "NM_176801";
chr4 hg19_refGene CDS 2929898 2928400 0.000000 + 1
gene_id
"NM_014190"; transcript_id "NM_014190";
chr4 hg19_refGene CDS 70811914 70810720 0.000000
+ 1 gene_id
"NM_001890"; transcript_id "NM_001890";
chr4 hg19_refGene CDS 70811914 70810720 0.000000
+ 1 gene_id
"NM_001025104"; transcript_id "NM_001025104";
chr4 hg19_refGene CDS 186321171 186320925 0.000000
+ 1 gene_id
"NM_181726"; transcript_id "NM_181726";
chr5 hg19_refGene CDS 52781517 52781056 0.000000
+ 2 gene_id
"NM_006350"; transcript_id "NM_006350";
chr5 hg19_refGene CDS 53839009 53815555 0.000000
+ 2 gene_id
"NM_001145427"; transcript_id "NM_001145427";
chr6 hg19_refGene CDS 46644384 46644383 0.000000
+ 0 gene_id
"NM_001204051"; transcript_id "NM_001204051";
chr7 hg19_refGene CDS 66105381 66104213 0.000000
+ 1 gene_id
"NM_001167961"; transcript_id "NM_001167961";
chr8 hg19_refGene CDS 30416404 30407100 0.000000
+ 1 gene_id
"NM_001008711"; transcript_id "NM_001008711";
chr8 hg19_refGene CDS 19822821 19819728 0.000000
+ 1 gene_id
"NM_000237"; transcript_id "NM_000237";
chr8 hg19_refGene CDS 24366059 24365046 0.000000
+ 1 gene_id
"NM_003817"; transcript_id "NM_003817";
chr8 hg19_refGene CDS 80577050 80575895 0.000000
+ 2 gene_id
"NM_001199214"; transcript_id "NM_001199214";
chr9 hg19_refGene CDS 71623363 71623362 0.000000
+ 0 gene_id
"NM_003558"; transcript_id "NM_003558";
chrX hg19_refGene CDS 51079091 51076309 0.000000
+ 1 gene_id
"NM_153183"; transcript_id "NM_153183";
chr10 hg19_refGene CDS 71718455 71718454 0.000000
+ 0 gene_id
"NM_080802"; transcript_id "NM_080802";
chr10 hg19_refGene CDS 71718455 71718454 0.000000
+ 0 gene_id
"NM_080798"; transcript_id "NM_080798";
chr10 hg19_refGene CDS 71718455 71718454 0.000000
+ 0 gene_id
"NM_080800"; transcript_id "NM_080800";
chr10 hg19_refGene CDS 71718455 71718454 0.000000
+ 0 gene_id
"NM_080805"; transcript_id "NM_080805";
chr10 hg19_refGene CDS 71718455 71718454 0.000000
+ 0 gene_id
"NM_001130103"; transcript_id "NM_001130103";
chr10 hg19_refGene CDS 71718455 71718454 0.000000
+ 0 gene_id
"NM_080801"; transcript_id "NM_080801";
chr10 hg19_refGene CDS 79799962 79799961 0.000000
+ 0 gene_id
"NM_033022"; transcript_id "NM_033022";
chr10 hg19_refGene CDS 115540361 115535623 0.000000
+ 1 gene_id
"NM_024889"; transcript_id "NM_024889";
chr14 hg19_refGene CDS 104167538 104167537 0.000000
+ 0 gene_id
"NM_001130107"; transcript_id "NM_001130107";
chr15 hg19_refGene CDS 67599164 67599163 0.000000
+ 0 gene_id
"NM_022784"; transcript_id "NM_022784";
chr15 hg19_refGene CDS 78829642 78819904 0.000000
+ 2 gene_id
"NM_001083612"; transcript_id "NM_001083612";
chr16 hg19_refGene CDS 68260253 68260252 0.000000
+ 0 gene_id
"NM_173163"; transcript_id "NM_173163";
chr16 hg19_refGene CDS 7760625 7759131 0.000000 + 1
gene_id
"NM_145893"; transcript_id "NM_145893";
chr16 hg19_refGene CDS 28884768 28884589 0.000000
+ 2 gene_id
"NM_001145797"; transcript_id "NM_001145797";
chr17 hg19_refGene CDS 45759770 45758033 0.000000
+ 1 gene_id
"NM_002265"; transcript_id "NM_002265";
chr17 hg19_refGene CDS 40948704 40948703 0.000000
+ 0 gene_id
"NM_032387"; transcript_id "NM_032387";
chr18 hg19_refGene CDS 77287528 77287527 0.000000
+ 0 gene_id
"NM_172389"; transcript_id "NM_172389";
chr18 hg19_refGene CDS 77287528 77287527 0.000000
+ 0 gene_id
"NM_006162"; transcript_id "NM_006162";
chr18 hg19_refGene CDS 77287528 77287527 0.000000
+ 0 gene_id
"NM_172388"; transcript_id "NM_172388";
chr18 hg19_refGene CDS 21741480 21740031 0.000000
+ 1 gene_id
"NM_153770"; transcript_id "NM_153770";
chr18 hg19_refGene CDS 21741480 21740031 0.000000
+ 1 gene_id
"NM_138644"; transcript_id "NM_138644";
chr18 hg19_refGene CDS 21741480 21740031 0.000000
+ 1 gene_id
"NM_138643"; transcript_id "NM_138643";
chr18 hg19_refGene CDS 21741480 21740031 0.000000
+ 1 gene_id
"NM_153769"; transcript_id "NM_153769";
chr19 hg19_refGene CDS 1278776 1277296 0.000000 + 1
gene_id
"NM_017914"; transcript_id "NM_017914";
chr19 hg19_refGene CDS 12024224 12015701 0.000000
+ 2 gene_id
"NM_021915"; transcript_id "NM_021915";
chr20 hg19_refGene CDS 44002934 43994324 0.000000
+ 1 gene_id
"NM_001099791"; transcript_id "NM_001099791";
chr21 hg19_refGene CDS 35209291 35208935 0.000000
+ 2 gene_id
"NM_001001132"; transcript_id "NM_001001132";
chr22 hg19_refGene CDS 46239549 46239548 0.000000
+ 0 gene_id
"NM_013236"; transcript_id "NM_013236";
chr22 hg19_refGene CDS 46239549 46239548 0.000000
+ 0 gene_id
"NM_001167621"; transcript_id "NM_001167621";
Cheers,
Henrik Stranneheim
Ph.D. Student
Division of Gene Technology
Science for Life Laboratory, KISP
Royal Institute of Technology, KTH
SE-171 65 Solna, Sweden
E-mail: [email protected]
Phone: +46 (0)8 524 81487 (Office)
Phone: +46 (0)736251487 (Mobile)
Visiting address: Tomtebodavägen 23A
_______________________________________________
Genome maillist - [email protected]
https://lists.soe.ucsc.edu/mailman/listinfo/genome