Thanks - I checked through and it looks as if all of the geneids are
formatted similarly, so I don't know which one would be causing an error.
Interestingly, your sapply method works on the same data. So I'm happy
although still confused, because the strsplit method worked the other
day with a similarly generated dataset.
I dumped my entire data frame below, in case anyone wants to investigate.
Alison
Rumino_Reps_agreeWalign$geneid.prefix <- sapply(gene.list, "[", 1)
Rumino_Reps_agreeWalign$geneid.suffix <- sapply(gene.list, "[", 2)
> dput(Rumino_Reps_agreeWalign)
structure(list(geneid = c("657313.locus_tag:RTO_08940", "457412.251848018",
"657314.locus_tag:CK5_20630", "657323.locus_tag:CK1_33060",
"657313.locus_tag:RTO_09690",
"471875.197297106", "411470.DS231493.G14", "411459.149830627",
"657313.locus_tag:RTO_09720", "411460.145845997", "411459.149831369",
"657321.locus_tag:RBR_01830", "411460.145846414", "457412.251848805",
"657321.locus_tag:RBR_08030", "471875.197296907", "457412.251847995",
"657314.locus_tag:CK5_20840", "411460.145846423",
"657314.locus_tag:CK5_25030",
"457412.251847990", "471875.197297117", "471875.197299322",
"411459.149831093",
"411459.149831815", "411460.145846434", "213810.locus_tag:RUM_09700",
"657314.locus_tag:CK5_09460", "657323.locus_tag:CK1_18840",
"471875.197297108",
"411460.145846680", "411459.149831368", "657314.locus_tag:CK5_19120",
"657321.locus_tag:RBR_09560", "411460.145846435",
"657323.locus_tag:CK1_11530",
"457412.251850723", "213810.locus_tag:RUM_12960",
"213810.locus_tag:RUM_14740",
"213810.locus_tag:RUM_07030", "471875.197296936", "411459.149831092",
"471875.197297110", "471875.197298135", "411460.145846430",
"657314.locus_tag:CK5_20370",
"657313.locus_tag:RTO_09790", "657323.locus_tag:CK1_33050",
"411460.145846407",
"457412.251849909", "411460.145846340", "657313.locus_tag:RTO_14810",
"457412.251848010", "457412.251850599", "657323.locus_tag:CK1_33200",
"657323.locus_tag:CK1_33190", "213810.locus_tag:RUM_03050",
"657314.locus_tag:CK5_09880",
"213810.locus_tag:RUM_15180", "657313.locus_tag:RTO_14610",
"657313.locus_tag:RTO_23930",
"411459.149830473", "657313.locus_tag:RTO_18090",
"657323.locus_tag:CK1_27940",
"657314.locus_tag:CK5_20720", "411459.149831855", "471875.197297691",
"411459.149833320", "457412.251849358", "657321.locus_tag:RBR_13130",
"411459.149831077", "471875.197297272", "657314.locus_tag:CK5_09370",
"457412.251847994", "411459.149831080", "657314.locus_tag:CK5_20730",
"457412.251850579", "213810.locus_tag:RUM_14870",
"657321.locus_tag:RBR_01750",
"657313.locus_tag:RTO_09660", "657314.locus_tag:CK5_28910",
"411460.145846907",
"657313.locus_tag:RTO_09860", "457412.251847996",
"657323.locus_tag:CK1_38480",
"411460.145846417", "471875.197297592", "411459.149831814",
"457412.251848016",
"411459.149831804", "657323.locus_tag:CK1_32880",
"657321.locus_tag:RBR_08130",
"411460.145846429", "657313.locus_tag:RTO_09880",
"213810.locus_tag:RUM_03410",
"657313.locus_tag:RTO_09740", "657313.locus_tag:RTO_09840",
"457412.251848009",
"657323.locus_tag:CK1_33090", "657323.locus_tag:CK1_25000",
"411459.149831095",
"411459.149830934", "457412.251847970", "457412.251848000",
"657314.locus_tag:CK5_20680",
"411459.149831088", "657323.locus_tag:CK1_19350",
"657321.locus_tag:RBR_08670",
"471875.197299547", "411459.149831081", "657323.locus_tag:CK1_32550",
"411459.149831091", "657313.locus_tag:RTO_24580", "457412.251848004",
"471875.197297195", "411460.145846602", "657321.locus_tag:RBR_06200",
"213810.locus_tag:RUM_19570", "411460.145846361", "411459.149833804",
"657323.locus_tag:CK1_32930", "471875.197296906", "411459.149831078",
"657321.locus_tag:RBR_09900", "411460.145846496",
"657321.locus_tag:RBR_08260",
"411459.149833021", "657313.locus_tag:RTO_02600",
"657323.locus_tag:CK1_33030",
"657313.locus_tag:RTO_09750", "213810.locus_tag:RUM_14790",
"457412.251848017",
"457412.251848806", "457412.251847640", "657314.locus_tag:CK5_20620",
"411459.149830474", "657323.locus_tag:CK1_11750",
"213810.locus_tag:RUM_09690",
"457412.251847999", "657321.locus_tag:RBR_05870", "411460.145846409",
"657313.locus_tag:RTO_16220", "657321.locus_tag:RBR_10630",
"411459.149833026",
"457412.251847997", "657313.locus_tag:RTO_09650", "471875.197297129",
"471875.197297112", "213810.locus_tag:RUM_14720", "457412.251847991",
"657313.locus_tag:RTO_09730", "471875.197297132",
"657313.locus_tag:RTO_14650",
"411470.DS231491.G186", "457412.251849520", "657323.locus_tag:CK1_04710",
"657323.locus_tag:CK1_04510", "411460.145846182", "411460.145846883",
"657321.locus_tag:RBR_08040", "411459.149833983", "457412.251849519",
"471875.197297124", "457412.251849906", "657321.locus_tag:RBR_08010",
"657321.locus_tag:RBR_03380", "657323.locus_tag:CK1_20230",
"471875.197297115",
"657323.locus_tag:CK1_13100", "657323.locus_tag:CK1_32950",
"411460.145846428",
"471875.197297120", "213810.locus_tag:RUM_13040",
"657314.locus_tag:CK5_25080",
"411459.149831096", "411459.149831090", "411459.149833331",
"411459.149831370",
"657313.locus_tag:RTO_26330", "411459.149833340",
"657314.locus_tag:CK5_20590",
"411460.145846458", "471875.197297290", "657313.locus_tag:RTO_09850",
"213810.locus_tag:RUM_12130", "657323.locus_tag:CK1_32910",
"213810.locus_tag:RUM_09770",
"657313.locus_tag:RTO_09640", "657313.locus_tag:RTO_09830",
"457412.251849013",
"411460.145847544", "657323.locus_tag:CK1_33040",
"213810.locus_tag:RUM_23250",
"657314.locus_tag:CK5_20580", "411459.149831082", "471875.197297125",
"657314.locus_tag:CK5_14780", "657321.locus_tag:RBR_03820",
"213810.locus_tag:RUM_06600",
"657314.locus_tag:CK5_20610", "657321.locus_tag:RBR_08120",
"657314.locus_tag:CK5_20770",
"471875.197297119", "657313.locus_tag:RTO_10610",
"657321.locus_tag:RBR_08270",
"657323.locus_tag:CK1_32920", "457412.251849800", "411460.145846603",
"411459.149830653", "411459.149833020", "411459.149831085",
"411459.149833803",
"657323.locus_tag:CK1_32990", "471875.197297121", "411459.149833164",
"657313.locus_tag:RTO_14600", "457412.251848005",
"657314.locus_tag:CK5_10670",
"213810.locus_tag:RUM_14730", "411459.149831367",
"657314.locus_tag:CK5_20640",
"657321.locus_tag:RBR_15140", "411460.145847269",
"657323.locus_tag:CK1_04820",
"457412.251848673", "471875.197296932", "411459.149831083",
"657323.locus_tag:CK1_33080",
"657321.locus_tag:RBR_08170", "657314.locus_tag:CK5_20740",
"657321.locus_tag:RBR_08100",
"657314.locus_tag:CK5_20850", "471875.197297111",
"657313.locus_tag:RTO_17750",
"471875.197297308", "657314.locus_tag:CK5_00900",
"657313.locus_tag:RTO_03810",
"471875.197297779", "411460.145848384", "657313.locus_tag:RTO_29320",
"657314.locus_tag:CK5_20780", "471875.197299321", "411460.145846431",
"471875.197298831", "471875.197297131", "657323.locus_tag:CK1_30770",
"457412.251848007", "657314.locus_tag:CK5_25320",
"213810.locus_tag:RUM_03700",
"657313.locus_tag:RTO_19560", "411460.145846432",
"657314.locus_tag:CK5_29790",
"411460.145848483", "657323.locus_tag:CK1_32890", "411460.145846406",
"657321.locus_tag:RBR_15270", "657321.locus_tag:RBR_18050",
"657314.locus_tag:CK5_20650",
"657323.locus_tag:CK1_33210", "411459.149831086", "457412.251847993",
"411459.149831051", "411460.145846418", "657321.locus_tag:RBR_07990",
"411459.149830912", "471875.197298686", "457412.251850588",
"457412.251848006",
"657314.locus_tag:CK5_17510", "657313.locus_tag:RTO_05370",
"457412.251849359",
"471875.197297105", "657313.locus_tag:RTO_09820",
"657323.locus_tag:CK1_25830",
"471875.197297130", "657314.locus_tag:CK5_09290", "457412.251848019",
"471875.197297928", "657314.locus_tag:CK5_14710", "411460.145847612",
"457412.251849367", "657314.locus_tag:CK5_20860", "471875.197297907",
"657321.locus_tag:RBR_07980"), count_Conser = c(7L, 1L, 2L, 1L,
3L, 0L, 1L, 0L, 4L, 0L, 3L, 4L, 1L, 3L, 0L, 5L, 2L, 2L, 1L, 0L,
0L, 2L, 3L, 0L, 2L, 1L, 1L, 4L, 0L, 0L, 0L, 1L, 1L, 5L, 0L, 0L,
2L, 0L, 1L, 1L, 2L, 0L, 1L, 1L, 1L, 3L, 1L, 2L, 0L, 0L, 0L, 1L,
0L, 0L, 2L, 1L, 1L, 0L, 1L, 4L, 0L, 1L, 1L, 4L, 0L, 7L, 0L, 4L,
1L, 1L, 2L, 0L, 1L, 0L, 0L, 2L, 3L, 0L, 4L, 0L, 1L, 0L, 1L, 4L,
1L, 0L, 5L, 4L, 0L, 6L, 2L, 1L, 3L, 1L, 0L, 2L, 3L, 0L, 1L, 12L,
1L, 1L, 2L, 0L, 0L, 2L, 1L, 2L, 1L, 3L, 2L, 0L, 2L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 3L, 0L, 2L, 0L, 1L, 0L, 2L, 1L, 1L, 1L, 1L,
0L, 2L, 0L, 2L, 2L, 5L, 2L, 18L, 0L, 4L, 2L, 0L, 3L, 0L, 1L,
0L, 1L, 1L, 1L, 3L, 3L, 1L, 1L, 2L, 0L, 1L, 0L, 1L, 0L, 2L, 0L,
0L, 1L, 1L, 2L, 1L, 0L, 1L, 2L, 1L, 0L, 1L, 1L, 2L, 3L, 2L, 0L,
0L, 0L, 3L, 3L, 1L, 1L, 0L, 0L, 3L, 1L, 1L, 0L, 0L, 1L, 0L, 6L,
0L, 3L, 8L, 1L, 3L, 0L, 0L, 3L, 5L, 0L, 1L, 0L, 0L, 1L, 0L, 4L,
3L, 1L, 2L, 0L, 0L, 0L, 4L, 0L, 6L, 6L, 0L, 1L, 2L, 0L, 2L, 3L,
1L, 3L, 0L, 2L, 4L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 2L, 2L, 2L,
0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 4L, 0L,
0L, 3L, 3L, 1L, 0L, 1L, 1L, 2L, 0L, 0L, 1L, 3L, 0L, 2L, 5L, 0L,
0L, 1L, 0L, 8L, 1L, 8L, 2L, 0L, 1L), count_NonCons = c(5L, 4L,
4L, 0L, 0L, 2L, 0L, 2L, 0L, 2L, 4L, 0L, 0L, 2L, 1L, 1L, 2L, 0L,
0L, 0L, 3L, 1L, 1L, 2L, 1L, 0L, 0L, 4L, 1L, 0L, 4L, 2L, 2L, 15L,
2L, 0L, 2L, 0L, 1L, 0L, 1L, 0L, 3L, 0L, 0L, 8L, 0L, 0L, 0L, 0L,
1L, 2L, 4L, 0L, 0L, 0L, 1L, 3L, 5L, 2L, 0L, 0L, 6L, 0L, 2L, 1L,
1L, 4L, 1L, 4L, 1L, 8L, 5L, 1L, 6L, 1L, 5L, 0L, 11L, 0L, 0L,
0L, 2L, 1L, 0L, 0L, 6L, 1L, 0L, 10L, 2L, 1L, 0L, 1L, 1L, 3L,
2L, 1L, 3L, 4L, 1L, 0L, 12L, 0L, 0L, 1L, 3L, 15L, 9L, 4L, 12L,
2L, 4L, 2L, 0L, 0L, 0L, 2L, 2L, 3L, 1L, 1L, 1L, 0L, 0L, 1L, 0L,
5L, 0L, 0L, 1L, 0L, 3L, 4L, 1L, 1L, 2L, 0L, 0L, 0L, 1L, 3L, 9L,
1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 10L, 2L, 0L, 12L, 0L, 1L,
1L, 2L, 0L, 1L, 1L, 3L, 3L, 1L, 4L, 0L, 2L, 1L, 1L, 4L, 0L, 2L,
5L, 5L, 4L, 0L, 0L, 0L, 2L, 0L, 3L, 0L, 2L, 3L, 2L, 3L, 1L, 4L,
2L, 2L, 0L, 6L, 2L, 1L, 2L, 3L, 0L, 7L, 0L, 0L, 6L, 2L, 2L, 1L,
2L, 0L, 6L, 0L, 0L, 3L, 0L, 0L, 0L, 2L, 2L, 1L, 0L, 2L, 2L, 0L,
0L, 4L, 0L, 2L, 1L, 3L, 2L, 0L, 1L, 0L, 1L, 0L, 6L, 1L, 1L, 1L,
2L, 2L, 4L, 1L, 0L, 0L, 2L, 3L, 2L, 0L, 1L, 0L, 0L, 0L, 1L, 2L,
1L, 0L, 16L, 1L, 3L, 0L, 5L, 10L, 1L, 2L, 4L, 0L, 6L, 0L, 0L,
0L, 1L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 11L, 1L, 4L, 5L, 1L, 1L),
count_ConsSubst = c(5, 3, 1, 1, 3, 1, 0, 1, 1, 0, 0, 2, 0,
0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 3, 0, 1, 0, 0,
0, 6, 1, 1, 1, 0, 0, 0, 1, 2, 1, 0, 0, 4, 0, 0, 1, 0, 0,
4, 1, 0, 0, 0, 0, 1, 0, 3, 0, 1, 0, 2, 1, 3, 0, 3, 0, 3,
2, 0, 1, 1, 3, 4, 2, 0, 9, 0, 1, 1, 1, 0, 2, 0, 1, 1, 0,
1, 1, 3, 0, 2, 0, 1, 0, 2, 2, 1, 3, 0, 6, 0, 0, 0, 2, 7,
3, 1, 5, 1, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 1, 0,
0, 0, 1, 0, 0, 3, 1, 0, 1, 1, 2, 0, 2, 0, 5, 2, 0, 0, 0,
0, 2, 0, 2, 0, 0, 3, 0, 0, 2, 0, 2, 0, 2, 1, 1, 0, 2, 1,
1, 1, 0, 0, 1, 1, 4, 0, 1, 0, 1, 5, 0, 0, 0, 5, 2, 1, 0,
0, 1, 0, 0, 0, 4, 0, 2, 1, 1, 1, 2, 1, 1, 1, 4, 1, 2, 1,
1, 2, 0, 0, 0, 1, 0, 1, 0, 0, 2, 0, 0, 1, 1, 0, 3, 1, 1,
2, 2, 1, 1, 1, 1, 0, 2, 1, 1, 0, 0, 0, 1, 0, 0, 0, 3, 2,
0, 1, 1, 0, 0, 0, 0, 2, 1, 1, 0, 0, 0, 0, 0, 3, 1, 0, 0,
3, 4, 0, 5, 1, 0, 4, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 4,
1, 4, 0, 0, 0), count_NCSubst = c(1, 0, 0, 0, 1, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1,
0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 2, 0, 0, 1, 0, 0, 1, 0, 0,
0, 1, 1, 1, 0, 0, 1, 3, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1, 0, 1, 0, 5, 0, 0, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 0,
0, 1, 1, 1, 0, 2, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0,
0, 1, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,
1, 0, 1, 0, 0, 0, 1, 0, 2, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0,
0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
0, 0, 0, 0, 0, 1, 1, 0, 0, 0)), .Names = c("geneid", "count_Conser",
"count_NonCons", "count_ConsSubst", "count_NCSubst"), class =
"data.frame", row.names = c(NA,
-284L))
On 04/11/2012 08:01 PM, Jean V Adams wrote:
>
> Alison,
>
> Your code works fine on the first six lines of the data that you
> provided.
>
> Rumino_Reps_agreeWalign <- data.frame(
> geneid = c("657313.locus_tag:RTO_08940",
> "457412.251848018",
> "657314.locus_tag:CK5_20630",
> "657323.locus_tag:CK1_33060",
> "657313.locus_tag:RTO_09690",
> "471875.197297106"),
> count_Conser = c(7, 1, 2, 1, 3, 0),
> count_NonCons = c(5, 4, 4, 0, 0, 2),
> count_ConsSubst = c(5, 3, 1, 1, 3, 1),
> count_NCSubst = c(1, 0, 0, 0, 1, 1))
> gene.list <- strsplit(as.character(Rumino_Reps_agreeWalign$geneid),
> "\\.")
> Rumino_Reps_agreeWalignTR <- transform(Rumino_Reps_agreeWalign,
> taxid=do.call(rbind, gene.list))
>
> Perhaps in later rows of the data there are cases where there is no
> "." in geneid? If not, can you provide a subset of your data that
> results in the warning? Use the dput() function.
>
> It's not a good idea to create an object named "strsplit". That will
> only mask the function strsplit() in later runs.
>
> If time is an issue, a slightly faster way to do this, after the
> strsplit() function is:
> Rumino_Reps_agreeWalign$geneid.prefix <- sapply(gene.list, "[", 1)
> Rumino_Reps_agreeWalign$geneid.suffix <- sapply(gene.list, "[", 2)
>
> Jean
>
>
> alison waller wrote on 04/11/2012 08:23:29 AM:
>
> > Dear all,
> >
> > I want to use string split to parse column names, however, I am having
> > some errors that I don't understand.
> > I see a problem when I try to rbind the output from strsplit.
> >
> > please let me know if I'm missing something obvious,
> >
> > thanks,
> > alison
> >
> > here are my commands:
> > >strsplit<-strsplit(as.character(Rumino_Reps_agreeWalign$geneid),"\\.")
> > >
> > Rumino_Reps_agreeWalignTR<-transform
> > (Rumino_Reps_agreeWalign,taxid=do.call(rbind,
> > strsplit))
> > Warning message:
> > In function (..., deparse.level = 1) :
> > number of columns of result is not a multiple of vector length
> > (arg 1)
> >
> >
> > here is my data:
> >
> > > head(Rumino_Reps_agreeWalign)
> > geneid count_Conser count_NonCons count_ConsSubst
> > 1 657313.locus_tag:RTO_08940 7 5 5
> > 2 457412.251848018 1 4 3
> > 3 657314.locus_tag:CK5_20630 2 4 1
> > 4 657323.locus_tag:CK1_33060 1 0 1
> > 5 657313.locus_tag:RTO_09690 3 0 3
> > 6 471875.197297106 0 2 1
> > count_NCSubst
> > 1 1
> > 2 0
> > 3 0
> > 4 0
> > 5 1
> > 6 1
> >
> > here are the results from strsplit:
> > > head(strsplit)
> > [[1]]
> > [1] "657313" "locus_tag:RTO_08940"
> >
> > [[2]]
> > [1] "457412" "251848018"
> >
> > [[3]]
> > [1] "657314" "locus_tag:CK5_20630"
> >
> > [[4]]
> > [1] "657323" "locus_tag:CK1_33060"
> >
> > [[5]]
> > [1] "657313" "locus_tag:RTO_09690"
> >
> > [[6]]
> > [1] "471875" "197297106"
[[alternative HTML version deleted]]
______________________________________________
[email protected] mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.