On Sat, 20 Mar 2021, Fianu, Emmanuel Senyo writes: > Hello Enrico, > > here is a reproducible result: > As you could see, I have separated CAP ATM Fwd and SWT 0.5% fwd > Now, I move on to have two separate tables). I would be grateful for any > support. > Currently, I want to remove all: "c(\"" and if necessary have decimals > instead of % symbols. > #my.data$Strike > #1 CAP ATM Fwd: -0.49% > #2 SWT 0.50% fwd: 0.05% > # 3 CAP ATM Fwd: -0.46% > #4 SWT 0.50% fwd: 0.08% > # 5 CAP ATM Fwd: -0.40% > #6 SWT 0.50% fwd: 0.11% > > > txt0<-strsplit(as.character(df),'CAP ATM Fwd:', "") > #[[1]] > #[1] "c(\"" " -0.49%\", \"SWT 0.50% > fwd: 0.05%\", \"" > #[3] " -0.46%\", \"SWT 0.50% fwd: 0.08%\", \"" " -0.40%\", \"SWT 0.50% > fwd: 0.11%\", \"" > #[5] " -0.32%\", \"SWT 0.50% fwd: 0.14%\", \"" " -0.23%\", \"SWT 0.50% > fwd: 0.17%\", \"" > #[7] " -0.14%\", \"SWT 0.50% fwd: 0.20%\", \"" " -0.05%\", \"SWT 0.50% > fwd: 0.23%\", \"" > #[9] " 0.07%\", \"SWT 0.50% fwd: 0.25%\", \"" " 0.15%\", \"SWT 0.50% fwd: > 0.27%\", \"" > #[11] " 0.22%\", \"SWT 0.50% fwd: 0.28%\", \"" " 0.29%\", \"SWT 0.50% > fwd: 0.28%\", \n\"" > #[13] " 0.32%\", \"SWT 0.50% fwd: 0.28%\", \"" " 0.34%\", \"SWT 0.50% > fwd: 0.28%\", \"" > #[15] " 0.35%\", \"SWT 0.50% fwd: 0.26%\", \"" " 0.33%\", \"SWT 0.50% > fwd: 0.25%\", \"" > #[17] " 0.28%\", \"SWT 0.50% fwd: 0.22%\", \"" " 0.26%\", \"SWT 0.50% > fwd: 0.20%\", \"" > #[19] " 0.21%\", \"SWT 0.50% fwd: 0.18%\", \"" " 0.13%\", \"" > > #[21] " 0.11%\", \"" " 0.08%\")" > > txt1<-strsplit(as.character(df),'SWT 0.50% fwd:',"x") > #[[1]] > #[1] "c(\"CAP ATM Fwd: -0.49%\", \"" > > #[2] " 0.05%\", \"CAP ATM Fwd: -0.46%\", \"" > > #[3] " 0.08%\", \"CAP ATM Fwd: -0.40%\", \"" > > #[4] " 0.11%\", \"CAP ATM Fwd: -0.32%\", \"" > > #[5] " 0.14%\", \"CAP ATM Fwd: -0.23%\", \"" > > #[6] " 0.17%\", \"CAP ATM Fwd: -0.14%\", \"" > > #[7] " 0.20%\", \"CAP ATM Fwd: -0.05%\", \"" > > #[8] " 0.23%\", \"CAP ATM Fwd: 0.07%\", \"" > > #[9] " 0.25%\", \"CAP ATM Fwd: 0.15%\", \"" > > #[10] " 
0.27%\", \"CAP ATM Fwd: 0.22%\", \"" > > #[11] " 0.28%\", \"CAP ATM Fwd: 0.29%\", \"" > > #[12] " 0.28%\", \n\"CAP ATM Fwd: 0.32%\", \"" > > #[13] " 0.28%\", \"CAP ATM Fwd: 0.34%\", \"" > > #[14] " 0.28%\", \"CAP ATM Fwd: 0.35%\", \"" > > #[15] " 0.26%\", \"CAP ATM Fwd: 0.33%\", \"" > > #[16] " 0.25%\", \"CAP ATM Fwd: 0.28%\", \"" > > #[17] " 0.22%\", \"CAP ATM Fwd: 0.26%\", \"" > > #[18] " 0.20%\", \"CAP ATM Fwd: 0.21%\", \"" > > #[19] " 0.18%\", \"CAP ATM Fwd: 0.13%\", \"CAP ATM Fwd: 0.11%\", \"CAP ATM > Fwd: 0.08%\")" > #df <- as.data.frame(my.data$Strike) > #head(df) > #my.data$Strike > #1 CAP ATM Fwd: -0.49% > #2 SWT 0.50% fwd: 0.05% > # 3 CAP ATM Fwd: -0.46% > #4 SWT 0.50% fwd: 0.08% > # 5 CAP ATM Fwd: -0.40% > #6 SWT 0.50% fwd: 0.11% > > txt0<-strsplit(as.character(df),'CAP ATM Fwd:', "") > > #[[1]] > #[1] "c(\"" " -0.49%\", \"SWT 0.50% > fwd: 0.05%\", \"" > #[3] " -0.46%\", \"SWT 0.50% fwd: 0.08%\", \"" " -0.40%\", \"SWT 0.50% > fwd: 0.11%\", \"" > #[5] " -0.32%\", \"SWT 0.50% fwd: 0.14%\", \"" " -0.23%\", \"SWT 0.50% > fwd: 0.17%\", \"" > #[7] " -0.14%\", \"SWT 0.50% fwd: 0.20%\", \"" " -0.05%\", \"SWT 0.50% > fwd: 0.23%\", \"" > #[9] " 0.07%\", \"SWT 0.50% fwd: 0.25%\", \"" " 0.15%\", \"SWT 0.50% fwd: > 0.27%\", \"" > #[11] " 0.22%\", \"SWT 0.50% fwd: 0.28%\", \"" " 0.29%\", \"SWT 0.50% > fwd: 0.28%\", \n\"" > #[13] " 0.32%\", \"SWT 0.50% fwd: 0.28%\", \"" " 0.34%\", \"SWT 0.50% > fwd: 0.28%\", \"" > #[15] " 0.35%\", \"SWT 0.50% fwd: 0.26%\", \"" " 0.33%\", \"SWT 0.50% > fwd: 0.25%\", \"" > #[17] " 0.28%\", \"SWT 0.50% fwd: 0.22%\", \"" " 0.26%\", \"SWT 0.50% > fwd: 0.20%\", \"" > #[19] " 0.21%\", \"SWT 0.50% fwd: 0.18%\", \"" " 0.13%\", \"" > > #[21] " 0.11%\", \"" " 0.08%\")" > > txt1<-strsplit(as.character(df),'SWT 0.50% fwd:',"x") > #[[1]] > #[1] "c(\"CAP ATM Fwd: -0.49%\", \"" > > #[2] " 0.05%\", \"CAP ATM Fwd: -0.46%\", \"" > > #[3] " 0.08%\", \"CAP ATM Fwd: -0.40%\", \"" > > #[4] " 0.11%\", \"CAP ATM Fwd: -0.32%\", \"" > > #[5] " 0.14%\", \"CAP ATM 
Fwd: -0.23%\", \"" > > #[6] " 0.17%\", \"CAP ATM Fwd: -0.14%\", \"" > > #[7] " 0.20%\", \"CAP ATM Fwd: -0.05%\", \"" > > #[8] " 0.23%\", \"CAP ATM Fwd: 0.07%\", \"" > > #[9] " 0.25%\", \"CAP ATM Fwd: 0.15%\", \"" > > #[10] " 0.27%\", \"CAP ATM Fwd: 0.22%\", \"" > > #[11] " 0.28%\", \"CAP ATM Fwd: 0.29%\", \"" > > #[12] " 0.28%\", \n\"CAP ATM Fwd: 0.32%\", \"" > > #[13] " 0.28%\", \"CAP ATM Fwd: 0.34%\", \"" > > #[14] " 0.28%\", \"CAP ATM Fwd: 0.35%\", \"" > > #[15] " 0.26%\", \"CAP ATM Fwd: 0.33%\", \"" > > #[16] " 0.25%\", \"CAP ATM Fwd: 0.28%\", \"" > > #[17] " 0.22%\", \"CAP ATM Fwd: 0.26%\", \"" > > #[18] " 0.20%\", \"CAP ATM Fwd: 0.21%\", \"" > > #[19] " 0.18%\", \"CAP ATM Fwd: 0.13%\", \"CAP ATM Fwd: 0.11%\", \"CAP ATM > Fwd: 0.08%\")" > > Many thanks, > Emm >
Hi Emm try this: my.data <- list() my.data$Strike <- c("CAP ATM Fwd: -0.49%", "SWT 0.50% fwd: 0.05%", "CAP ATM Fwd: -0.46%", "SWT 0.50% fwd: 0.08%", "CAP ATM Fwd: -0.40%", "SWT 0.50% fwd: 0.11%") res <- strsplit(my.data$Strike, ":") ## [[1]] ## [1] "CAP ATM Fwd" " -0.49%" ## ## [[2]] ## [1] "SWT 0.50% fwd" " 0.05%" ## ## [[3]] ## [1] "CAP ATM Fwd" " -0.46%" ## ## [[4]] ## [1] "SWT 0.50% fwd" " 0.08%" ## ## [[5]] ## [1] "CAP ATM Fwd" " -0.40%" ## ## [[6]] ## [1] "SWT 0.50% fwd" " 0.11%" col1 <- sapply(res, `[[`, 1L) col2 <- sapply(res, `[[`, 2L) col2 <- as.numeric(sub("%", "", col2, fixed = TRUE))/100 res <- data.frame(col1, col2) ## col1 col2 ## 1 CAP ATM Fwd -0.0049 ## 2 SWT 0.50% fwd 0.0005 ## 3 CAP ATM Fwd -0.0046 ## 4 SWT 0.50% fwd 0.0008 ## 5 CAP ATM Fwd -0.0040 ## 6 SWT 0.50% fwd 0.0011 If CAP and SWT rows alternate, you can easily extract every second row: i <- seq(from = 1, to = nrow(res), by = 2) res[i, ] ## col1 col2 ## 1 CAP ATM Fwd -0.0049 ## 3 CAP ATM Fwd -0.0046 ## 5 CAP ATM Fwd -0.0040 res[i + 1, ] ## col1 col2 ## 2 SWT 0.50% fwd 0.0005 ## 4 SWT 0.50% fwd 0.0008 ## 6 SWT 0.50% fwd 0.0011 kind regards Enrico > On Fri, Mar 19, 2021 at 9:07 PM Enrico Schumann <e...@enricoschumann.net> > wrote: > >> On Fri, 19 Mar 2021, Fianu, Emmanuel Senyo writes: >> >> > Hello Enrico, >> > >> > thanks for the feedback: this is what I have: >> > As you can see we have two things here: >> > >> > "CAP ATM Fwd" and "SWT 0.50% fwd" >> > >> > strsplit(as.character(df),':', '')# separate the column entry into >> > multiples specified but not organized into tables. 
>> > [[1]] >> > [1] "c(\"CAP ATM Fwd" " -0.49%\", \"SWT 0.50% fwd" " 0.05%\", >> > \"CAP ATM Fwd" " -0.46%\", \"SWT 0.50% fwd" >> > [5] " 0.08%\", \"CAP ATM Fwd" " -0.40%\", \"SWT 0.50% fwd" " 0.11%\", >> > \"CAP ATM Fwd" " -0.32%\", \"SWT 0.50% fwd" >> > [9] " 0.14%\", \"CAP ATM Fwd" " -0.23%\", \"SWT 0.50% fwd" " 0.17%\", >> > \"CAP ATM Fwd" " -0.14%\", \"SWT 0.50% fwd" >> > [13] " 0.20%\", \"CAP ATM Fwd" " -0.05%\", \"SWT 0.50% fwd" " 0.23%\", >> > \"CAP ATM Fwd" " 0.07%\", \"SWT 0.50% fwd" >> > [17] " 0.25%\", \"CAP ATM Fwd" " 0.15%\", \"SWT 0.50% fwd" " 0.27%\", >> > \"CAP ATM Fwd" " 0.22%\", \"SWT 0.50% fwd" >> > >> > Many thanks >> > Emmanuel >> > >> >> (I put R-SIG-Finance into Cc. again.) >> >> >> As I said, please provide a reproducible example: use >> ?dput to provide example data, and also describe what >> you want to achieve. Perhaps this helps: >> >> txt <- c("CAP ATM Fwd: -0.49%", "CAP ATM Fwd: -0.49%") >> spl <- strsplit(txt, ":", fixed = TRUE) >> ## [[1]] >> ## [1] "CAP ATM Fwd" " -0.49%" >> ## >> ## [[2]] >> ## [1] "CAP ATM Fwd" " -0.49%" >> >> sapply(spl, `[[`, 1) ## column 1 >> ## [1] "CAP ATM Fwd" "CAP ATM Fwd" >> >> sapply(spl, `[[`, 2) ## column 2 >> ## [1] " -0.49%" " -0.49%" >> >> Now you can remove the '%' and call 'as.numeric' and so on.... >> >> [...] >> >> > On Fri, Mar 19, 2021 at 9:59 AM Enrico Schumann <e...@enricoschumann.net> >> > wrote: >> > >> >> On Fri, 19 Mar 2021, Fianu, Emmanuel Senyo writes: >> >> >> >> > Dear All, >> >> > >> >> > Please, I am working on raw financial data and would like to have the >> >> data >> >> > cleaned. I am working on it a bit further and will be very grateful >> for >> >> any >> >> > idea or support. Below are my codes and results. >> >> > I would like to have the numbers after the semi-columns separate. 
>> >> > >> >> > >> >> > Strike<-unlist(c(my.data$Strike)) >> >> > # >> >> > # >> >> > Strike <- sapply(Strike[], trimws) # löscht überflüssige >> Leerzeichen >> >> > Strike <- Strike[-1] # löscht die erste Zeile >> >> > >> >> > CAP ATM Fwd: -0.49% SWT 0.50% fwd: 0.05% >> >> > "CAP ATM Fwd: -0.49%" "SWT 0.50% fwd: >> 0.05%" >> >> > CAP ATM Fwd: -0.46% SWT 0.50% fwd: 0.08% >> >> > "CAP ATM Fwd: -0.46%" "SWT 0.50% fwd: >> 0.08%" >> >> > CAP ATM Fwd: -0.40% SWT 0.50% fwd: 0.11% >> >> > "CAP ATM Fwd: -0.40%" "SWT 0.50% fwd: >> 0.11%" >> >> > CAP ATM Fwd: -0.32% SWT 0.50% fwd: 0.14% >> >> > "CAP ATM Fwd: -0.32%" "SWT 0.50% fwd: >> 0.14%" >> >> > CAP ATM Fwd: -0.23% SWT 0.50% fwd: 0.17% >> >> > "CAP ATM Fwd: -0.23%" "SWT 0.50% fwd: >> 0.17%" >> >> > CAP ATM Fwd: -0.14% SWT 0.50% fwd: 0.20% >> >> > "CAP ATM Fwd: -0.14%" "SWT 0.50% fwd: >> 0.20%" >> >> > CAP ATM Fwd: -0.05% SWT 0.50% fwd: 0.23% >> >> > "CAP ATM Fwd: -0.05%" "SWT 0.50% fwd: >> 0.23%" >> >> > CAP ATM Fwd: 0.07% SWT 0.50% fwd: 0.25% >> >> > "CAP ATM Fwd: 0.07%" "SWT 0.50% fwd: >> 0.25%" >> >> > >> >> > Many thanks >> >> > Emmanuel >> >> > >> >> >> >> Please provide a reproducible example, so that people can help you. >> >> >> >> (It's not what you have written, but did you mean "the >> >> numbers after the colons"? Then perhaps look at >> >> ?strsplit : >> >> >> >> strsplit("CAP ATM Fwd: -0.49%", split = ":", fixed = TRUE) >> >> ## [[1]] >> >> ## [1] "CAP ATM Fwd" " -0.49%" >> >> >> >> sapply(strsplit("CAP ATM Fwd: -0.49%", ":", fixed = TRUE), `[[`, 2L) >> >> ## [1] " -0.49%" >> >> >> >> ) -- Enrico Schumann Lucerne, Switzerland http://enricoschumann.net _______________________________________________ R-SIG-Finance@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-sig-finance -- Subscriber-posting only. If you want to post, subscribe first. -- Also note that this is not the r-help list where general R questions should go.