Hi, I'm trying to extract complete rows from a data frame, by group, based on the maximum of a column within each group. I have a data frame like this:
cvd_basestudy ... es_time ...
_____________
study1 ... 0.3091667
study2 ... 0.3091667
study2 ... 0.2625000
study3 ... 0.3033333
study3 ... 0.2625000
__________
etc.

I can extract the base study and the max(es_time) for each group using ddply:

ddply(datares_sinus_variable, .(cvd_basestudy), function(x){max(x[['es_time']])})

or using by:

by(datares_sinus_variable$es_time, datares_sinus_variable$cvd_basestudy, max)

But how do I extract the whole row, so that I get a data frame containing all the columns for the row with the maximum es_time in each group? (The dput output from the first 5 rows of my actual data frame follows.)

Any help would be much appreciated.

Thanks in advance,
Sandy Small

structure(list(cvd_basestudy = c("study1", "study2", "study2", "study3", "study3"), ecd_rhythm = structure(c(5L, 5L, 5L, 5L, 5L), .Label = c("AF", "FLUTTER", "PACED AF", "SCRAP", "SINUS", "UNSURE"), class = "factor"), cvd_frame_mode = structure(c(2L, 2L, 2L, 2L, 2L), .Label = c("fixed_time", "variable_time"), class = "factor"), cvd_part_fmt = structure(c(4L, 4L, 4L, 4L, 4L), .Label = c("first", "last", "mid", "whole"), class = "factor"), cvd_prev_fmt = structure(c(1L, 2L, 1L, 3L, 2L), .Label = c("All", "Best", "Q1", "Q2", "Q3", "Q4"), class = "factor"), cvd_cur_fmt = structure(c(5L, 5L, 1L, 4L, 4L), .Label = c("All", "Best", "Q1", "Q2", "Q3", "Q4"), class = "factor"), ps_pt = c(1, 1, 2, 1, 2), es_pt = c(8, 8, 8, 8, 8), ed_pt = c(21, 21, 18, 17, 18), cvd_median_limit = c(1.057, 1.057, 1.048, 1.037, 1.05), cvd_average_beat = c(1.06, 1.06, 1.05, 1.04, 1.05), limit = c(0.9, 0.9, 0.9, 0.9, 0.9), sstd_mi = c(FALSE, FALSE, FALSE, FALSE, FALSE), sstd_hbp = c(FALSE, FALSE, FALSE, FALSE, FALSE), sstd_ptca = c(FALSE, FALSE, FALSE, FALSE, FALSE), sstd_cabg = c(TRUE, TRUE, TRUE, TRUE, TRUE), sstd_norm_perf = c(FALSE, FALSE, FALSE, FALSE, FALSE), sstd_posnegett = structure(c(NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_), .Label = c("-", "+"), class = "factor"), sstd_function = structure(c(NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_), .Label = c("MODERATE", "NORMAL", "POOR", "VERY POOR"), class = "factor"), cvd_cur_fmt_n = c(3, 3, NA, 2, 2), cvd_prev_fmt_n = c(NA, NA, NA, 1, NA), cvd_cur_fmt2 = structure(c(3L, 3L, 1L, 3L, 3L), .Label = c("All", "Best", "Quartiles"), class = "factor"), cvd_prev_fmt2 = structure(c(1L, 2L, 1L, 3L, 2L), .Label = c("All", "Best", "Quartiles"), class = "factor"), es_time = c(0.309166666666667, 0.309166666666667, 0.2625, 0.303333333333333, 0.2625), es_time_err = c(0.0441666666666667, 0.0441666666666667, 0.04375, 0.0433333333333333, 0.04375), ed_time = c(0.574166666666667, 0.574166666666667, 0.4375, 0.39, 0.4375)), .Names = c("cvd_basestudy", "ecd_rhythm", "cvd_frame_mode", "cvd_part_fmt", "cvd_prev_fmt", "cvd_cur_fmt", "ps_pt", "es_pt", "ed_pt", "cvd_median_limit", "cvd_average_beat", "limit", "sstd_mi", "sstd_hbp", "sstd_ptca", "sstd_cabg", "sstd_norm_perf", "sstd_posnegett", "sstd_function", "cvd_cur_fmt_n", "cvd_prev_fmt_n", "cvd_cur_fmt2", "cvd_prev_fmt2", "es_time", "es_time_err", "ed_time" ), row.names = c("651", "655", "656", "661", "663"), class = "data.frame") ******************************************************************************************************************** This message may contain confidential information. If yo...{{dropped:21}} ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.