diff --git a/f265/analyze.c b/f265/analyze.c
index 60cf647..77b18c9 100644
--- a/f265/analyze.c
+++ b/f265/analyze.c
@@ -902,8 +902,9 @@ printf("unsplit won\n");
 }
 
 // Compute the rough cost of using a specific intra prediction mode.
-int64_t fenc_analyze_intra_rough_cost(f265_enc_thread *t, f265_cb *cb, int part_idx, int lg_bs,
-                                      int cb_ox, int cb_oy, int mode)
+// 4x4 blocks are handled by the caller, which must apply the partition offset
+// to the cb_ox and cb_oy values itself.
+int64_t fenc_analyze_intra_rough_cost(f265_enc_thread *t, f265_cb *cb, int lg_bs, int cb_ox, int cb_oy, int mode)
 {
     int nz_flag = 0;
     int64_t cost = 0;
@@ -914,15 +915,8 @@ int64_t fenc_analyze_intra_rough_cost(f265_enc_thread *t, f265_cb *cb, int part_
                 cost += fenc_analyze_intra_tb(t, cb, &nz_flag, mode, 0, 5, cb_ox+x, cb_oy+y);
     }
 
-    else if (lg_bs > 2)
-        cost = fenc_analyze_intra_tb(t, cb, &nz_flag, mode, 0, lg_bs, cb_ox, cb_oy);
-
     else
-    {
-        int y = part_idx > 1 ? 2 : 0;
-        int x = part_idx & 1 ? 2 : 0;
-        cost = fenc_analyze_intra_tb(t, cb, &nz_flag, mode, 0, lg_bs, cb_ox+x, cb_oy+y);
-    }
+        cost = fenc_analyze_intra_tb(t, cb, &nz_flag, mode, 0, lg_bs, cb_ox, cb_oy);
 
     return cost;
 }
@@ -953,7 +947,7 @@ int64_t fenc_analyze_intra_mode_cost(f265_enc_thread *t, f265_cb *cb, int depth_
     }
 
     int64_t tt_cost = 0;
-    if (rmd_flag) tt_cost = fenc_analyze_intra_rough_cost(t, cb, part_idx, lg_bs, cb_ox, cb_oy, mode);
+    if (rmd_flag) tt_cost = fenc_analyze_intra_rough_cost(t, cb, lg_bs, cb_ox, cb_oy, mode);
 
     else
     {
@@ -1358,16 +1352,12 @@ static int64_t fenc_analyze_intra_part_luma(f265_enc_thread *t, f265_cb *cb, int
     // Predict the MPMs and set up the mode cost table.
     int mpm_list[3];
     uint16_t mode_costs[35];
-    #if 1
     if (t->an.rdm_flag)
     {
-    #endif
-    fenc_get_intra_pred_mode(t, cb, part_idx, mpm_list);
-    for (int i = 0; i < 35; i++) mode_costs[i] = t->an.se_costs[F265_SE_INTRA_LUMA_MODE+3];
-    for (int i = 0; i < 3; i++) mode_costs[mpm_list[i]] = t->an.se_costs[F265_SE_INTRA_LUMA_MODE+i];
-    #if 1
+        fenc_get_intra_pred_mode(t, cb, part_idx, mpm_list);
+        for (int i = 0; i < 35; i++) mode_costs[i] = t->an.se_costs[F265_SE_INTRA_LUMA_MODE+3];
+        for (int i = 0; i < 3; i++) mode_costs[mpm_list[i]] = t->an.se_costs[F265_SE_INTRA_LUMA_MODE+i];
     }
-    #endif
 
     #ifdef VAN_TRACE_ANALYSIS
     if (fenc_trace_analysis_flag)
@@ -1392,7 +1382,7 @@ static int64_t fenc_analyze_intra_part_luma(f265_enc_thread *t, f265_cb *cb, int
 
         if (cost < best_cost)
         {
-            // Eventually, this could be optimized by not resetting&saving for
+            // Eventually, this could be optimized by not resetting and saving for
             // the last mode.
             if (stash_flag && !rough_flag) fenc_stash_save(t);
             best_mode = mode;
@@ -1442,8 +1432,8 @@ static int64_t fenc_analyze_intra_part_luma(f265_enc_thread *t, f265_cb *cb, int
         // Rules for the 7 "base" modes.
         if (algo_flags&1)
         {
-            // These rules are a little more complex since we have to choose one of
-            // four intervals.
+            // These rules are a little more complex since we have to choose one
+            // of four intervals.
             if (best_mode == 2)
             {
                 modes[1] = 10;
@@ -3355,11 +3345,7 @@ static int64_t fenc_visit_cb_bottom_up(f265_enc_thread *t, f265_cb *cb)
 
     // Get the split flag costs.
     uint16_t split_flag_costs[2] = { 0, 0 };
-    #if 0
-    if (split_flag && unsplit_flag)
-    #else
     if (t->an.rdm_flag && split_flag && unsplit_flag)
-    #endif
     {
         int off = fenc_get_split_cu_ctx_off(t, cb)<<1;
         for (int i = 0; i < 2; i++) split_flag_costs[i] = t->an.se_costs[F265_SE_SPLIT_CU + off + i];
@@ -3591,9 +3577,11 @@ int fenc_analyze_ctb(f265_enc_thread *t)
     t->an.rdo_lambda = t->hm_lambda[0]*256.0f + 0.5f;
     t->me.lambda = t->an.rdm_lambda = (sqrt(t->hm_lambda[0])*256.0f) + 0.5f;
 
-    // Update the costs based on the fixed/initial/current CABAC context values.
+    // Update the costs every time the CABAC engine is scheduled for a reset.
+    // This keeps threads from using uninitialized data when multithreading is used.
     // Experimental code.
-    if (t->ctb_xy == 0 || 0) fenc_update_se_costs(t);
+    int init_flag = F265_GET_FLAG(t->seg_flags, F265_SC_CABAC_INIT);
+    if (init_flag) fenc_update_se_costs(t);
 
     // Set the stash flags.
     t->stash.flags = t->an.rdm_flag ? 8 : 31;
