diff --git a/libcpp/files.cc b/libcpp/files.cc
index d80c4bfd90775..2a10752691ac0 100644
--- a/libcpp/files.cc
+++ b/libcpp/files.cc
@@ -710,6 +710,105 @@ _cpp_find_file (cpp_reader *pfile, const char *fname, cpp_dir *start_dir,
   return file;
 }
 
+/* Detect input file encoding from first 4 bytes and return the charset name.
+   Also returns the BOM length to skip in *BOM_LEN.
+
+   Detection logic, in order:
+   1) If first 4 bytes are all zero -> binary file (returns the sentinel
+      "BINARY", which is not a real charset name)
+   2) BOM detection:
+      - UTF-32 LE BOM: 0xFF 0xFE 0x00 0x00 -> "UTF-32LE"
+      - UTF-32 BE BOM: 0x00 0x00 0xFE 0xFF -> "UTF-32BE"
+      - UTF-16 LE BOM: 0xFF 0xFE (not followed by 0x00 0x00) -> "UTF-16LE"
+      - UTF-16 BE BOM: 0xFE 0xFF -> "UTF-16BE"
+      - UTF-8 BOM: 0xEF 0xBB 0xBF -> NULL, with *BOM_LEN left at 0
+   3) Null byte pattern detection (no BOM, at least 4 bytes).  The UTF-32
+      patterns also satisfy the UTF-16 ones, so they are tested first:
+      - bytes[1..3]==0 && bytes[0]!=0 -> "UTF-32LE"
+      - bytes[0..2]==0 && bytes[3]!=0 -> "UTF-32BE"
+      - bytes[1]==0 && bytes[3]==0 -> "UTF-16LE"
+      - bytes[0]==0 && bytes[2]==0 -> "UTF-16BE"
+   4) Otherwise, return NULL (use the provided input_charset).
+
+   If file is less than 4 bytes, only applicable checks are performed.  */
+
+static const char *
+detect_encoding (const uchar *buf, ssize_t len, size_t *bom_len)
+{
+  *bom_len = 0;
+
+  if (len < 1)
+    return NULL;
+
+  /* Check for binary file (all first 4 bytes are zero).  */
+  if (len >= 4
+      && buf[0] == 0 && buf[1] == 0 && buf[2] == 0 && buf[3] == 0)
+    return "BINARY";
+
+  /* Check for UTF-32 LE BOM: 0xFF 0xFE 0x00 0x00
+     Must check before UTF-16 LE BOM since it starts with 0xFF 0xFE.  */
+  if (len >= 4
+      && buf[0] == 0xFF && buf[1] == 0xFE && buf[2] == 0x00 && buf[3] == 0x00)
+    {
+      *bom_len = 4;
+      return "UTF-32LE";
+    }
+
+  /* Check for UTF-32 BE BOM: 0x00 0x00 0xFE 0xFF.  */
+  if (len >= 4
+      && buf[0] == 0x00 && buf[1] == 0x00 && buf[2] == 0xFE && buf[3] == 0xFF)
+    {
+      *bom_len = 4;
+      return "UTF-32BE";
+    }
+
+  /* Check for UTF-16 LE BOM: 0xFF 0xFE (not followed by 0x00 0x00).  */
+  if (len >= 2 && buf[0] == 0xFF && buf[1] == 0xFE)
+    {
+      *bom_len = 2;
+      return "UTF-16LE";
+    }
+
+  /* Check for UTF-16 BE BOM: 0xFE 0xFF.  */
+  if (len >= 2 && buf[0] == 0xFE && buf[1] == 0xFF)
+    {
+      *bom_len = 2;
+      return "UTF-16BE";
+    }
+
+  /* UTF-8 BOM: 0xEF 0xBB 0xBF.  It is handled separately in
+     _cpp_convert_input, so neither change the charset nor strip it here;
+     just stop before the null byte heuristics below.  */
+  if (len >= 3 && buf[0] == 0xEF && buf[1] == 0xBB && buf[2] == 0xBF)
+    return NULL;
+
+  /* No BOM found.  Try to infer encoding from null byte patterns.
+     Only check if we have at least 4 bytes.  */
+  if (len >= 4)
+    {
+      /* UTF-32 LE: 2nd, 3rd, and 4th bytes are zero.  Test the UTF-32
+	 patterns first: each one also matches the corresponding UTF-16
+	 pattern, which would otherwise shadow it.  */
+      if (buf[1] == 0 && buf[2] == 0 && buf[3] == 0 && buf[0] != 0)
+	return "UTF-32LE";
+
+      /* UTF-32 BE: 1st, 2nd, and 3rd bytes are zero.  */
+      if (buf[0] == 0 && buf[1] == 0 && buf[2] == 0 && buf[3] != 0)
+	return "UTF-32BE";
+
+      /* UTF-16 LE: 2nd and 4th bytes are zero (for ASCII-range characters).  */
+      if (buf[1] == 0 && buf[3] == 0 && (buf[0] != 0 || buf[2] != 0))
+	return "UTF-16LE";
+
+      /* UTF-16 BE: 1st and 3rd bytes are zero (for ASCII-range characters).  */
+      if (buf[0] == 0 && buf[2] == 0 && (buf[1] != 0 || buf[3] != 0))
+	return "UTF-16BE";
+    }
+
+  /* No encoding detected, use the provided charset.  */
+  return NULL;
+}
+
 /* Read a file into FILE->buffer, returning true on success.
 
    If FILE->fd is something weird, like a block device, we don't want
@@ -795,9 +894,45 @@ read_file_guts (cpp_reader *pfile, _cpp_file *file, location_t loc,
     cpp_error_at (pfile, CPP_DL_WARNING, loc,
 		  "%s is shorter than expected", file->path);
 
+  /* Auto-detect the encoding from the leading bytes of the file.  A detected
+     charset overrides INPUT_CHARSET, even one given explicitly.  */
+  const char *detected_charset = NULL;
+  size_t bom_len = 0;
+
+  if (total >= 1)
+    {
+      detected_charset = detect_encoding (buf, total, &bom_len);
+
+      /* Check for binary file.  */
+      if (detected_charset && strcmp (detected_charset, "BINARY") == 0)
+	{
+	  if (pfile)
+	    cpp_error_at (pfile, CPP_DL_ERROR, loc,
+			  "%s appears to be a binary file", file->path);
+	  free (buf);
+	  return false;
+	}
+    }
+
+  /* Use detected charset if we found one, otherwise use the provided one.  */
+  const char *effective_charset = detected_charset ? detected_charset
+						   : input_charset;
+
+  /* If we have a BOM to skip, adjust the buffer.  */
+  ssize_t convert_len = total;
+
+  if (bom_len > 0 && (size_t)total >= bom_len)
+    {
+      /* Move data to skip the BOM.  We need to adjust the buffer
+	 so _cpp_convert_input doesn't see the BOM.  */
+      convert_len = total - bom_len;
+      memmove (buf, buf + bom_len, convert_len);
+    }
+
   file->buffer = _cpp_convert_input (pfile,
-				     input_charset,
-				     buf, size + pad, total,
+				     effective_charset,
+				     buf, size + pad,
+				     convert_len,
 				     &file->buffer_start,
 				     &file->st.st_size);
   file->buffer_valid = file->buffer;