This is an automated email from the ASF dual-hosted git repository.
shenghang pushed a commit to branch dev
in repository https://gitbox.apache.org/repos/asf/seatunnel.git
The following commit(s) were added to refs/heads/dev by this push:
new dc61cbe9d2 [Feature][File] Add markdown parser for RAG support #9714
(#9760)
dc61cbe9d2 is described below
commit dc61cbe9d274e25361239f63cf012129e27d8cde
Author: Joonseo Lee <[email protected]>
AuthorDate: Tue Sep 16 00:23:26 2025 +0900
[Feature][File] Add markdown parser for RAG support #9714 (#9760)
---
.../connector-file/connector-file-base/pom.xml | 6 +
.../seatunnel/file/config/FileFormat.java | 16 +-
.../file/source/reader/MarkdownReadStrategy.java | 279 +++++++++++++++++++++
.../source/reader/MarkdownReadStrategyTest.java | 80 ++++++
.../file/source/reader/TempCollector.java | 43 ++++
.../connector-file-base/src/test/resources/test.md | 146 +++++++++++
6 files changed, 569 insertions(+), 1 deletion(-)
diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/pom.xml
b/seatunnel-connectors-v2/connector-file/connector-file-base/pom.xml
index 130d237962..3b1e2f4fb9 100644
--- a/seatunnel-connectors-v2/connector-file/connector-file-base/pom.xml
+++ b/seatunnel-connectors-v2/connector-file/connector-file-base/pom.xml
@@ -41,6 +41,7 @@
<jaxen.version>2.0.0</jaxen.version>
<easyexcel.version>4.0.3</easyexcel.version>
<fastexcel-reader.version>0.18.4</fastexcel-reader.version>
+ <flexmark-all.version>0.62.2</flexmark-all.version>
</properties>
<dependencyManagement>
@@ -179,6 +180,11 @@
<version>${easyexcel.version}</version>
</dependency>
+ <dependency>
+ <groupId>com.vladsch.flexmark</groupId>
+ <artifactId>flexmark-all</artifactId>
+ <version>${flexmark-all.version}</version>
+ </dependency>
</dependencies>
<build>
diff --git
a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/FileFormat.java
b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/FileFormat.java
index 4a947ea9e2..57b9e01bdd 100644
---
a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/FileFormat.java
+++
b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/FileFormat.java
@@ -34,6 +34,7 @@ import
org.apache.seatunnel.connectors.seatunnel.file.source.reader.BinaryReadSt
import
org.apache.seatunnel.connectors.seatunnel.file.source.reader.CsvReadStrategy;
import
org.apache.seatunnel.connectors.seatunnel.file.source.reader.ExcelReadStrategy;
import
org.apache.seatunnel.connectors.seatunnel.file.source.reader.JsonReadStrategy;
+import
org.apache.seatunnel.connectors.seatunnel.file.source.reader.MarkdownReadStrategy;
import
org.apache.seatunnel.connectors.seatunnel.file.source.reader.OrcReadStrategy;
import
org.apache.seatunnel.connectors.seatunnel.file.source.reader.ParquetReadStrategy;
import
org.apache.seatunnel.connectors.seatunnel.file.source.reader.ReadStrategy;
@@ -167,7 +168,20 @@ public enum FileFormat implements Serializable {
throw new UnsupportedOperationException(
"File format 'maxwell_json' does not support reading.");
}
- };
+ },
+ MARKDOWN("md", "markdown") { // NOTE(review): "md"/"markdown" are presumably the matched file suffixes - confirm against the enum constructor
+ @Override
+ public WriteStrategy getWriteStrategy(FileSinkConfig fileSinkConfig) { // writing is rejected for this format
+ throw new UnsupportedOperationException(
+ "File format 'markdown' does not support writing.");
+ }
+
+ @Override
+ public ReadStrategy getReadStrategy() { // every call hands out a new MarkdownReadStrategy
+ return new MarkdownReadStrategy();
+ }
+ },
+ ;
private final String[] suffix;
diff --git
a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/MarkdownReadStrategy.java
b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/MarkdownReadStrategy.java
new file mode 100644
index 0000000000..675fe5c69c
--- /dev/null
+++
b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/MarkdownReadStrategy.java
@@ -0,0 +1,279 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.file.source.reader;
+
+import org.apache.seatunnel.api.source.Collector;
+import org.apache.seatunnel.api.table.type.BasicType;
+import org.apache.seatunnel.api.table.type.SeaTunnelRow;
+import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
+import
org.apache.seatunnel.connectors.seatunnel.file.exception.FileConnectorException;
+
+import com.vladsch.flexmark.ast.BlockQuote;
+import com.vladsch.flexmark.ast.BulletList;
+import com.vladsch.flexmark.ast.Code;
+import com.vladsch.flexmark.ast.FencedCodeBlock;
+import com.vladsch.flexmark.ast.Heading;
+import com.vladsch.flexmark.ast.Image;
+import com.vladsch.flexmark.ast.Link;
+import com.vladsch.flexmark.ast.ListItem;
+import com.vladsch.flexmark.ast.OrderedList;
+import com.vladsch.flexmark.ast.Paragraph;
+import com.vladsch.flexmark.ast.ThematicBreak;
+import com.vladsch.flexmark.ext.tables.TableBlock;
+import com.vladsch.flexmark.ext.tables.TableCell;
+import com.vladsch.flexmark.ext.tables.TableRow;
+import com.vladsch.flexmark.parser.Parser;
+import com.vladsch.flexmark.util.ast.Node;
+import lombok.extern.slf4j.Slf4j;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.IdentityHashMap;
+import java.util.List;
+import java.util.Map;
+
+@Slf4j
+public class MarkdownReadStrategy extends AbstractReadStrategy {
+
+ private static final int DEFAULT_PAGE_NUMBER = 1;
+ private static final int DEFAULT_POSITION = 1;
+
+    /**
+     * Bookkeeping attached to each visited AST node: the generated id of the node (may be
+     * {@code null}), the id of its direct parent, the ids of the direct children that received an
+     * id, and the position the node was given among its siblings.
+     */
+    private static class NodeInfo {
+        int positionIndex;
+        String parentId;
+        String elementId;
+        List<String> childIds = new ArrayList<>();
+
+        NodeInfo(String elementId, String parentId, int positionIndex) {
+            this.positionIndex = positionIndex;
+            this.parentId = parentId;
+            this.elementId = elementId;
+        }
+    }
+
+    /**
+     * Parses the Markdown file at {@code path} and emits one row per eligible block element.
+     *
+     * <p>The whole file is loaded into memory and parsed into a flexmark AST. The tree is walked
+     * once to assign element ids and parent/child links and a second time to build the rows,
+     * which are then handed to {@code output} in pre-order (a node before its children).
+     *
+     * @param path location of the Markdown file, resolved through {@link Paths#get}
+     * @param tableId not used by this strategy
+     * @param output collector that receives the generated rows
+     * @throws IOException if the file cannot be read
+     * @throws FileConnectorException declared by the overridden method; not raised directly here
+     */
+    @Override
+    public void read(String path, String tableId, Collector<SeaTunnelRow> output)
+            throws IOException, FileConnectorException {
+        // Decode explicitly as UTF-8: new String(byte[]) uses the platform default charset,
+        // which garbles non-ASCII Markdown on hosts whose default is not UTF-8.
+        String markdown =
+                new String(
+                        Files.readAllBytes(Paths.get(path)),
+                        java.nio.charset.StandardCharsets.UTF_8);
+        // NOTE(review): the parser is built without any extension, while TableBlock/TableRow/
+        // TableCell are imported from the flexmark tables extension - confirm that tables are
+        // actually recognised with this configuration.
+        Parser parser = Parser.builder().build();
+        Node document = parser.parse(markdown);
+
+        // Identity-based map: AST nodes are looked up by reference, not by equals().
+        Map<Node, NodeInfo> nodeInfoMap = new IdentityHashMap<>();
+        Map<String, Integer> typeCounters = new HashMap<>();
+        List<SeaTunnelRow> rows = new ArrayList<>();
+
+        assignIdsAndCollectTree(document, null, nodeInfoMap, DEFAULT_POSITION, typeCounters);
+        generateRows(document, rows, nodeInfoMap, DEFAULT_PAGE_NUMBER);
+
+        for (SeaTunnelRow row : rows) {
+            output.collect(row);
+        }
+    }
+
+    /**
+     * Recursively visits {@code node} and its descendants, storing a {@link NodeInfo} for each.
+     *
+     * <p>Eligible nodes get an id of the form {@code <SimpleClassName>_<n>}, where {@code n} is a
+     * running counter per class name; other nodes get a {@code null} id. The parent id is the id
+     * of the direct parent (possibly {@code null}), and only children with a non-null id are
+     * listed in the parent's child ids.
+     *
+     * @param node node being visited
+     * @param parent direct parent of {@code node}, or {@code null} for the root
+     * @param nodeInfoMap receives the info of every visited node
+     * @param position position recorded for {@code node}; children are numbered from 1
+     * @param typeCounters running per-type counters shared by the whole traversal
+     */
+    private void assignIdsAndCollectTree(
+            Node node,
+            Node parent,
+            Map<Node, NodeInfo> nodeInfoMap,
+            int position,
+            Map<String, Integer> typeCounters) {
+        String id = null;
+        if (isEligibleForRow(node)) {
+            String typeName = node.getClass().getSimpleName();
+            id = typeName + "_" + typeCounters.merge(typeName, 1, Integer::sum);
+        }
+
+        String parentId = null;
+        if (parent != null) {
+            // The parent was registered before the recursion descended into this node.
+            parentId = nodeInfoMap.get(parent).elementId;
+        }
+        NodeInfo info = new NodeInfo(id, parentId, position);
+        nodeInfoMap.put(node, info);
+
+        int siblingIndex = 0;
+        Node child = node.getFirstChild();
+        while (child != null) {
+            siblingIndex++;
+            assignIdsAndCollectTree(child, node, nodeInfoMap, siblingIndex, typeCounters);
+            String childId = nodeInfoMap.get(child).elementId;
+            if (childId != null) {
+                info.childIds.add(childId);
+            }
+            child = child.getNext();
+        }
+    }
+
+    /**
+     * Appends one row for {@code node} when it is eligible, then recurses into its children, so
+     * rows are produced in pre-order.
+     *
+     * <p>Each row holds, in order: element id, element type (simple class name), heading level
+     * ({@code null} unless the node is a {@link Heading}), extracted text, page number, position
+     * index, parent id, and the comma-joined child ids ({@code null} when there are none).
+     *
+     * @param node node being visited
+     * @param rows list the generated rows are appended to
+     * @param nodeInfoMap ids and links computed beforehand for every node
+     * @param pageNumber value written unchanged into every row
+     */
+    private void generateRows(
+            Node node, List<SeaTunnelRow> rows, Map<Node, NodeInfo> nodeInfoMap, int pageNumber) {
+        if (isEligibleForRow(node)) {
+            NodeInfo info = nodeInfoMap.get(node);
+            String typeName = node.getClass().getSimpleName();
+            String content = extractValue(node);
+
+            Integer level = null;
+            if (node instanceof Heading) {
+                level = ((Heading) node).getLevel();
+            }
+
+            String joinedChildIds = null;
+            if (!info.childIds.isEmpty()) {
+                joinedChildIds = String.join(",", info.childIds);
+            }
+
+            Object[] fields = {
+                info.elementId,
+                typeName,
+                level,
+                content,
+                pageNumber,
+                info.positionIndex,
+                info.parentId,
+                joinedChildIds
+            };
+            rows.add(new SeaTunnelRow(fields));
+            log.debug(
+                    "Added row: element_id={} type={} heading_level={} text={} parent_id={} child_ids={}",
+                    info.elementId,
+                    typeName,
+                    level,
+                    content,
+                    info.parentId,
+                    info.childIds);
+        }
+
+        Node child = node.getFirstChild();
+        while (child != null) {
+            generateRows(child, rows, nodeInfoMap, pageNumber);
+            child = child.getNext();
+        }
+    }
+
+    /**
+     * Tells whether {@code node} gets an element id and a row of its own.
+     *
+     * <p>Headings, list items, bullet/ordered lists, block quotes, fenced code blocks and table
+     * blocks always qualify. A paragraph qualifies unless its direct parent is a list item or a
+     * block quote.
+     *
+     * @param node AST node to test
+     * @return {@code true} if a row is emitted for the node
+     */
+    private boolean isEligibleForRow(Node node) {
+        if (node instanceof Paragraph) {
+            Node container = node.getParent();
+            return !(container instanceof ListItem) && !(container instanceof BlockQuote);
+        }
+
+        return node instanceof Heading
+                || node instanceof ListItem
+                || node instanceof BulletList
+                || node instanceof OrderedList
+                || node instanceof BlockQuote
+                || node instanceof FencedCodeBlock
+                || node instanceof TableBlock;
+    }
+
+    /**
+     * Produces the text stored in the row of {@code node}, chosen by node type.
+     *
+     * <p>List items, headings, paragraphs and block quotes use the concatenated source of their
+     * children; lists and tables use their dedicated renderers; fenced code blocks use their
+     * content; any unlisted type falls back to the node's own source characters. The branches
+     * for inline code, thematic breaks, links and images cover types that
+     * {@link #isEligibleForRow(Node)} does not admit.
+     *
+     * @param node node whose text is wanted
+     * @return textual value for the node
+     */
+    private String extractValue(Node node) {
+        if (node instanceof ListItem
+                || node instanceof Heading
+                || node instanceof Paragraph
+                || node instanceof BlockQuote) {
+            return extractTextFromChildren(node);
+        }
+        if (node instanceof BulletList) {
+            return bulletListToString((BulletList) node);
+        }
+        if (node instanceof OrderedList) {
+            return orderedListToString((OrderedList) node);
+        }
+        if (node instanceof Code) {
+            return ((Code) node).getText().toString();
+        }
+        if (node instanceof FencedCodeBlock) {
+            return ((FencedCodeBlock) node).getContentChars().toString();
+        }
+        if (node instanceof ThematicBreak) {
+            return "---";
+        }
+        if (node instanceof Link) {
+            return ((Link) node).getUrl().toString();
+        }
+        if (node instanceof Image) {
+            return ((Image) node).getUrl().toString();
+        }
+        if (node instanceof TableBlock) {
+            return tableToString((TableBlock) node);
+        }
+
+        return node.getChars().toString();
+    }
+
+    /**
+     * Concatenates the source characters of the direct children of {@code node}, without any
+     * separator, and trims the result.
+     *
+     * @param node parent whose direct children are read
+     * @return trimmed concatenation; empty when the node has no children
+     */
+    private String extractTextFromChildren(Node node) {
+        StringBuilder text = new StringBuilder();
+        Node child = node.getFirstChild();
+        while (child != null) {
+            text.append(child.getChars());
+            child = child.getNext();
+        }
+
+        return text.toString().trim();
+    }
+
+    /**
+     * Renders a bullet list as one line per item, each of the form {@code "- <item text>"}
+     * followed by a newline. Children that are not list items are skipped.
+     *
+     * @param list bullet list to render
+     * @return rendered list; empty when the list has no items
+     */
+    private String bulletListToString(BulletList list) {
+        StringBuilder rendered = new StringBuilder();
+        Node entry = list.getFirstChild();
+        while (entry != null) {
+            if (entry instanceof ListItem) {
+                rendered.append("- ");
+                rendered.append(extractTextFromChildren(entry));
+                rendered.append("\n");
+            }
+            entry = entry.getNext();
+        }
+
+        return rendered.toString();
+    }
+
+    /**
+     * Renders an ordered list as one line per item, each of the form {@code "<n>. <item text>"}
+     * followed by a newline. Numbering always starts at 1 and counts list items only; the
+     * numerals written in the source are not consulted.
+     *
+     * @param list ordered list to render
+     * @return rendered list; empty when the list has no items
+     */
+    private String orderedListToString(OrderedList list) {
+        StringBuilder rendered = new StringBuilder();
+        int ordinal = 0;
+        Node entry = list.getFirstChild();
+        while (entry != null) {
+            if (entry instanceof ListItem) {
+                ordinal++;
+                rendered.append(ordinal);
+                rendered.append(". ");
+                rendered.append(extractTextFromChildren(entry));
+                rendered.append("\n");
+            }
+            entry = entry.getNext();
+        }
+
+        return rendered.toString();
+    }
+
+    /**
+     * Renders a table as one line per row; every cell text is followed by {@code " | "} (the last
+     * cell included) and every row ends with a newline.
+     *
+     * @param table table block to render
+     * @return rendered table; empty when no row is found
+     */
+    private String tableToString(TableBlock table) {
+        StringBuilder rendered = new StringBuilder();
+        // NOTE(review): only direct children that are TableRow instances are rendered - confirm
+        // that the parser does not nest rows under intermediate head/body nodes.
+        for (Node line = table.getFirstChild(); line != null; line = line.getNext()) {
+            if (!(line instanceof TableRow)) {
+                continue;
+            }
+            for (Node entry = line.getFirstChild(); entry != null; entry = entry.getNext()) {
+                if (!(entry instanceof TableCell)) {
+                    continue;
+                }
+                String cellText = ((TableCell) entry).getText().toString();
+                rendered.append(cellText).append(" | ");
+            }
+            rendered.append("\n");
+        }
+
+        return rendered.toString();
+    }
+
+    /**
+     * Returns the fixed eight-column schema of the rows produced by this strategy. The column
+     * order matches the field order used when rows are built in {@code generateRows}.
+     *
+     * @param path not consulted; the schema does not depend on the file
+     * @return row type with columns element_id, element_type, heading_level, text, page_number,
+     *     position_index, parent_id and child_ids
+     * @throws FileConnectorException declared by the overridden method; not raised here
+     */
+    @Override
+    public SeaTunnelRowType getSeaTunnelRowTypeInfo(String path) throws FileConnectorException {
+        String[] fieldNames = {
+            "element_id",
+            "element_type",
+            "heading_level",
+            "text",
+            "page_number",
+            "position_index",
+            "parent_id",
+            "child_ids"
+        };
+        org.apache.seatunnel.api.table.type.SeaTunnelDataType[] fieldTypes = {
+            BasicType.STRING_TYPE,
+            BasicType.STRING_TYPE,
+            BasicType.INT_TYPE,
+            BasicType.STRING_TYPE,
+            BasicType.INT_TYPE,
+            BasicType.INT_TYPE,
+            BasicType.STRING_TYPE,
+            BasicType.STRING_TYPE
+        };
+        return new SeaTunnelRowType(fieldNames, fieldTypes);
+    }
+}
diff --git
a/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/MarkdownReadStrategyTest.java
b/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/MarkdownReadStrategyTest.java
new file mode 100644
index 0000000000..bdd7189cb7
--- /dev/null
+++
b/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/MarkdownReadStrategyTest.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.file.source.reader;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.net.URL;
+import java.nio.file.Paths;
+
+class MarkdownReadStrategyTest { // Reads the test.md fixture through MarkdownReadStrategy and checks selected rows
+
+ @Test
+ public void testReadMarkdown() throws Exception {
+ URL resource = this.getClass().getResource("/test.md"); // fixture is looked up on the test classpath
+ String path = Paths.get(resource.toURI()).toString();
+ AbstractReadStrategy markdownReadStrategy = new MarkdownReadStrategy();
+ TempCollector tempCollector = new TempCollector();
+ markdownReadStrategy.read(path, "", tempCollector); // an empty string is passed as tableId
+
+ Assertions.assertEquals(75, tempCollector.getRows().size()); // total number of rows collected for the fixture
+
+ Assertions.assertEquals("Heading_1",
tempCollector.getRows().get(0).getField(0)); // row 0 is expected to be the level-1 title heading
+ Assertions.assertEquals("Heading",
tempCollector.getRows().get(0).getField(1));
+ Assertions.assertEquals(1, tempCollector.getRows().get(0).getField(2));
+ Assertions.assertEquals(
+ "The Essential Guide to Groceries: Shopping, Storing, and
Enjoying Food at Home",
+ tempCollector.getRows().get(0).getField(3));
+ Assertions.assertEquals(1, tempCollector.getRows().get(0).getField(4));
+ Assertions.assertEquals(1, tempCollector.getRows().get(0).getField(5));
+ Assertions.assertNull(tempCollector.getRows().get(0).getField(6)); // no parent id expected
+ Assertions.assertNull(tempCollector.getRows().get(0).getField(7)); // no child ids expected
+
+ Assertions.assertEquals("OrderedList_1",
tempCollector.getRows().get(3).getField(0)); // row 3 is expected to be the table-of-contents ordered list
+ Assertions.assertEquals("OrderedList",
tempCollector.getRows().get(3).getField(1));
+ Assertions.assertNull(tempCollector.getRows().get(3).getField(2));
+ Assertions.assertEquals(
+ "1. [Introduction](#introduction)\n"
+ + "2. [Grocery Categories](#grocery-categories)\n"
+ + "3. [Planning Your Grocery
Trip](#planning-your-grocery-trip)\n"
+ + "4. [Shopping Tips for
Savings](#shopping-tips-for-savings)\n"
+ + "5. [Storing and Organizing
Groceries](#storing-and-organizing-groceries)\n"
+ + "6. [Healthy Choices](#healthy-choices)\n"
+ + "7. [Modern Grocery
Trends](#modern-grocery-trends)\n"
+ + "8. [Comparison Table](#comparison-table)\n"
+ + "9. [Conclusion](#conclusion)\n",
+ tempCollector.getRows().get(3).getField(3));
+ Assertions.assertEquals(1, tempCollector.getRows().get(3).getField(4));
+ Assertions.assertEquals(5, tempCollector.getRows().get(3).getField(5)); // field 5 is position_index in the strategy's schema
+ Assertions.assertNull(tempCollector.getRows().get(3).getField(6));
+ Assertions.assertEquals(
+
"OrderedListItem_1,OrderedListItem_2,OrderedListItem_3,OrderedListItem_4,OrderedListItem_5,OrderedListItem_6,OrderedListItem_7,OrderedListItem_8,OrderedListItem_9",
+ tempCollector.getRows().get(3).getField(7));
+
+ Assertions.assertEquals("OrderedListItem_1",
tempCollector.getRows().get(4).getField(0)); // row 4 is expected to be the first item of that list
+ Assertions.assertEquals("OrderedListItem",
tempCollector.getRows().get(4).getField(1));
+ Assertions.assertNull(tempCollector.getRows().get(4).getField(2));
+ Assertions.assertEquals(
+ "[Introduction](#introduction)",
tempCollector.getRows().get(4).getField(3));
+ Assertions.assertEquals(1, tempCollector.getRows().get(4).getField(4));
+ Assertions.assertEquals(1, tempCollector.getRows().get(4).getField(5));
+ Assertions.assertEquals("OrderedList_1",
tempCollector.getRows().get(4).getField(6)); // the item's parent id is the enclosing list
+ Assertions.assertNull(tempCollector.getRows().get(4).getField(7));
+ }
+}
diff --git
a/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/TempCollector.java
b/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/TempCollector.java
new file mode 100644
index 0000000000..9be1fbc949
--- /dev/null
+++
b/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/TempCollector.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.file.source.reader;
+
+import org.apache.seatunnel.api.source.Collector;
+import org.apache.seatunnel.api.table.type.SeaTunnelRow;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * In-memory {@link Collector} for tests: every collected row is appended to a list that can be
+ * inspected afterwards through {@link #getRows()}.
+ */
+public class TempCollector implements Collector<SeaTunnelRow> {
+
+    private final List<SeaTunnelRow> rows = new ArrayList<>();
+
+    /** Appends {@code record} to the collected rows. */
+    @Override
+    public void collect(SeaTunnelRow record) {
+        this.rows.add(record);
+    }
+
+    /** Always returns {@code null}; this collector provides no lock object. */
+    @Override
+    public Object getCheckpointLock() {
+        return null;
+    }
+
+    /** Returns the live list of rows collected so far, in collection order. */
+    public List<SeaTunnelRow> getRows() {
+        return this.rows;
+    }
+}
diff --git
a/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/resources/test.md
b/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/resources/test.md
new file mode 100644
index 0000000000..319a7511ee
--- /dev/null
+++
b/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/resources/test.md
@@ -0,0 +1,146 @@
+# The Essential Guide to Groceries: Shopping, Storing, and Enjoying Food at
Home
+
+Groceries play a **vital role** in daily life, touching every aspect of
*health*, *convenience*, and *enjoyment*.
+This comprehensive guide covers all things groceries—from what to shop for,
strategies to save money, storage tips, and even how groceries have changed in
the modern era.
+
+---
+
+## Table of Contents
+
+1. [Introduction](#introduction)
+2. [Grocery Categories](#grocery-categories)
+3. [Planning Your Grocery Trip](#planning-your-grocery-trip)
+4. [Shopping Tips for Savings](#shopping-tips-for-savings)
+5. [Storing and Organizing Groceries](#storing-and-organizing-groceries)
+6. [Healthy Choices](#healthy-choices)
+7. [Modern Grocery Trends](#modern-grocery-trends)
+8. [Comparison Table](#comparison-table)
+9. [Conclusion](#conclusion)
+
+---
+
+## 1. Introduction
+
+Shopping for groceries is a **weekly** or even **daily ritual** for many
families worldwide.
+Whether visiting large supermarkets, local markets, or ordering online, the
process impacts nutrition, budget, and convenience.
+Groceries mean more than just food; they include household supplies, snacks,
beverages, and specialty items.
+Making *smart decisions* at the grocery store sets the stage for health and
happiness throughout the week.
+
+---
+
+## 2. Grocery Categories
+
+Groceries are commonly sorted into several essential categories:
+
+- **Fresh Produce**
+ *Fruits and vegetables*, the core of healthy meals.
+- **Meat & Seafood**
+ Chicken, beef, pork, fish, and other protein sources.
+- **Dairy & Eggs**
+ Milk, cheese, yogurt, butter, and eggs for versatile cooking.
+- **Pantry Staples**
+ Rice, pasta, flour, canned goods, oils, and spices.
+- **Frozen Foods**
+ Vegetables, pizzas, ice cream, ready-to-eat meals.
+- **Bakery Items**
+ Bread, rolls, bagels, tortillas, and pastries.
+- **Snacks & Treats**
+ Chips, cookies, nuts, granola bars, and chocolate.
+- **Beverages**
+ Water, juices, milk, coffee, tea, and soft drinks.
+- **Household Necessities**
+ Paper towels, cleaning supplies, toiletries.
+
+---
+
+## 3. Planning Your Grocery Trip
+
+Preparing before shopping helps avoid waste and impulse buying. Here are
useful steps:
+
+1. **Meal Planning**
+ Draft a weekly meal plan and list required ingredients.
+2. **Inventory Check**
+ Review refrigerator, pantry, and freezer for existing items.
+3. **List-Making**
+ Organize your grocery list by store section or category.
+4. **Budgeting**
+ Set a spending cap and track costs as you shop.
+5. **Coupon & Deal Review**
+ Find digital coupons, loyalty programs, and weekly specials.
+
+---
+
+## 4. Shopping Tips for Savings
+
+Grocery costs can be significant. Use these tips to save money and buy wisely:
+
+- **Buy Generic:** Store brands often provide similar quality at lower prices.
+- **Shop Seasonal:** Choose fruits and vegetables when they're in season for
better prices and flavor.
+- **Avoid Hungry Shopping:** Eat before you shop to reduce impulse purchases.
+- **Bulk Buying:** Purchase shelf-stable items in bulk if space permits.
+- **Unit Price Comparison:** Evaluate cost per ounce or gram to get the best
deal.
+- **Loyalty Rewards:** Join store programs for points, discounts, and
member-only deals.
+- **Digital Coupons:** Use apps to find and redeem coupons instantly.
+
+---
+
+## 5. Storing and Organizing Groceries
+
+Proper storage preserves freshness and avoids waste:
+
+- **Refrigerate Immediately:** Place perishable items like dairy and meat in
the fridge.
+- **Freeze Extras:** Use the freezer for surplus bread, meat, and vegetables.
+- **Vacuum Sealing:** Prevent freezer burn with vacuum-sealed storage.
+- **Pantry Organizing:** Store grains, snacks, and canned goods by category.
+- **Labeling:** Mark containers with purchase or expiration dates.
+
+> Smart organization makes meal prep smoother and prevents over-buying.
+---
+
+## 6. Healthy Choices
+
+Groceries lay the foundation for balanced nutrition:
+
+- **Read Labels:** Check nutritional facts for sugar, salt, and fat content.
+- **Whole Foods:** Prioritize unprocessed items like whole grains, lean meats,
and organic produce.
+- **Limit Snacks:** Treat chips, soda, and sweets as occasional indulgences.
+- **Plan Balanced Meals:** Include protein, carbohydrates, and healthy fats.
+- **Hydration:** Stock up on water, herbal tea, and low-sugar drinks.
+
+---
+
+## 7. Modern Grocery Trends
+
+Grocery shopping has evolved dramatically in recent years:
+
+- **Online Ordering:** Services deliver groceries to your home, saving time.
+- **Subscription Boxes:** Regular deliveries offer curated produce, snacks, or
meal kits.
+- **Sustainability:** Eco-friendly packaging and local sourcing are growing
trends.
+- **International Foods:** Stores now stock global products for multicultural
meals.
+- **Healthy Innovations:** Plant-based meat, gluten-free products, and organic
options abound.
+
+---
+
+## 8. Comparison Table
+
+| Category | Benefits | Tips
|
+|---------------------|------------------------------------------------|---------------------------|
+| Fresh Produce | Rich in vitamins and fiber | Buy
seasonal, local |
+| Meat & Seafood | High-quality protein |
Choose lean cuts, fresh |
+| Dairy & Eggs | Calcium and protein source |
Check for low-fat options |
+| Pantry Staples | Long shelf life, base for many meals | Buy
in bulk when possible |
+| Frozen Foods | Convenient, preserves nutrients |
Watch for added sodium |
+| Snacks & Treats | Quick energy boosts |
Limit frequency |
+
+---
+
+## 9. Conclusion
+
+Groceries are staples of daily life, empowering people to cook nutritious
meals and maintain an efficient household.
+By planning ahead, shopping smart, and storing groceries correctly, you can
save both *time* and *money*.
+Choosing healthy and sustainable products benefits your wellbeing and the
environment.
+With automation and digital tools, the grocery experience continues to
improve, making everyone's life easier and tastier.
+
+---
+
+*For more information, visit [Groceries Resource](https://example.com).*
\ No newline at end of file