Generally, it is best to convert to greyscale with black text on white
background. Seems you tried that so questions remain.
Please include one or two sample images which exhibit your problem, so
folks around here have something to test against.

Ciao,

Ger

On Mon, 19 Aug 2024, 18:45 Abdul Kalam Shaik, <[email protected]>
wrote:

> Hello,
>
> I am encountering an issue with Tesseract OCR when trying to detect white
> text on a blue background. Despite various preprocessing techniques, the
> OCR is not accurately recognizing the text on this specific background.
>
> *Details:*
>
> Tesseract Version: tesseract v5.0.0-alpha.20210506
> Language Pack: English
> *Image Characteristics:*
> Background color: Blue
> Text color: White
> Image resolution: 1920X1080P
> Image format:PNG
> *Preprocessing Techniques Applied:*
> 1. Grayscale conversion
> 2. Contrast adjustment
> 3. Binary thresholding
> 4. Inversion of the image
> 5. Morphological operations
> 6. Increase Contrast
> 7. ROI
> 8. Convert the image to the HSV color space, Create a mask to isolate blue
> regions,Invert the mask to focus on the text and Using the mask to extract
> the white text
> *  Script/Code Used:*
> import cv2
> import pytesseract
> import pyautogui
> import time
> import numpy as np
>
> # Specify the path to the Tesseract executable if not in PATH
> pytesseract.pytesseract.tesseract_cmd = r'C:\Program
> Files\Tesseract-OCR\tesseract.exe'
>
>
def preprocess_image_gray(image):
    """Convert a BGR image to grayscale and show it for visual inspection."""
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Preview window blocks until a key is pressed.
    cv2.imshow("Gray Scale Image", grayscale)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    return grayscale
>
>
def preprocess_image_increase_contrast(image):
    """Boost contrast with a linear scale (alpha=1.5) and no brightness shift."""
    scaled = cv2.convertScaleAbs(image, alpha=1.5, beta=0)
    # Preview window blocks until a key is pressed.
    cv2.imshow("Increase contrast", scaled)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    return scaled
>
>
def preprocess_image_gaussian_blur(image):
    """Smooth the image with a 5x5 Gaussian kernel to suppress noise."""
    smoothed = cv2.GaussianBlur(image, (5, 5), 0)
    # Preview window blocks until a key is pressed.
    cv2.imshow("GaussianBlur", smoothed)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    return smoothed
>
>
def preprocess_image_edge_detection(image):
    """Run Canny edge detection (thresholds 50/150) and show the edge map."""
    edges = cv2.Canny(image, 50, 150)
    # Preview window blocks until a key is pressed.
    cv2.imshow("edge detection", edges)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    return edges
>
>
def preprocess_image_inverted(image):
    """Invert pixel intensities so light text becomes dark (and vice versa)."""
    negative = cv2.bitwise_not(image)
    # Preview window blocks until a key is pressed.
    cv2.imshow("Inverted Image", negative)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    return negative
>
>
def preprocess_image_dialte_edges(image):
    """Dilate the image, mask the original with the dilation, and invert.

    Bug fix: ``cv2.bitwise_and`` requires a single-channel 8-bit mask, but
    the original code passed the dilated image directly. ``main`` calls this
    with a 3-channel BGR image, whose dilation is also 3-channel, so the
    masking step raised an OpenCV error. The mask is now reduced to
    grayscale when the input is color.

    Note: the function name keeps the historical "dialte" spelling because
    callers reference it.
    """
    # Dilate with the default 3x3 kernel, applied twice.
    dilated = cv2.dilate(image, None, iterations=2)
    cv2.imshow("dilate", dilated)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    # Masks must be single-channel; collapse a color dilation to grayscale.
    mask = dilated if dilated.ndim == 2 else cv2.cvtColor(dilated, cv2.COLOR_BGR2GRAY)

    # Bitwise-AND mask and original image.
    result = cv2.bitwise_and(image, image, mask=mask)
    cv2.imshow("Bitwise-AND mask and original image", result)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    # Invert the masked result.
    inverted_image = cv2.bitwise_not(result)
    cv2.imshow("Inverted Image", inverted_image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    return inverted_image
>
>
def perform_ocr(image_path, text_to_find=None, config="--psm 6 --oem 3",
                preprocess_func=preprocess_image_gray):
    """Run Tesseract on a preprocessed image and collect word bounding boxes.

    Args:
        image_path: Path of the image file to read with cv2.imread.
        text_to_find: Optional string or list of strings (normalized to a
            list; currently not used for filtering — kept for compatibility).
        config: Tesseract CLI options passed through to pytesseract.
        preprocess_func: Callable applied to the loaded BGR image before OCR.

    Returns:
        List of dicts with keys text/left/right/top/bottom/confidence.
        Returns whatever was collected so far (possibly []) if an error
        occurs; the error is printed, not raised.

    Bug fixes vs. the original:
      * ``cv2.cvtColor(..., COLOR_BGR2RGB)`` was applied unconditionally, but
        several preprocessors (grayscale, Canny, masks) return single-channel
        images, making the conversion raise — so those pipelines always
        produced an error. The conversion is now applied only to 3-channel
        images.
      * ``global ocr_results`` plus a ``return`` after the except meant a
        NameError if the first call failed before assignment; the result
        list is now a local initialized up front.
      * The stale comment claiming a 0.5 threshold (code uses 0.2) and the
        misleading "main function" error message were corrected.
    """
    ocr_results = []
    try:
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None silently on a bad path.
            raise FileNotFoundError(f"Could not read image: {image_path}")

        processed = preprocess_func(image)
        # Only color (3-channel BGR) output needs the BGR -> RGB swap;
        # single-channel images go to pytesseract as-is.
        if processed.ndim == 3:
            processed = cv2.cvtColor(processed, cv2.COLOR_BGR2RGB)

        ocr_data = pytesseract.image_to_data(
            processed, output_type=pytesseract.Output.DICT, config=config)

        # Normalize a single search string to a list (filtering TODO).
        if text_to_find is not None and not isinstance(text_to_find, list):
            text_to_find = [text_to_find]

        for i in range(len(ocr_data['text'])):
            text = ocr_data['text'][i].strip()
            if not text:
                continue

            # Tesseract reports conf in 0..100 (-1 for non-word entries);
            # convert to 0..1 and drop entries below 0.2.
            confidence = float(ocr_data['conf'][i]) / 100.0
            if confidence < 0.2:
                continue

            ocr_results.append({
                "text": text,
                "left": ocr_data['left'][i],
                "right": ocr_data['left'][i] + ocr_data['width'][i],
                "top": ocr_data['top'][i],
                "bottom": ocr_data['top'][i] + ocr_data['height'][i],
                "confidence": confidence,
            })
    except Exception as e:
        print(f"An error occurred during OCR: {e}")

    return ocr_results
>
>
def draw_boxes(image_path, ocr_results, output_image_path):
    """Draw a green rectangle and caption for every OCR hit, then save."""
    canvas = cv2.imread(image_path)

    for hit in ocr_results:
        left, top = hit['left'], hit['top']
        right, bottom = hit['right'], hit['bottom']
        cv2.rectangle(canvas, (left, top), (right, bottom), (0, 255, 0), 2)
        caption = f"{hit['text']} ({hit['confidence']:.2f})"
        if 'distance' in hit:
            caption += f" ({hit['distance']:.2f})"
        # Caption sits just above the box's top-left corner.
        cv2.putText(canvas, caption, (left, top - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    cv2.imwrite(output_image_path, canvas)
>
>
def increase_brightness(img, value=50):
    """Raise the V (brightness) channel of a BGR image by *value* in HSV space.

    Args:
        img: BGR uint8 image.
        value: Amount added to the V channel.

    Returns:
        Brightened BGR image.

    Note: ``cv2.add`` performs saturating uint8 arithmetic, so results are
    already clamped to [0, 255]; the original's explicit ``v[v > 255] = 255``
    / ``v[v < 0] = 0`` clamps were dead code and have been removed.
    """
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    h, s, v = cv2.split(hsv)
    v = cv2.add(v, value)  # saturates at 255 for uint8
    return cv2.cvtColor(cv2.merge((h, s, v)), cv2.COLOR_HSV2BGR)
>
>
def isolate_white_text_on_blue(image):
    """Mask out blue background regions so only the (white) text survives.

    Brightens the image, builds an HSV mask of blue pixels, inverts it, and
    keeps only the non-blue pixels of the brightened image.

    Bug fix: the final preview window previously displayed the inverted mask
    (``blue_mask_inv``) instead of the actual masked result.
    """
    # Increase brightness so dim text pops.
    brightened = increase_brightness(image, value=60)
    cv2.imshow("Brightened image", brightened)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    # Convert the image to the HSV color space.
    hsv = cv2.cvtColor(brightened, cv2.COLOR_BGR2HSV)
    cv2.imshow("HSV converted image", hsv)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    # HSV range treated as "blue" (hue 100-140).
    lower_blue = np.array([100, 150, 0])
    upper_blue = np.array([140, 255, 255])

    # Create a mask to isolate blue regions.
    blue_mask = cv2.inRange(hsv, lower_blue, upper_blue)
    cv2.imshow("Blue Mask image", blue_mask)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    # Invert the mask to focus on the text (non-blue pixels).
    blue_mask_inv = cv2.bitwise_not(blue_mask)
    cv2.imshow(" Inverted Mask image", blue_mask_inv)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    # Use the mask to extract the white text.
    white_text_on_blue = cv2.bitwise_and(brightened, brightened,
                                         mask=blue_mask_inv)
    cv2.imshow("White Text image", white_text_on_blue)  # fixed: was blue_mask_inv
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    return white_text_on_blue
>
>
def move_and_click(ocr_results):
    """Click the centre of every detected text box, pausing between clicks."""
    for box in ocr_results:
        centre_x = (box['left'] + box['right']) // 2
        centre_y = (box['top'] + box['bottom']) // 2
        pyautogui.moveTo(centre_x, centre_y)
        pyautogui.click()
        time.sleep(1)  # safety pause between clicks
>
>
def main():
    """Run every preprocessing variant through OCR, merge and report results.

    Replaces seven copy-pasted ``perform_ocr`` calls and a multi-line
    ``isinstance`` chain with a (label, function) table. Print strings and
    the original merge order are preserved verbatim.
    """
    image_path = 'path-to-image.png'
    output_json_path = 'path-to-image.json'  # NOTE(review): currently unused
    output_image_path = 'path-to-outputimage.png'

    text_to_find = []

    # (print label, preprocessing function) in the original run order.
    # Labels are kept byte-for-byte, including historical spacing/typos.
    pipelines = [
        ("OCR  gray", preprocess_image_gray),
        ("OCR  contrast", preprocess_image_increase_contrast),
        ("OCR  gaussian", preprocess_image_gaussian_blur),
        ("OCR  edge", preprocess_image_edge_detection),
        ("OCR  Dialte", preprocess_image_dialte_edges),
        ("OCR  Invert", preprocess_image_inverted),
        ("OCR Isolate", isolate_white_text_on_blue),
    ]

    results_by_label = {}
    for label, func in pipelines:
        results_by_label[label] = perform_ocr(
            image_path, text_to_find, config="--psm 11 --oem 3",
            preprocess_func=func)
        print(f"{label} results are: {results_by_label[label]}")

    # Merge in the original concatenation order: isolate first, then the rest.
    merge_order = ["OCR Isolate", "OCR  gray", "OCR  contrast",
                   "OCR  gaussian", "OCR  Dialte", "OCR  edge", "OCR  Invert"]
    if all(isinstance(results_by_label[lbl], list) for lbl in merge_order):
        ocr_results = [box for lbl in merge_order
                       for box in results_by_label[lbl]]
    else:
        print("OCR results are not in the expected list format.")
        ocr_results = []

    for i, result in enumerate(ocr_results, start=1):
        print(
            f"Bounding box: Text = {result['text']}, Left = {result['left']}, "
            f"Top = {result['top']}, Right = {result['right']}, "
            f"Bottom = {result['bottom']}, "
            f"Confidence = {result['confidence']:.2f}"
        )
        print()

    draw_boxes(image_path, ocr_results, output_image_path)

    print(ocr_results)


if __name__ == "__main__":
    main()
>
> *Issue:*
> Despite trying the above preprocessing techniques, the OCR output is still
> missing or incorrectly recognizing the text on the blue background. I have
> also tried adjusting the thresholding and brightness levels, but without
> success.
>
> *Question:*
> Could anyone provide suggestions on additional preprocessing techniques or
> modifications to the Tesseract OCR settings that might help improve the
> detection accuracy for white text on a blue background or any other colored
> background?
>
> Thank you in advance for your assistance!
>
> --
> You received this message because you are subscribed to the Google Groups
> "tesseract-ocr" group.
> To unsubscribe from this group and stop receiving emails from it, send an
> email to [email protected].
> To view this discussion on the web visit
> https://groups.google.com/d/msgid/tesseract-ocr/0ce2e54e-1f1a-4fca-8c74-286c9641509en%40googlegroups.com
> <https://groups.google.com/d/msgid/tesseract-ocr/0ce2e54e-1f1a-4fca-8c74-286c9641509en%40googlegroups.com?utm_medium=email&utm_source=footer>
> .
>

-- 
You received this message because you are subscribed to the Google Groups 
"tesseract-ocr" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To view this discussion on the web visit 
https://groups.google.com/d/msgid/tesseract-ocr/CAFP60fop1yVuu_K3b%2BwjtuuYxgxCCRyZ018YFGArT%2B3CDpqpRQ%40mail.gmail.com.

Reply via email to