Hello,

I am encountering an issue with Tesseract OCR when trying to detect white 
text on a blue background. Despite various preprocessing techniques, the 
OCR is not accurately recognizing the text on this specific background.

*Details:*

Tesseract Version: tesseract v5.0.0-alpha.20210506
Language Pack: English
*Image Characteristics:*
Background color: Blue
Text color: White
Image resolution: 1920x1080
Image format: PNG
*Preprocessing Techniques Applied:*
1. Grayscale conversion
2. Contrast adjustment
3. Binary thresholding
4. Inversion of the image
5. Morphological operations
6. Increase Contrast
7. ROI
8. Convert the image to the HSV color space, Create a mask to isolate blue 
regions,Invert the mask to focus on the text and Using the mask to extract 
the white text
*Script/Code Used:*
import cv2
import pytesseract
import pyautogui
import time
import numpy as np

# Point pytesseract at the Tesseract binary (only needed when it is not on PATH).
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'


def preprocess_image_gray(image):
    """Convert a BGR image to grayscale, previewing the result in a window."""
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Debug preview: blocks until any key is pressed.
    cv2.imshow("Gray Scale Image", grayscale)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    return grayscale


def preprocess_image_increase_contrast(image):
    """Boost contrast by scaling pixel values (alpha=1.5), with a debug preview."""
    boosted = cv2.convertScaleAbs(image, alpha=1.5, beta=0)
    # Debug preview: blocks until any key is pressed.
    cv2.imshow("Increase contrast", boosted)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    return boosted


def preprocess_image_gaussian_blur(image):
    """Smooth the image with a 5x5 Gaussian kernel, with a debug preview."""
    smoothed = cv2.GaussianBlur(image, (5, 5), 0)
    # Debug preview: blocks until any key is pressed.
    cv2.imshow("GaussianBlur", smoothed)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    return smoothed


def preprocess_image_edge_detection(image):
    """Run Canny edge detection (thresholds 50/150), with a debug preview."""
    edges = cv2.Canny(image, 50, 150)
    # Debug preview: blocks until any key is pressed.
    cv2.imshow("edge detection", edges)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    return edges


def preprocess_image_inverted(image):
    """Invert pixel values (white text becomes dark), with a debug preview."""
    negated = cv2.bitwise_not(image)
    # Debug preview: blocks until any key is pressed.
    cv2.imshow("Inverted Image", negated)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    return negated


def preprocess_image_dialte_edges(image):
    """Dilate the image, mask the original with the dilation, and return the inversion.

    Each intermediate stage is shown in a blocking debug window.
    """
    # Dilate with the default 3x3 kernel (kernel=None), two passes.
    dilated = cv2.dilate(image, None, iterations=2)
    cv2.imshow("dilate", dilated)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    # BUGFIX: cv2.bitwise_and requires a single-channel 8-bit mask. The
    # original code passed `dilated` directly, which raises an OpenCV error
    # whenever the input is a 3-channel BGR image; collapse it to one
    # channel first (no-op for already-grayscale input).
    mask = dilated if dilated.ndim == 2 else cv2.cvtColor(dilated, cv2.COLOR_BGR2GRAY)
    result = cv2.bitwise_and(image, image, mask=mask)
    cv2.imshow("Bitwise-AND mask and original image", result)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    # Invert so light text becomes dark on light background.
    inverted_image = cv2.bitwise_not(result)
    cv2.imshow("Inverted Image", inverted_image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    return inverted_image


def perform_ocr(image_path, text_to_find=None, config="--psm 6 --oem 3",
                preprocess_func=preprocess_image_gray):
    """OCR an image after preprocessing and return a list of word bounding boxes.

    Each result dict has keys: text, left, right, top, bottom, confidence
    (confidence scaled from Tesseract's 0-100 range to 0.0-1.0).
    Returns an empty list if an error occurs before any words are collected.
    """
    global ocr_results
    # BUGFIX: initialize before the try block so the final `return` can never
    # raise NameError when an exception fires before the assignment inside.
    ocr_results = []
    try:
        image = cv2.imread(image_path)
        image_preprocessed = preprocess_func(image)
        # BUGFIX: several preprocessors (grayscale, Canny, masks) return
        # single-channel images, for which COLOR_BGR2RGB raises; pick the
        # matching conversion code based on channel count.
        if image_preprocessed.ndim == 2:
            image_rgb = cv2.cvtColor(image_preprocessed, cv2.COLOR_GRAY2RGB)
        else:
            image_rgb = cv2.cvtColor(image_preprocessed, cv2.COLOR_BGR2RGB)
        ocr_data = pytesseract.image_to_data(
            image_rgb, output_type=pytesseract.Output.DICT, config=config)

        # Normalize to a list so downstream filtering can iterate it.
        if text_to_find is not None and not isinstance(text_to_find, list):
            text_to_find = [text_to_find]

        for i, raw_text in enumerate(ocr_data['text']):
            text = raw_text.strip()
            if not text:
                continue

            # Convert confidence from Tesseract's 0-100 scale to 0.0-1.0.
            confidence = float(ocr_data['conf'][i]) / 100.0
            # Skip low-confidence words; this also drops the conf == -1
            # rows Tesseract emits for non-word structural entries.
            if confidence < 0.2:
                continue

            ocr_results.append({
                "text": text,
                "left": ocr_data['left'][i],
                "right": ocr_data['left'][i] + ocr_data['width'][i],
                "top": ocr_data['top'][i],
                "bottom": ocr_data['top'][i] + ocr_data['height'][i],
                "confidence": confidence,  # reuse the value computed above
            })
    except Exception as e:
        # Best-effort: report and fall through to return what was collected.
        print(f"An error occurred in the main function: {e}")

    return ocr_results


def draw_boxes(image_path, ocr_results, output_image_path):
    """Draw each OCR bounding box and its label onto the image and save it."""
    canvas = cv2.imread(image_path)

    for entry in ocr_results:
        left, top = entry['left'], entry['top']
        right, bottom = entry['right'], entry['bottom']
        cv2.rectangle(canvas, (left, top), (right, bottom), (0, 255, 0), 2)
        label = f"{entry['text']} ({entry['confidence']:.2f})"
        if 'distance' in entry:
            label += f" ({entry['distance']:.2f})"
        # Place the label just above the box's top-left corner.
        cv2.putText(canvas, label, (left, top - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    cv2.imwrite(output_image_path, canvas)


def increase_brightness(img, value=50):
    """Return *img* with its HSV value (brightness) channel raised by *value*.

    Assumes an 8-bit BGR input (as produced by cv2.imread) — TODO confirm.
    """
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    h, s, v = cv2.split(hsv)

    # cv2.add performs saturating uint8 arithmetic (clamped to [0, 255]),
    # so the original manual post-clamps (v[v > 255] = 255, v[v < 0] = 0)
    # were dead code and have been removed.
    v = cv2.add(v, value)

    final_hsv = cv2.merge((h, s, v))
    brightened_img = cv2.cvtColor(final_hsv, cv2.COLOR_HSV2BGR)
    return brightened_img


def isolate_white_text_on_blue(image):
    """Brighten the image, mask out blue regions, and return the non-blue content.

    Intended to leave white text visible while suppressing the blue background.
    Each intermediate stage is shown in a blocking debug window.
    """
    # Increase brightness to help separate white text from the background.
    brightened = increase_brightness(image, value=60)
    cv2.imshow("Brightened image", brightened)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    # Convert to HSV so blue can be selected by hue range.
    hsv = cv2.cvtColor(brightened, cv2.COLOR_BGR2HSV)
    cv2.imshow("HSV converted image", hsv)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    # Hue 100-140 covers typical blues in OpenCV's 0-179 hue scale.
    lower_blue = np.array([100, 150, 0])
    upper_blue = np.array([140, 255, 255])

    # Mask of the blue regions.
    blue_mask = cv2.inRange(hsv, lower_blue, upper_blue)
    cv2.imshow("Blue Mask image", blue_mask)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    # Invert so the mask selects everything that is NOT blue (i.e. the text).
    blue_mask_inv = cv2.bitwise_not(blue_mask)
    cv2.imshow(" Inverted Mask image", blue_mask_inv)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    # Keep only the non-blue pixels of the brightened image.
    white_text_on_blue = cv2.bitwise_and(brightened, brightened,
                                         mask=blue_mask_inv)
    # BUGFIX: the original displayed `blue_mask_inv` here under the
    # "White Text image" title; show the actual extraction result instead.
    cv2.imshow("White Text image", white_text_on_blue)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    return white_text_on_blue


def move_and_click(ocr_results):
    """Move the mouse to the center of each OCR box and click it."""
    for entry in ocr_results:
        center_x = (entry['left'] + entry['right']) // 2
        center_y = (entry['top'] + entry['bottom']) // 2
        pyautogui.moveTo(center_x, center_y)
        pyautogui.click()
        # Pause between clicks so the target UI can keep up.
        time.sleep(1)


def main():
    """OCR one image with every preprocessing variant, merge and report the boxes."""
    image_path = 'path-to-image.png'
    # NOTE(review): output_json_path is never used — presumably intended for a
    # JSON export step; verify or remove.
    output_json_path = 'path-to-image.json'
    output_image_path = 'path-to-outputimage.png'

    text_to_find = []

    # PSM 11 = sparse text; OEM 3 = default engine. Same config for every run.
    config = "--psm 11 --oem 3"

    ocr_results_gray = perform_ocr(image_path, text_to_find, config=config,
                                   preprocess_func=preprocess_image_gray)
    print(f"OCR  gray results are: {ocr_results_gray}")
    ocr_results_contrast = perform_ocr(image_path, text_to_find, config=config,
                                       preprocess_func=preprocess_image_increase_contrast)
    print(f"OCR  contrast results are: {ocr_results_contrast}")
    ocr_results_gaussian = perform_ocr(image_path, text_to_find, config=config,
                                       preprocess_func=preprocess_image_gaussian_blur)
    print(f"OCR  gaussian results are: {ocr_results_gaussian}")
    ocr_results_edge = perform_ocr(image_path, text_to_find, config=config,
                                   preprocess_func=preprocess_image_edge_detection)
    print(f"OCR  edge results are: {ocr_results_edge}")
    ocr_results_dialte = perform_ocr(image_path, text_to_find, config=config,
                                     preprocess_func=preprocess_image_dialte_edges)
    print(f"OCR  Dialte results are: {ocr_results_dialte}")
    ocr_results_invert = perform_ocr(image_path, text_to_find, config=config,
                                     preprocess_func=preprocess_image_inverted)
    print(f"OCR  Invert results are: {ocr_results_invert}")
    ocr_results_isolate = perform_ocr(image_path, text_to_find, config=config,
                                      preprocess_func=isolate_white_text_on_blue)
    print(f"OCR Isolate results are: {ocr_results_isolate}")

    # Same concatenation order as the original: isolate first, invert last.
    all_runs = [ocr_results_isolate, ocr_results_gray, ocr_results_contrast,
                ocr_results_gaussian, ocr_results_dialte, ocr_results_edge,
                ocr_results_invert]

    ocr_results = []
    # Replaces the original seven-way isinstance chain; perform_ocr always
    # returns a list, so this guard is defensive only.
    if all(isinstance(run, list) for run in all_runs):
        for run in all_runs:
            ocr_results.extend(run)
    else:
        print("OCR results are not in the expected list format.")

    for result in ocr_results:
        print(
            f"Bounding box: Text = {result['text']}, Left = {result['left']}, "
            f"Top = {result['top']}, "
            f"Right = {result['right']}, Bottom = {result['bottom']}, "
            f"Confidence = {result['confidence']:.2f}"
        )
        print()

    draw_boxes(image_path, ocr_results, output_image_path)

    print(ocr_results)


# Script entry point: run the OCR comparison when executed directly.
if __name__ == "__main__":
    main()

*Issue:*
Despite trying the above preprocessing techniques, the OCR output is still 
missing or incorrectly recognizing the text on the blue background. I have 
also tried adjusting the thresholding and brightness levels, but without 
success.

*Question:*
Could anyone provide suggestions on additional preprocessing techniques or 
modifications to the Tesseract OCR settings that might help improve the 
detection accuracy for white text on a blue background or any other colored 
background?

Thank you in advance for your assistance!

-- 
You received this message because you are subscribed to the Google Groups 
"tesseract-ocr" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To view this discussion on the web visit 
https://groups.google.com/d/msgid/tesseract-ocr/0ce2e54e-1f1a-4fca-8c74-286c9641509en%40googlegroups.com.

Reply via email to