Skip to content

Utilities API

Configurations

pymordialblue.utils.configs

Utility for loading the central configuration file.

The configuration is loaded lazily on first import and cached for the lifetime of the process.

get_config()

Retrieves the configuration dictionary.

Loads package defaults from src/pymordial/configs.yaml and merges with user config.yaml at project root if present.

Returns:

Type Description
PymordialBlueConfig

The configuration dictionary.

Raises:

Type Description
FileNotFoundError

If package config not found.

ValueError

If the configuration is invalid.

Source code in src/pymordialblue/utils/configs.py
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
def get_config() -> PymordialBlueConfig:
    """Return the cached configuration, loading it first if necessary.

    The module-level cache is consulted first; only when it is still empty
    is ``_load_config()`` invoked to populate it. According to the loader's
    documented contract, package defaults are read from
    src/pymordial/configs.yaml and merged with a user config.yaml at the
    project root if present.

    Returns:
        The configuration dictionary.

    Raises:
        FileNotFoundError: If package config not found.
        ValueError: If the configuration is invalid.
    """
    # Fast path: a previous call already populated the cache.
    if _CONFIG is not None:
        return _CONFIG
    # The loader is expected to fill the module-level cache as a side effect.
    _load_config()
    return _CONFIG  # type: ignore

Extract Strategies

pymordialblue.utils.extract_strategies

OCR extraction strategies for preprocessing images.

DefaultExtractStrategy

Bases: PymordialExtractStrategy

Generic preprocessing suitable for any image.

Features:

- Upscale 2×
- Grayscale conversion
- Denoising
- Otsu thresholding
- Inversion if needed (ensuring black text on white background)
- Uses standard Tesseract config --oem 3 --psm 6.

Source code in src/pymordialblue/utils/extract_strategies.py
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
class DefaultExtractStrategy(PymordialExtractStrategy):
    """General-purpose OCR preprocessing usable on any image.

    Pipeline, in order:
    - Upscale by ``DEFAULT_UPSCALE_FACTOR`` (documented as 2×)
    - Convert BGR to grayscale
    - Non-local-means denoising
    - Otsu thresholding
    - Invert when the binarized image is mostly dark, so the result has
      black text on a white background
    - OCR with ``TESSERACT_CONFIG_DEFAULT`` (documented as
      ``--oem 3 --psm 6``).
    """

    def preprocess(self, image: np.ndarray) -> np.ndarray:
        """Run the standard upscale/gray/denoise/threshold pipeline.

        Args:
            image: Input image; it is converted with ``COLOR_BGR2GRAY``, so a
                BGR colour image is expected.

        Returns:
            The binarized image, inverted when its mean intensity falls
            below ``INVERSION_THRESHOLD_MEAN``.
        """
        upscaled = cv2.resize(
            image,
            None,
            fx=DEFAULT_UPSCALE_FACTOR,
            fy=DEFAULT_UPSCALE_FACTOR,
            interpolation=cv2.INTER_CUBIC,
        )
        grayscale = cv2.cvtColor(upscaled, cv2.COLOR_BGR2GRAY)
        smoothed = cv2.fastNlMeansDenoising(
            grayscale,
            None,
            DEFAULT_DENOISE_STRENGTH,
            DEFAULT_DENOISE_TEMPLATE_WINDOW,
            DEFAULT_DENOISE_SEARCH_WINDOW,
        )
        # With THRESH_OTSU the threshold argument (0) is ignored; only the
        # binarized image (second tuple element) is needed here.
        binary = cv2.threshold(
            smoothed, 0, THRESHOLD_BINARY_MAX, cv2.THRESH_BINARY + cv2.THRESH_OTSU
        )[1]
        # A low mean means most pixels are black, i.e. a dark background.
        background_is_dark = np.mean(binary) < INVERSION_THRESHOLD_MEAN
        return cv2.bitwise_not(binary) if background_is_dark else binary

    def tesseract_config(self) -> str:
        """Return ``TESSERACT_CONFIG_DEFAULT`` as the Tesseract options."""
        return TESSERACT_CONFIG_DEFAULT

preprocess(image)

Preprocesses the image using standard techniques.

Source code in src/pymordialblue/utils/extract_strategies.py
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
def preprocess(self, image: np.ndarray) -> np.ndarray:
    """Run the standard upscale/gray/denoise/threshold pipeline.

    Args:
        image: Input image; it is converted with ``COLOR_BGR2GRAY``, so a
            BGR colour image is expected.

    Returns:
        The binarized image, inverted when its mean intensity falls below
        ``INVERSION_THRESHOLD_MEAN``.
    """
    upscaled = cv2.resize(
        image,
        None,
        fx=DEFAULT_UPSCALE_FACTOR,
        fy=DEFAULT_UPSCALE_FACTOR,
        interpolation=cv2.INTER_CUBIC,
    )
    grayscale = cv2.cvtColor(upscaled, cv2.COLOR_BGR2GRAY)
    smoothed = cv2.fastNlMeansDenoising(
        grayscale,
        None,
        DEFAULT_DENOISE_STRENGTH,
        DEFAULT_DENOISE_TEMPLATE_WINDOW,
        DEFAULT_DENOISE_SEARCH_WINDOW,
    )
    # With THRESH_OTSU the threshold argument (0) is ignored; only the
    # binarized image (second tuple element) is needed here.
    binary = cv2.threshold(
        smoothed, 0, THRESHOLD_BINARY_MAX, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )[1]
    # A low mean means most pixels are black, i.e. a dark background.
    background_is_dark = np.mean(binary) < INVERSION_THRESHOLD_MEAN
    return cv2.bitwise_not(binary) if background_is_dark else binary

tesseract_config()

Returns the default Tesseract configuration.

Source code in src/pymordialblue/utils/extract_strategies.py
106
107
108
def tesseract_config(self) -> str:
    """Provide the Tesseract option string used by this strategy.

    Returns:
        The module-level ``TESSERACT_CONFIG_DEFAULT`` value, unchanged.
    """
    config = TESSERACT_CONFIG_DEFAULT
    return config

RevomonTextStrategy

Bases: PymordialExtractStrategy

Strategy for Revomon UI images.

Attributes:

Name Type Description
mode

The processing mode ("default", "move", "level").

debug_output_dir

Directory to save debug images.

Source code in src/pymordialblue/utils/extract_strategies.py
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
class RevomonTextStrategy(PymordialExtractStrategy):
    """Strategy for Revomon UI images.

    Attributes:
        mode: The processing mode ("default", "move", "level").
        debug_output_dir: Directory to save debug images.
    """

    def __init__(self, mode: str = MODE_DEFAULT, debug_output_dir: str | None = None):
        """Initializes the RevomonTextStrategy.

        Args:
            mode: The processing mode.
                "default" – generic processing.
                "move" – crops icon/energy bar, upscales 3x.
                "level" – crops "lvl" text, returns digits only.
            debug_output_dir: If provided, saves preprocessed images to this
                directory for debugging.
        """
        self.mode = mode
        self.debug_output_dir = debug_output_dir
        # Fallback pipeline used by the "level" mode and any unrecognized mode.
        self._default = DefaultExtractStrategy()
        # Sequence number embedded in debug image file names.
        self._debug_counter = 0

    def preprocess(self, image: np.ndarray) -> np.ndarray:
        """Preprocesses the image based on the selected mode.

        Args:
            image: Input image; converted with ``COLOR_BGR2GRAY``, so a BGR
                colour image is expected.

        Returns:
            The binarized image ready for OCR. In "move" mode a constant
            border of ``MOVE_BUTTON_PADDING`` pixels is added on every side.
        """
        if self.mode == MODE_MOVE:
            # Custom pipeline for move buttons: drop the bottom strip and the
            # left strip of the button before OCR.
            h, w = image.shape[:2]
            y_end = h - int(h * MOVE_BUTTON_CROP_BOTTOM_RATIO)
            cropped = image[0:y_end, int(w * MOVE_BUTTON_CROP_LEFT_RATIO) :]

            # Upscale for move buttons (helps with small text like 'Phantom Force')
            processed = cv2.resize(
                cropped,
                None,
                fx=MOVE_UPSCALE_FACTOR,
                fy=MOVE_UPSCALE_FACTOR,
                interpolation=cv2.INTER_CUBIC,
            )
            # Grayscale
            gray = cv2.cvtColor(processed, cv2.COLOR_BGR2GRAY)
            # Denoise (keep it low to avoid blurring)
            denoised = cv2.fastNlMeansDenoising(
                gray,
                None,
                DEFAULT_DENOISE_STRENGTH,
                DEFAULT_DENOISE_TEMPLATE_WINDOW,
                DEFAULT_DENOISE_SEARCH_WINDOW,
            )
            # Otsu threshold
            _, thresh = cv2.threshold(
                denoised, 0, THRESHOLD_BINARY_MAX, cv2.THRESH_BINARY + cv2.THRESH_OTSU
            )
            # Invert if background is dark (it usually is for buttons)
            if np.mean(thresh) < INVERSION_THRESHOLD_MEAN:
                thresh = cv2.bitwise_not(thresh)

            # Add padding so glyphs do not touch the image border
            processed = cv2.copyMakeBorder(
                thresh,
                MOVE_BUTTON_PADDING,
                MOVE_BUTTON_PADDING,
                MOVE_BUTTON_PADDING,
                MOVE_BUTTON_PADDING,
                cv2.BORDER_CONSTANT,
                value=PADDING_VALUE_WHITE,
            )
        elif self.mode == MODE_LEVEL:
            # Custom pipeline for level indicators: crop out "lvl" text.
            # Only the width is needed for the left crop.
            _, w = image.shape[:2]
            cropped = image[:, int(w * LEVEL_TEXT_CROP_LEFT_RATIO) :]
            processed = self._default.preprocess(cropped)
        else:
            # Use default pipeline for other modes
            processed = self._default.preprocess(image)

        # Save debug image if debug directory is set
        if self.debug_output_dir:
            os.makedirs(self.debug_output_dir, exist_ok=True)
            debug_path = os.path.join(
                self.debug_output_dir,
                f"debug_{self.mode}_{self._debug_counter:03d}.png",
            )
            cv2.imwrite(debug_path, processed)
            self._debug_counter += 1

        return processed

    def postprocess_text(self, text: str) -> str:
        """Clean up OCR artifacts for Revomon UI text.

        Args:
            text: Raw text returned by the OCR engine.

        Returns:
            In "move" mode: the text on a single line, with edge punctuation
            and one- or two-character tokens removed and whitespace
            normalized. In "level" mode: the text with surrounding whitespace
            stripped. In any other mode: the text unchanged.
        """
        if self.mode == MODE_MOVE:
            # Collapse all whitespace (including newlines from multi-line
            # moves like "Phantom Force") and trim FIRST, so the anchored
            # punctuation pattern below actually sees the edge characters.
            text = re.sub(r"\s+", " ", text).strip()
            # Strip leading/trailing punctuation artifacts (commas, periods, semicolons, etc.)
            text = re.sub(r"^[^\w\s]+|[^\w\s]+$", "", text)
            # Remove stray tokens of one or two word characters
            text = re.sub(r"\b\w{1,2}\b", "", text)
            # Removing tokens can leave double spaces behind; normalize again.
            text = re.sub(r"\s+", " ", text).strip()
        elif self.mode == MODE_LEVEL:
            # For levels, just strip whitespace (we only get digits anyway)
            text = text.strip()

        return text

    def tesseract_config(self) -> str:
        """Returns the Tesseract configuration for the current mode."""
        base = TESSERACT_BASE_CONFIG
        if self.mode == MODE_MOVE:
            # Use PSM 6 (Block) for move buttons to handle multi-word text
            return f"{base} --psm {PSM_BLOCK} {MOVE_BUTTON_WHITELIST_CONFIG}"
        elif self.mode == MODE_LEVEL:
            # Use PSM 7 (Single Line) with digit-only whitelist
            return f"{base} --psm {PSM_SINGLE_LINE} {LEVEL_WHITELIST_CONFIG}"
        else:
            return f"{base} --psm {PSM_BLOCK}"

__init__(mode=MODE_DEFAULT, debug_output_dir=None)

Initializes the RevomonTextStrategy.

Parameters:

Name Type Description Default
mode str

The processing mode. "default" – generic processing. "move" – crops icon/energy bar, upscales 3x. "level" – crops "lvl" text, returns digits only.

MODE_DEFAULT
debug_output_dir str | None

If provided, saves preprocessed images to this directory for debugging.

None
Source code in src/pymordialblue/utils/extract_strategies.py
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
def __init__(self, mode: str = MODE_DEFAULT, debug_output_dir: str | None = None):
    """Set up a Revomon text-extraction strategy.

    Args:
        mode: Which pipeline to run.
            "default" – generic processing.
            "move" – crops icon/energy bar, upscales 3x.
            "level" – crops "lvl" text, returns digits only.
        debug_output_dir: Optional directory; when given, each preprocessed
            image is written there for debugging.
    """
    # Internal helpers: the numbering used in debug file names, and the
    # generic pipeline the non-"move" modes delegate to.
    self._debug_counter = 0
    self._default = DefaultExtractStrategy()
    # Public configuration, stored exactly as supplied.
    self.debug_output_dir = debug_output_dir
    self.mode = mode

postprocess_text(text)

Clean up OCR artifacts for Revomon UI text.

Source code in src/pymordialblue/utils/extract_strategies.py
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
def postprocess_text(self, text: str) -> str:
    """Clean up OCR artifacts for Revomon UI text.

    Args:
        text: Raw text returned by the OCR engine.

    Returns:
        In "move" mode: the text on a single line, with edge punctuation and
        one- or two-character tokens removed and whitespace normalized.
        In "level" mode: the text with surrounding whitespace stripped.
        In any other mode: the text unchanged.
    """
    if self.mode == MODE_MOVE:
        # Collapse all whitespace (including newlines from multi-line moves
        # like "Phantom Force") and trim FIRST, so the anchored punctuation
        # pattern below actually sees the edge characters.
        text = re.sub(r"\s+", " ", text).strip()
        # Strip leading/trailing punctuation artifacts (commas, periods, semicolons, etc.)
        text = re.sub(r"^[^\w\s]+|[^\w\s]+$", "", text)
        # Remove stray tokens of one or two word characters
        text = re.sub(r"\b\w{1,2}\b", "", text)
        # Removing tokens can leave double spaces behind; normalize again.
        text = re.sub(r"\s+", " ", text).strip()
    elif self.mode == MODE_LEVEL:
        # For levels, just strip whitespace (we only get digits anyway)
        text = text.strip()

    return text

preprocess(image)

Preprocesses the image based on the selected mode.

Source code in src/pymordialblue/utils/extract_strategies.py
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
def preprocess(self, image: np.ndarray) -> np.ndarray:
    """Prepare an image for OCR using the pipeline selected by ``self.mode``.

    Args:
        image: Input image; converted with ``COLOR_BGR2GRAY``, so a BGR
            colour image is expected.

    Returns:
        The binarized image. When ``self.debug_output_dir`` is set, the same
        image is also written to that directory as a numbered PNG.
    """
    if self.mode == MODE_MOVE:
        # Move buttons: trim the bottom strip and the left strip first.
        height, width = image.shape[:2]
        bottom = height - int(height * MOVE_BUTTON_CROP_BOTTOM_RATIO)
        left = int(width * MOVE_BUTTON_CROP_LEFT_RATIO)
        region = image[0:bottom, left:]

        # Enlarge so small text (e.g. 'Phantom Force') is easier to read.
        enlarged = cv2.resize(
            region,
            None,
            fx=MOVE_UPSCALE_FACTOR,
            fy=MOVE_UPSCALE_FACTOR,
            interpolation=cv2.INTER_CUBIC,
        )
        grayscale = cv2.cvtColor(enlarged, cv2.COLOR_BGR2GRAY)
        # Light denoising only, to avoid blurring the glyphs.
        smoothed = cv2.fastNlMeansDenoising(
            grayscale,
            None,
            DEFAULT_DENOISE_STRENGTH,
            DEFAULT_DENOISE_TEMPLATE_WINDOW,
            DEFAULT_DENOISE_SEARCH_WINDOW,
        )
        # Otsu picks the threshold itself; keep only the binarized image.
        binary = cv2.threshold(
            smoothed, 0, THRESHOLD_BINARY_MAX, cv2.THRESH_BINARY + cv2.THRESH_OTSU
        )[1]
        # Buttons usually have a dark background: flip to dark-on-light.
        if np.mean(binary) < INVERSION_THRESHOLD_MEAN:
            binary = cv2.bitwise_not(binary)

        # Surround the text with a constant border on all four sides.
        result = cv2.copyMakeBorder(
            binary,
            MOVE_BUTTON_PADDING,
            MOVE_BUTTON_PADDING,
            MOVE_BUTTON_PADDING,
            MOVE_BUTTON_PADDING,
            cv2.BORDER_CONSTANT,
            value=PADDING_VALUE_WHITE,
        )
    elif self.mode == MODE_LEVEL:
        # Level indicators: cut away the "lvl" label on the left, then reuse
        # the generic pipeline on what remains.
        _, width = image.shape[:2]
        left = int(width * LEVEL_TEXT_CROP_LEFT_RATIO)
        result = self._default.preprocess(image[:, left:])
    else:
        # Any other mode goes straight through the generic pipeline.
        result = self._default.preprocess(image)

    # Optionally persist the preprocessed image for inspection.
    if self.debug_output_dir:
        os.makedirs(self.debug_output_dir, exist_ok=True)
        filename = f"debug_{self.mode}_{self._debug_counter:03d}.png"
        cv2.imwrite(os.path.join(self.debug_output_dir, filename), result)
        self._debug_counter += 1

    return result

tesseract_config()

Returns the Tesseract configuration for the current mode.

Source code in src/pymordialblue/utils/extract_strategies.py
221
222
223
224
225
226
227
228
229
230
231
def tesseract_config(self) -> str:
    """Build the Tesseract option string matching ``self.mode``.

    Returns:
        ``TESSERACT_BASE_CONFIG`` followed by a ``--psm`` flag and, for the
        "move" and "level" modes, the mode-specific whitelist config.
    """
    prefix = TESSERACT_BASE_CONFIG
    if self.mode == MODE_MOVE:
        # Block segmentation copes with multi-word move names.
        return f"{prefix} --psm {PSM_BLOCK} {MOVE_BUTTON_WHITELIST_CONFIG}"
    if self.mode == MODE_LEVEL:
        # Levels sit on a single line; the whitelist limits the characters.
        return f"{prefix} --psm {PSM_SINGLE_LINE} {LEVEL_WHITELIST_CONFIG}"
    # Every other mode: block segmentation without a whitelist.
    return f"{prefix} --psm {PSM_BLOCK}"