e-mhui commented on code in PR #9535: URL: https://github.com/apache/seatunnel/pull/9535#discussion_r2188365957
########## seatunnel-engine/seatunnel-engine-client/src/test/resources/batch_fake_to_console_multi_table.conf: ########## @@ -15,7 +15,7 @@ # limitations under the License. # ###### -###### This config file is a demonstration of streaming processing in seatunnel config +###### This config file is a demonstration of batch processing in seatunnel config Review Comment: However, there are too many files that need to be modified, so I wrote a Python script to detect irregular configuration files. The detection rules are as follows: 1. The file must be a text file starting with either "batch" or "streaming" and ending with ".conf". 2. If it is a streaming job, the file name must start with "streaming", the comment in the file must be "This config file is a demonstration of streaming processing", and the configuration must be job.mode = "streaming". 3. If it is a batch job, the file name must start with "batch", the comment in the file must be "This config file is a demonstration of batch processing", and the configuration must be job.mode = "batch". All scanned files have been modified, but there are two remaining files that are questionable. They are streaming jobs, but their file names start with "batch": 1. https://github.com/apache/seatunnel/blob/dev/seatunnel-e2e/seatunnel-core-e2e/seatunnel-starter-e2e/src/test/resources/batch_cancel_task_1.conf 2. 
https://github.com/apache/seatunnel/blob/dev/seatunnel-e2e/seatunnel-core-e2e/seatunnel-starter-e2e/src/test/resources/batch_cancel_task_2.conf

The complete scanning script is as follows:

```python
import os
import re


def is_valid_config_file(file_path):
    """
    Check if a SeaTunnel config file meets the specifications
    :param file_path: Full path of the file
    :return: Tuple of (is_valid, error_message)
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        # Check streaming processing config
        if "This config file is a demonstration of streaming processing" in content:
            file_name = os.path.basename(file_path).lower()
            if not file_name.startswith('streaming'):
                return False, "Streaming config file should start with 'streaming'"
            if not re.search(r'job\.mode\s*=\s*["\']STREAMING["\']', content, re.IGNORECASE):
                return False, "Streaming config file should contain job.mode = \"STREAMING\""
            return True, None

        # Check batch processing config
        elif "This config file is a demonstration of batch processing" in content:
            file_name = os.path.basename(file_path).lower()
            if not file_name.startswith('batch'):
                return False, "Batch config file should start with 'batch'"
            if not re.search(r'job\.mode\s*=\s*["\']batch["\']', content, re.IGNORECASE):
                return False, "Batch config file should contain job.mode = \"batch\""
            return True, None

        return False, "Not a valid SeaTunnel config file"

    except Exception as e:
        return False, f"Failed to read file: {str(e)}"


def scan_config_files(directory):
    """
    Recursively scan all SeaTunnel config files in a directory
    :param directory: Directory path to scan
    :return: List of invalid files, each element is a tuple of (file_path, error_message)
    """
    invalid_files = []

    for root, dirs, files in os.walk(directory):
        # Skip target directory
        if 'target' in dirs:
            dirs.remove('target')

        for file_name in files:
            # Only check files starting with batch/streaming and ending with .conf
            lower_name = file_name.lower()
            if (lower_name.startswith('batch') or lower_name.startswith(
                    'streaming')) and lower_name.endswith('.conf'):
                file_path = os.path.join(root, file_name)
                is_valid, error_msg = is_valid_config_file(file_path)
                if not is_valid:
                    invalid_files.append((file_path, error_msg))

    return invalid_files


def print_invalid_files(invalid_files):
    """
    Print list of invalid files
    :param invalid_files: List of invalid files, each element is a tuple of (file_path, error_message)
    """
    if not invalid_files:
        print("All config files meet the specifications")
        return

    print("\nFound the following invalid config files:")
    print("=" * 80)
    for idx, (file_path, error_msg) in enumerate(invalid_files, 1):
        print(f"{idx}. File path: {file_path}")
        print(f" Error message: {error_msg}")
        print("-" * 80)


if __name__ == "__main__":
    # Replace with the directory path you want to scan
    target_directory = input("Please enter the directory path to scan: ").strip()

    if os.path.isdir(target_directory):
        print(f"Start scanning directory: {target_directory}")
        invalid_files = scan_config_files(target_directory)
        print_invalid_files(invalid_files)
        print("Scan completed")
    else:
        print(f"Error: {target_directory} is not a valid directory")
```

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at: [email protected]
