Files
yolov26_3d/tools/data_mining/batch_convert_txt_to_json.py
2026-06-24 09:35:46 +08:00

164 lines
5.8 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Batch convert ground truth txt label files to JSON format.
Usage:
python test_scripts/batch_convert_txt_to_json.py <input_dir> <output_dir> [options]
Examples:
# Convert all txt files in a directory
python test_scripts/batch_convert_txt_to_json.py /path/to/labels/ /path/to/labels_json/
# With custom image dimensions
python test_scripts/batch_convert_txt_to_json.py /path/to/labels/ /path/to/labels_json/ \\
--image-width 1920 --image-height 1080
# Recursive conversion (preserving subdirectory structure)
python test_scripts/batch_convert_txt_to_json.py /path/to/labels/ /path/to/labels_json/ --recursive
"""
import sys
import argparse
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed
# Put this script's own directory on sys.path so the sibling convert_txt_to_json module can be imported
sys.path.insert(0, str(Path(__file__).parent))
from convert_txt_to_json import convert_txt_to_json
def batch_convert(input_dir, output_dir, img_width=1920, img_height=1080,
                  recursive=False, workers=4, overwrite=False):
    """
    Batch convert all txt label files in input_dir to JSON format.

    Every ``.txt`` file found under ``input_dir`` is handed to
    ``convert_txt_to_json`` and written as a ``.json`` file at the same
    relative location under ``output_dir``. Progress and a final summary are
    printed to stdout. A failure on one file is recorded and reported in the
    summary; it does not abort the rest of the batch.

    Args:
        input_dir: Path to directory containing .txt label files
        output_dir: Path to directory where .json files will be saved
        img_width: Image width for coordinate denormalization
        img_height: Image height for coordinate denormalization
        recursive: If True, search subdirectories recursively
        workers: Number of parallel worker threads; a value of 1 or less
            runs the conversions one by one in the calling thread
        overwrite: If True, overwrite existing JSON files

    Raises:
        FileNotFoundError: If input_dir does not exist.
        NotADirectoryError: If input_dir exists but is not a directory.
    """
    input_path = Path(input_dir)
    output_path = Path(output_dir)

    if not input_path.exists():
        raise FileNotFoundError(f"Input directory not found: {input_dir}")
    # A regular file would glob to nothing and be misreported as an empty
    # directory, so reject it explicitly.
    if not input_path.is_dir():
        raise NotADirectoryError(f"Input path is not a directory: {input_dir}")

    # Collect all .txt files. glob() also matches directories whose name ends
    # in ".txt", so keep regular files only.
    pattern = "**/*.txt" if recursive else "*.txt"
    txt_files = sorted(p for p in input_path.glob(pattern) if p.is_file())
    if not txt_files:
        print(f"No .txt files found in: {input_dir}")
        return

    print(f"Found {len(txt_files)} txt file(s) in: {input_dir}")
    print(f"Output directory: {output_dir}")
    print(f"Image dimensions: {img_width}x{img_height}")
    print(f"Workers: {workers}, Recursive: {recursive}, Overwrite: {overwrite}")
    print("-" * 60)

    # Build list of (src, dst) pairs
    tasks = []
    skipped = 0
    for txt_file in txt_files:
        # Preserve relative subdirectory structure when recursive
        rel = txt_file.relative_to(input_path)
        json_file = output_path / rel.with_suffix(".json")
        if json_file.exists() and not overwrite:
            skipped += 1
            continue
        json_file.parent.mkdir(parents=True, exist_ok=True)
        tasks.append((txt_file, json_file))

    if skipped:
        print(f"Skipping {skipped} already-converted file(s) (use --overwrite to force).")
    if not tasks:
        print("Nothing to do.")
        return

    total = len(tasks)
    success = 0
    errors = []

    def _convert(task):
        # Convert a single (src, dst) pair; any exception propagates to the caller.
        src, dst = task
        convert_txt_to_json(str(src), str(dst), img_width, img_height)

    def _record(index, src, exc, label):
        # Shared bookkeeping for both execution modes. The progress line is
        # emitted for failures too, so the final "[N/N]" line always appears.
        nonlocal success
        if exc is None:
            success += 1
        else:
            errors.append((str(src), str(exc)))
            print(f" ERROR: {src} -> {exc}")
        if index % 100 == 0 or index == total:
            print(f" [{index}/{total}] {label}")

    # Process files (parallel when workers > 1)
    if workers > 1:
        with ThreadPoolExecutor(max_workers=workers) as executor:
            futures = {executor.submit(_convert, task): task[0] for task in tasks}
            # Results arrive in completion order, not submission order.
            for i, future in enumerate(as_completed(futures), 1):
                failure = None
                try:
                    future.result()
                except Exception as e:
                    failure = e
                _record(i, futures[future], failure, "done")
    else:
        for i, task in enumerate(tasks, 1):
            failure = None
            try:
                _convert(task)
            except Exception as e:
                failure = e
            _record(i, task[0], failure, task[0].name)

    failed = len(errors)
    print("-" * 60)
    print(f"Done. Success: {success}, Failed: {failed}, Skipped: {skipped}")
    if errors:
        print("\nFailed files:")
        for path, msg in errors:
            print(f" {path}: {msg}")
def main():
    """Command-line entry point: parse the options and run ``batch_convert``."""
    cli = argparse.ArgumentParser(
        description="Batch convert ground truth txt label files to JSON format",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )

    # (name, add_argument keyword arguments), registered in this order so the
    # generated --help output lists them the same way.
    argument_table = (
        ("input_dir", {"help": "Input directory containing .txt label files"}),
        ("output_dir", {"help": "Output directory for .json files"}),
        ("--image-width", {
            "type": int,
            "default": 1920,
            "help": "Image width for coordinate denormalization (default: 1920)",
        }),
        ("--image-height", {
            "type": int,
            "default": 1080,
            "help": "Image height for coordinate denormalization (default: 1080)",
        }),
        ("--recursive", {
            "action": "store_true",
            "help": "Recursively search subdirectories for txt files",
        }),
        ("--workers", {
            "type": int,
            "default": 4,
            "help": "Number of parallel worker threads (default: 4)",
        }),
        ("--overwrite", {
            "action": "store_true",
            "help": "Overwrite existing JSON files (default: skip)",
        }),
    )
    for name, settings in argument_table:
        cli.add_argument(name, **settings)

    opts = cli.parse_args()

    # The CLI uses image_* names while batch_convert expects img_*.
    batch_convert(
        input_dir=opts.input_dir,
        output_dir=opts.output_dir,
        img_width=opts.image_width,
        img_height=opts.image_height,
        recursive=opts.recursive,
        workers=opts.workers,
        overwrite=opts.overwrite,
    )
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()