159 lines
5.4 KiB
Python
159 lines
5.4 KiB
Python
|
|
"""飞书多维表格「待落盘」→ analyze → promote(Phase B,FEISHU_BITABLE_AUTO_INGEST)。"""
|
|||
|
|
from __future__ import annotations
|
|||
|
|
|
|||
|
|
from pathlib import Path
|
|||
|
|
from typing import Any
|
|||
|
|
|
|||
|
|
from as_platform.config import FEISHU_BITABLE_FIELDS
|
|||
|
|
from as_platform.data.lake import (
|
|||
|
|
analyze_directory_candidate,
|
|||
|
|
create_feishu_directory_candidate,
|
|||
|
|
promote_candidate_to_inbox,
|
|||
|
|
)
|
|||
|
|
from as_platform.db.engine import session_scope
|
|||
|
|
from as_platform.db.models import DatasetCandidate, FeishuBitableLink
|
|||
|
|
from as_platform.integrations.feishu_bitable import is_bitable_configured, list_all_records, update_record
|
|||
|
|
from as_platform.integrations.feishu_bitable_sync import batch_key
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _validate_row(flat: dict[str, str]) -> str | None:
|
|||
|
|
project = flat.get("project") or ""
|
|||
|
|
task = flat.get("task") or ""
|
|||
|
|
batch_name = flat.get("batch_name") or ""
|
|||
|
|
mode = flat.get("mode") or ""
|
|||
|
|
if not project or not batch_name:
|
|||
|
|
return "缺少 项目 或 批次名"
|
|||
|
|
if batch_name == task:
|
|||
|
|
return "批次名不能与任务名相同"
|
|||
|
|
if task in ("dam", "forward") and not mode:
|
|||
|
|
return f"任务 {task} 须填写 子模式"
|
|||
|
|
data_path = (flat.get("data_path") or "").strip()
|
|||
|
|
if not data_path:
|
|||
|
|
return "待落盘须填写 数据路径(内网 NAS)"
|
|||
|
|
if not Path(data_path).exists():
|
|||
|
|
return f"数据路径不存在: {data_path}"
|
|||
|
|
return None
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _already_ingested(flat: dict[str, str], delivery_id: str) -> bool:
|
|||
|
|
cid = (flat.get("candidate_id") or "").strip()
|
|||
|
|
if cid:
|
|||
|
|
with session_scope() as db:
|
|||
|
|
rec = db.get(DatasetCandidate, cid)
|
|||
|
|
if rec and rec.status in ("promoted", "analyzed") and rec.inbox_path:
|
|||
|
|
return True
|
|||
|
|
if delivery_id:
|
|||
|
|
with session_scope() as db:
|
|||
|
|
link = db.query(FeishuBitableLink).filter_by(delivery_id=delivery_id).first()
|
|||
|
|
if link and link.inbox_path:
|
|||
|
|
return True
|
|||
|
|
return False
|
|||
|
|
|
|||
|
|
|
|||
|
|
def process_pending_ingest() -> dict[str, Any]:
    """Ingest every Bitable row whose status is "待落盘".

    For each such row the function skips it when ``_already_ingested`` says
    so, validates it, creates a directory candidate, analyzes it, promotes it
    to the inbox, writes the outcome back to the Bitable record and finally
    creates or updates the ``FeishuBitableLink`` for the batch key.

    Returns:
        A summary dict. When Bitable is not configured it holds ``ok=False``,
        a message and ``processed=0``. Otherwise it holds ``ok=True``, the
        ``processed`` and ``skipped`` counts, at most the first 20 error
        strings, and the configured name of the status field.
    """
    if not is_bitable_configured():
        return {"ok": False, "message": "未配置多维表格", "processed": 0}

    rows = list_all_records()
    done_count = 0
    skip_count = 0
    failures: list[str] = []

    for row in rows:
        record_id = row.get("record_id")
        fields = row.get("flat") or {}
        # Only rows that have an id and are waiting to be landed are handled.
        if not record_id or fields.get("status") != "待落盘":
            continue

        delivery_id = fields.get("delivery_id") or ""
        if _already_ingested(fields, delivery_id):
            skip_count += 1
            continue

        problem = _validate_row(fields)
        if problem:
            # The validation message goes to the record; only a failure to
            # write it back is collected in the returned error list.
            try:
                update_record(record_id, {"status": "落盘失败", "error_message": problem})
            except Exception as exc:
                failures.append(f"{record_id}: {exc}")
            continue

        # Failure to set the in-progress marker is ignored; ingestion
        # continues regardless.
        try:
            update_record(record_id, {"status": "分析中", "error_message": ""})
        except Exception:
            pass

        project = fields.get("project") or "dms"
        task = fields.get("task") or None
        mode = fields.get("mode") or None
        batch_name = fields.get("batch_name") or ""
        source_dir = Path((fields.get("data_path") or "").strip())
        # The task is only forwarded for the "dms" project.
        scoped_task = task if project == "dms" else None

        try:
            candidate = create_feishu_directory_candidate(
                project=project,
                task=scoped_task,
                mode=mode,
                source_dir=source_dir,
                external_id=delivery_id or None,
                feishu_record_id=record_id,
            )
            candidate_id = candidate["id"]
            analyze_directory_candidate(candidate_id, source_dir)
            promotion = promote_candidate_to_inbox(
                candidate_id,
                batch=batch_name,
                mode=mode,
            )
            inbox_path = promotion.get("inbox_path") or ""
            # Prefer the batch name reported by the promotion step.
            final_batch = promotion.get("batch") or batch_name

            update_record(
                record_id,
                {
                    "status": "待送标",
                    "candidate_id": candidate_id,
                    "inbox_path": inbox_path,
                    "error_message": "",
                    "record_id": record_id,
                },
            )

            link_key = batch_key(project, scoped_task, mode, final_batch)
            with session_scope() as session:
                link = (
                    session.query(FeishuBitableLink)
                    .filter_by(batch_key=link_key)
                    .first()
                )
                if not link:
                    link = FeishuBitableLink(
                        batch_key=link_key,
                        record_id=record_id,
                        delivery_id=delivery_id or None,
                        project=project,
                        task=task,
                        mode=mode,
                        batch=final_batch,
                    )
                    session.add(link)
                # Refresh the link whether it was just created or already existed.
                link.record_id = record_id
                link.delivery_id = delivery_id or link.delivery_id
                link.inbox_path = inbox_path
                session.flush()

            done_count += 1
        except Exception as exc:
            reason = str(exc)
            failures.append(f"{record_id}: {reason}")
            # Try to surface the failure on the record; a second failure here
            # is ignored because the error is already collected above.
            try:
                update_record(
                    record_id,
                    {"status": "落盘失败", "error_message": reason[:500]},
                )
            except Exception:
                pass

    return {
        "ok": True,
        "processed": done_count,
        "skipped": skip_count,
        "errors": failures[:20],
        "status_field": FEISHU_BITABLE_FIELDS.get("status"),
    }
|