Files
HSAP/platform/as_platform/integrations/feishu_bitable_ingest.py

159 lines
5.4 KiB
Python
Raw Normal View History

"""飞书多维表格「待落盘」→ analyze → promotePhase BFEISHU_BITABLE_AUTO_INGEST"""
from __future__ import annotations

import logging
from pathlib import Path
from typing import Any

from as_platform.config import FEISHU_BITABLE_FIELDS
from as_platform.data.lake import (
    analyze_directory_candidate,
    create_feishu_directory_candidate,
    promote_candidate_to_inbox,
)
from as_platform.db.engine import session_scope
from as_platform.db.models import DatasetCandidate, FeishuBitableLink
from as_platform.integrations.feishu_bitable import is_bitable_configured, list_all_records, update_record
from as_platform.integrations.feishu_bitable_sync import batch_key
def _validate_row(flat: dict[str, str]) -> str | None:
project = flat.get("project") or ""
task = flat.get("task") or ""
batch_name = flat.get("batch_name") or ""
mode = flat.get("mode") or ""
if not project or not batch_name:
return "缺少 项目 或 批次名"
if batch_name == task:
return "批次名不能与任务名相同"
if task in ("dam", "forward") and not mode:
return f"任务 {task} 须填写 子模式"
data_path = (flat.get("data_path") or "").strip()
if not data_path:
return "待落盘须填写 数据路径(内网 NAS"
if not Path(data_path).exists():
return f"数据路径不存在: {data_path}"
return None
def _already_ingested(flat: dict[str, str], delivery_id: str) -> bool:
    """Report whether this row has already been landed in the inbox.

    Two lookups are tried in order:

    1. The row's ``candidate_id`` (whitespace-stripped): the matching
       ``DatasetCandidate`` counts when its status is "promoted" or
       "analyzed" and it has an ``inbox_path``.
    2. ``delivery_id``: the first ``FeishuBitableLink`` with that delivery id
       counts when it has an ``inbox_path``.

    Args:
        flat: Flattened row values; only ``candidate_id`` is read.
        delivery_id: Delivery identifier of the row; an empty value skips the
            second lookup.

    Returns:
        ``True`` if either lookup finds a landed record, otherwise ``False``.
    """
    candidate_id = (flat.get("candidate_id") or "").strip()
    if candidate_id:
        with session_scope() as db:
            candidate = db.get(DatasetCandidate, candidate_id)
            finished = candidate and candidate.status in ("promoted", "analyzed")
            if finished and candidate.inbox_path:
                return True

    if not delivery_id:
        return False

    with session_scope() as db:
        existing = db.query(FeishuBitableLink).filter_by(delivery_id=delivery_id).first()
        return bool(existing and existing.inbox_path)
_logger = logging.getLogger(__name__)


def _update_record_best_effort(record_id: str, fields: dict[str, Any]) -> None:
    """Write ``fields`` to a Bitable row without letting a failure propagate.

    Used for status writes that must not abort the batch. A failed write is
    logged with its traceback instead of being silently dropped.
    """
    try:
        update_record(record_id, fields)
    except Exception:
        _logger.warning("Bitable update failed for record %s", record_id, exc_info=True)


def _ingest_row(record_id: str, flat: dict[str, str], delivery_id: str) -> None:
    """Create, analyze and promote the candidate for one validated row.

    On success the Bitable row is moved to "待送标" and the
    ``FeishuBitableLink`` for the batch key is created or refreshed. Any
    exception from the lake helpers, the Bitable update or the database is
    left to propagate to the caller.
    """
    project = flat.get("project") or "dms"
    task = flat.get("task") or None
    mode = flat.get("mode") or None
    batch_name = flat.get("batch_name") or ""
    src = Path((flat.get("data_path") or "").strip())
    # The task is only forwarded to the lake helper and the batch key for the
    # "dms" project; the link row below still stores the raw task value.
    scoped_task = task if project == "dms" else None

    cand = create_feishu_directory_candidate(
        project=project,
        task=scoped_task,
        mode=mode,
        source_dir=src,
        external_id=delivery_id or None,
        feishu_record_id=record_id,
    )
    cid = cand["id"]
    analyze_directory_candidate(cid, src)
    promo = promote_candidate_to_inbox(cid, batch=batch_name, mode=mode)
    inbox_path = promo.get("inbox_path") or ""
    batch = promo.get("batch") or batch_name

    update_record(
        record_id,
        {
            "status": "待送标",
            "candidate_id": cid,
            "inbox_path": inbox_path,
            "error_message": "",
            "record_id": record_id,
        },
    )

    key = batch_key(project, scoped_task, mode, batch)
    with session_scope() as db:
        link = db.query(FeishuBitableLink).filter_by(batch_key=key).first()
        if not link:
            link = FeishuBitableLink(
                batch_key=key,
                record_id=record_id,
                delivery_id=delivery_id or None,
                project=project,
                task=task,
                mode=mode,
                batch=batch,
            )
            db.add(link)
        # Refresh the link whether it was just created or already existed; an
        # empty delivery id never overwrites a stored one.
        link.record_id = record_id
        link.delivery_id = delivery_id or link.delivery_id
        link.inbox_path = inbox_path
        db.flush()


def process_pending_ingest() -> dict[str, Any]:
    """Ingest every Bitable row whose status is "待落盘".

    For each such row: skip it if it was already ingested, validate it, mark
    it "分析中", then create/analyze/promote the candidate and record the
    link. A row that fails validation or ingestion is marked "落盘失败" with
    the reason in ``error_message``; the loop then continues with the next row.

    Returns:
        When Bitable is not configured:
        ``{"ok": False, "message": ..., "processed": 0}``.
        Otherwise a dict with ``ok`` (always ``True``), ``processed`` and
        ``skipped`` counters, ``errors`` (at most the first 20 messages, each
        prefixed with the record id) and ``status_field`` (the ``status``
        entry of ``FEISHU_BITABLE_FIELDS``).
    """
    if not is_bitable_configured():
        return {"ok": False, "message": "未配置多维表格", "processed": 0}

    processed = 0
    skipped = 0
    errors: list[str] = []

    for rec in list_all_records():
        record_id = rec.get("record_id")
        flat = rec.get("flat") or {}
        if not record_id or flat.get("status") != "待落盘":
            continue

        delivery_id = flat.get("delivery_id") or ""
        if _already_ingested(flat, delivery_id):
            skipped += 1
            continue

        err = _validate_row(flat)
        if err:
            try:
                update_record(record_id, {"status": "落盘失败", "error_message": err})
            except Exception as e:
                errors.append(f"{record_id}: {e}")
            continue

        # Progress marker only; ingestion proceeds even if this write fails.
        _update_record_best_effort(record_id, {"status": "分析中", "error_message": ""})

        try:
            _ingest_row(record_id, flat, delivery_id)
        except Exception as e:
            # Argument-less exceptions stringify to ""; fall back to repr so
            # the failed row never ends up with a blank reason.
            msg = str(e) or repr(e)
            errors.append(f"{record_id}: {msg}")
            _update_record_best_effort(
                record_id, {"status": "落盘失败", "error_message": msg[:500]}
            )
        else:
            processed += 1

    return {
        "ok": True,
        "processed": processed,
        "skipped": skipped,
        "errors": errors[:20],
        "status_field": FEISHU_BITABLE_FIELDS.get("status"),
    }