diff --git a/README.md b/README.md index 32ce100..bce6813 100644 --- a/README.md +++ b/README.md @@ -2,13 +2,13 @@ 从 `vscode-edi-support` 中提取并用 Python 重写的 EDIFACT 解析工具集。 -## 脚本 +## 脚本说明 - `edifact_to_yaml.py`: 解析 EDIFACT,默认输出 YAML(可选 JSON) -- `edifact_summary.py`: 输出业务摘要(默认 YAML,可选 JSON) +- `edifact_summary.py`: 输出业务摘要(默认文本表格,可选 JSON) - `edifact_parser.py`: 底层解析与语义补全逻辑 -## 用法 +## 快速开始 解析为 YAML: @@ -22,10 +22,10 @@ python3 edifact_to_yaml.py sample-orders.edifact --json -o sample-orders.full.json ``` -输出摘要(YAML): +输出摘要(文本表格): ```bash -python3 edifact_summary.py sample-orders.edifact -o sample-orders.summary.yaml +python3 edifact_summary.py sample-orders.edifact -o sample-orders.summary.txt ``` 输出摘要(JSON): @@ -33,3 +33,7 @@ ```bash python3 edifact_summary.py sample-orders.edifact --json -o sample-orders.summary.json ``` + +## `edifact_summary` 工作流程文档 + +详细流程见:[docs/edifact_summary_workflow.md](docs/edifact_summary_workflow.md) diff --git a/docs/edifact_summary_workflow.md b/docs/edifact_summary_workflow.md new file mode 100644 index 0000000..cfdc934 --- /dev/null +++ b/docs/edifact_summary_workflow.md @@ -0,0 +1,77 @@ +# edifact_summary 工作流程说明 + +本文说明 `edifact_summary.py` 从 EDIFACT 文本到摘要输出的完整处理路径。 + +## 1. 输入与解析 + +1. CLI 读取输入文件内容(UTF-8)。 +2. 调用 `parse_edifact(document)` 把原始报文解析为结构化对象(interchange/group/transaction/segment)。 +3. `build_summary(parsed)` 对解析结果进行业务摘要提取。 + +## 2. 摘要构建主流程 + +`build_summary` 的遍历层级: + +1. `interchanges` +2. `functional_groups` +3. `transaction_sets` + +对每个 transaction: + +1. 调用 `summarize_transaction(transaction)` 生成单消息摘要。 +2. 附加 interchange 元信息(sender/receiver/control_ref/date/time)。 +3. 追加到 `messages` 列表。 + +最终输出结构: + +- `interchange_count`: 互换组数量 +- `messages`: 每条业务消息的摘要 + +## 3. 单消息摘要提取(summarize_transaction) + +单消息提取按以下顺序进行: + +1. 组装 transaction 全量 segments(start + body + end)。 +2. 提取文档头 `BGM`: + - `document.code` + - `document.number` + - `document.function_code` +3. 
提取公共业务信息: + - `extract_dates`:读取 `DTM` + - `extract_references`:读取 `RFF` + - `extract_parties`:读取 `NAD` + - `extract_currency`:读取 `CUX` +4. 提取行项目: + - `extract_line_items` 以 `LIN` 为锚点聚合关联的 `PIA/QTY/PRI` + - 产出 `line_items` 与 `line_item_count` +5. 估算总金额 `estimated_total`: + - 每行选取“优先数量限定符 + 优先价格限定符” + - 执行 `qty * price` 后累加 + - 四舍五入到 2 位小数 + +## 4. 关键提取函数说明 + +- `safe_value(segment, element_index)`:按元素序号安全读取标量值。 +- `safe_components(segment, element_index)`:读取复合元素组件值数组。 +- `qualifier_name(...)`:限定符名称优先使用内置映射,缺失时回退到 parser 语义元数据。 +- `pick_measure(entries, preferred_qualifiers)`:从候选列表中按优先限定符选择最合适值。 + +## 5. 输出模式 + +`edifact_summary.py` 支持两种输出: + +1. `--json`:输出机器可读 JSON。 +2. 默认文本模式:`render_summary_table` 生成 ASCII 表格,按消息分块展示。 + +## 6. 运行示例 + +```bash +python3 edifact_summary.py sample-orders.edifact -o sample-orders.summary.txt +python3 edifact_summary.py sample-orders.edifact --json -o sample-orders.summary.json +``` + +## 7. 设计取舍 + +- 优先保留“可读摘要”而非完整报文细节。 +- 金额为估算值,依赖报文中可解析的数量与价格。 +- 名称映射兼容两种来源:静态映射 + 语义元数据回退。 diff --git a/edifact_summary.py b/edifact_summary.py index 4b2fbfb..2460818 100644 --- a/edifact_summary.py +++ b/edifact_summary.py @@ -36,6 +36,7 @@ def safe_value(segment: dict[str, Any], element_index: int) -> str | None: + """Return a scalar element value by 1-based EDIFACT element index.""" elements = segment.get("elements", []) idx = element_index - 1 if idx < 0 or idx >= len(elements): @@ -45,6 +46,7 @@ def safe_components(segment: dict[str, Any], element_index: int) -> list[str]: + """Return all string components of a composite element.""" elements = segment.get("elements", []) idx = element_index - 1 if idx < 0 or idx >= len(elements): @@ -61,6 +63,7 @@ def component_metadata(segment: dict[str, Any], element_index: int, component_index: int) -> dict[str, Any]: + """Read raw component metadata (e.g., semantic info) with safe bounds checks.""" elements = segment.get("elements", []) element_idx = element_index - 1 component_idx = component_index - 1 @@ 
-81,6 +84,7 @@ code: str, fallback_names: dict[str, str], ) -> str: + """Resolve qualifier description from static fallback, then parser semantic metadata.""" name = fallback_names.get(code, "") if name: return name @@ -93,6 +97,7 @@ def parse_decimal(value: str | None) -> float | None: + """Best-effort numeric parse used by amount estimation.""" if value is None or value == "": return None try: @@ -102,6 +107,7 @@ def extract_dates(segments: list[dict[str, Any]]) -> list[dict[str, str]]: + """Collect DTM date entries as qualifier/value pairs.""" dates: list[dict[str, str]] = [] for segment in segments: if segment.get("id") != "DTM": @@ -122,6 +128,7 @@ def extract_references(segments: list[dict[str, Any]]) -> list[dict[str, str]]: + """Collect RFF references used as business keys.""" refs: list[dict[str, str]] = [] for segment in segments: if segment.get("id") != "RFF": @@ -140,6 +147,7 @@ def extract_parties(segments: list[dict[str, Any]]) -> dict[str, dict[str, str]]: + """Collect NAD party master data keyed by party qualifier (BY/SU/DP...).""" parties: dict[str, dict[str, str]] = {} for segment in segments: if segment.get("id") != "NAD": @@ -160,6 +168,7 @@ def extract_currency(segments: list[dict[str, Any]]) -> str | None: + """Pick the first document currency from CUX.""" for segment in segments: if segment.get("id") != "CUX": continue @@ -170,6 +179,7 @@ def extract_line_items(segments: list[dict[str, Any]]) -> list[dict[str, Any]]: + """Build line-level facts by grouping segments under each LIN anchor.""" line_items: list[dict[str, Any]] = [] current: dict[str, Any] | None = None @@ -235,6 +245,7 @@ def transaction_segments(transaction: dict[str, Any]) -> list[dict[str, Any]]: + """Flatten one transaction into start + body + end segment sequence.""" segments: list[dict[str, Any]] = [] start_segment = transaction.get("start_segment") if isinstance(start_segment, dict): @@ -247,6 +258,7 @@ def pick_measure(entries: list[dict[str, Any]], preferred_qualifiers: 
tuple[str, ...]) -> dict[str, Any]: + """Pick the first preferred qualifier; fallback to first available entry.""" for qualifier in preferred_qualifiers: found = next((entry for entry in entries if entry.get("qualifier") == qualifier), None) if found: @@ -255,6 +267,7 @@ def summarize_transaction(transaction: dict[str, Any]) -> dict[str, Any]: + """Build one concise business summary from a parsed transaction set.""" summary: dict[str, Any] = { "message_ref": transaction.get("id"), "message_type": transaction.get("meta", {}).get("version"), @@ -268,6 +281,7 @@ currency = extract_currency(segments) line_items = extract_line_items(segments) + # BGM carries the document identity (business doc code + number). bgm = next((s for s in segments if s.get("id") == "BGM"), None) if bgm: summary["document"] = { @@ -288,6 +302,7 @@ summary["line_items"] = line_items summary["line_item_count"] = len(line_items) + # Lightweight estimate: preferred quantity * preferred price per line. amount_sum = 0.0 has_amount = False for item in line_items: @@ -307,6 +322,7 @@ def build_summary(parsed: dict[str, Any]) -> dict[str, Any]: + """Aggregate all interchanges/groups/transactions into a summary payload.""" interchanges = parsed.get("interchanges", []) result: dict[str, Any] = {"interchange_count": len(interchanges), "messages": []} @@ -316,6 +332,7 @@ for transaction in group.get("transaction_sets", []): tx_summary = summarize_transaction(transaction) tx_summary["interchange"] = { + # Interchange metadata from UNB header for traceability. 
"control_ref": interchange_meta.get("id"), "sender_id": interchange_meta.get("senderID"), "receiver_id": interchange_meta.get("receiverID"), @@ -328,6 +345,7 @@ def as_text(value: Any) -> str: + """Normalize values before table rendering.""" if value is None: return "" if isinstance(value, float): @@ -336,6 +354,7 @@ def render_table(headers: list[str], rows: list[list[Any]]) -> str: + """Render a simple ASCII table with dynamic column widths.""" text_rows = [[as_text(cell) for cell in row] for row in rows] widths = [len(h) for h in headers] for row in text_rows: @@ -352,6 +371,7 @@ def render_message_tables(message: dict[str, Any], index: int) -> str: + """Render one message summary as multiple readable table blocks.""" blocks: list[str] = [f"Message {index}"] header_rows = [ @@ -428,6 +448,7 @@ def render_summary_table(summary: dict[str, Any]) -> str: + """Render all message summaries into one plain-text report.""" messages = summary.get("messages", []) if not messages: return "No messages found." @@ -444,6 +465,7 @@ def main() -> int: + """CLI entry point: parse EDIFACT, build summary, write JSON or table text.""" args = build_parser().parse_args() document = Path(args.input).read_text(encoding="utf-8")