From 36116b4dc582262409547431fdca0cf661190b6b Mon Sep 17 00:00:00 2001 From: steepcurve Date: Tue, 9 Jul 2024 10:40:33 +0800 Subject: [PATCH 01/34] feat: `flamegraph` wrapper --- tools/perfrec-python/perf.py | 56 ++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 tools/perfrec-python/perf.py diff --git a/tools/perfrec-python/perf.py b/tools/perfrec-python/perf.py new file mode 100644 index 00000000..67543682 --- /dev/null +++ b/tools/perfrec-python/perf.py @@ -0,0 +1,56 @@ +import subprocess +import argparse +import os + + +def generate_flamegraph(perf_data, output_svg, flamegraph_path): + # Ensure perf script is available + try: + subprocess.run(["perf", "--version"], check=True) + except subprocess.CalledProcessError: + print("Error: perf is not installed or not in PATH.") + return + + # Ensure Flamegraph scripts are available + stackcollapse_path = os.path.join(flamegraph_path, "stackcollapse-perf.pl") + flamegraph_script_path = os.path.join(flamegraph_path, "flamegraph.pl") + + if not os.path.isfile(stackcollapse_path) or not os.path.isfile( + flamegraph_script_path + ): + print( + f"Error: Flamegraph scripts not found in the provided directory {flamegraph_path}." + ) + return + + # Generate the folded stack output + folded_output = perf_data + ".folded" + with open(folded_output, "w") as f: + script_output = subprocess.run( + ["perf", "script", "-i", perf_data], check=True, stdout=subprocess.PIPE + ) + subprocess.run( + [stackcollapse_path], check=True, input=script_output.stdout, stdout=f + ) + + # Generate the flamegraph + with open(output_svg, "w") as f: + subprocess.run([flamegraph_script_path, folded_output], check=True, stdout=f) + + print(f"Flamegraph generated at {output_svg}") + + +def main(): + parser = argparse.ArgumentParser(description="Generate a Flamegraph from perf.data") + parser.add_argument("perf_data", help="Path to the perf.data file") + parser.add_argument("output_svg", help="Path to the output SVG file") + parser.add_argument( + "flamegraph_path", help="Path to the Flamegraph scripts directory" + ) + args = parser.parse_args() + + generate_flamegraph(args.perf_data, args.output_svg, args.flamegraph_path) + + +if __name__ == "__main__": + main() -- Gitee From 827a3ae0976654b41944e7e2e9cfb34a9c24d3af Mon Sep 17 00:00:00 2001 From: steepcurve Date: Tue, 9 Jul 2024 10:58:48 +0800 Subject: [PATCH 02/34] feat: `flamegraph` wrapper --- tools/perfrec-python/perf.py | 40 +++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/tools/perfrec-python/perf.py b/tools/perfrec-python/perf.py index 67543682..4deb5671 100644 --- a/tools/perfrec-python/perf.py +++ b/tools/perfrec-python/perf.py @@ -1,6 +1,7 @@ -import subprocess import argparse import os +import subprocess +from collections import defaultdict def generate_flamegraph(perf_data, output_svg, flamegraph_path): @@ -39,6 +40,43 @@ def generate_flamegraph(perf_data, output_svg, flamegraph_path): print(f"Flamegraph generated at {output_svg}") + # Analyze the folded stack output + analyze_folded_stack(folded_output) + + +def analyze_folded_stack(folded_output): + function_counts = defaultdict(int) + total_count = 0 + + # Read the folded stack output + with open(folded_output, "r") as f: + for line in f: + parts = line.strip().rsplit( + " ", 1 + ) # Use rsplit to handle function names with spaces + count = int(parts[-1]) + stack = parts[0].split(";") + for function in stack: + function_counts[function] += count + total_count += count + + # 
Filter and display functions with more than 5% total count + threshold = total_count * 0.05 + results = [ + (func, count) for func, count in function_counts.items() if count >= threshold + ] + + # Sort results by count in descending order + results.sort(key=lambda x: x[1], reverse=True) + + # Print the results in an ASCII table format + print("\nFunctions with more than 5% of total samples:\n") + print(f"{'Function':<40} {'Count':<10} {'Percentage':<10}") + print("=" * 60) + for func, count in results: + percentage = (count / total_count) * 100 + print(f"{func:<40} {count:<10} {percentage:<10.2f}") + def main(): parser = argparse.ArgumentParser(description="Generate a Flamegraph from perf.data") -- Gitee From a2c78fe6d86d49618a0b548982cb2067fd7414c0 Mon Sep 17 00:00:00 2001 From: steepcurve Date: Tue, 9 Jul 2024 11:10:25 +0800 Subject: [PATCH 03/34] feat: `flamegraph` wrapper --- tools/perfrec-python/perf.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/tools/perfrec-python/perf.py b/tools/perfrec-python/perf.py index 4deb5671..abdcfcfd 100644 --- a/tools/perfrec-python/perf.py +++ b/tools/perfrec-python/perf.py @@ -3,6 +3,8 @@ import os import subprocess from collections import defaultdict +from tabulate import tabulate + def generate_flamegraph(perf_data, output_svg, flamegraph_path): # Ensure perf script is available @@ -56,8 +58,7 @@ def analyze_folded_stack(folded_output): ) # Use rsplit to handle function names with spaces count = int(parts[-1]) stack = parts[0].split(";") - for function in stack: - function_counts[function] += count + function_counts[stack[-1]] += count total_count += count # Filter and display functions with more than 5% total count @@ -69,13 +70,16 @@ def analyze_folded_stack(folded_output): # Sort results by count in descending order results.sort(key=lambda x: x[1], reverse=True) - # Print the results in an ASCII table format - print("\nFunctions with more than 5% of total samples:\n") - print(f"{'Function':<40} {'Count':<10} {'Percentage':<10}") - print("=" * 60) + # Prepare data for tabulate + table_data = [] for func, count in results: percentage = (count / total_count) * 100 - print(f"{func:<40} {count:<10} {percentage:<10.2f}") + table_data.append([func, count, f"{percentage:.2f}%"]) + + # Print the results using tabulate + print("\nFunctions with more than 5% of total samples:\n") + headers = ["Function", "Count", "Percentage"] + print(tabulate(table_data, headers=headers, tablefmt="grid")) def main(): -- Gitee From 87e7ca66ef92b7c25534fd74047264fc15e0c5eb Mon Sep 17 00:00:00 2001 From: steepcurve Date: Tue, 9 Jul 2024 11:19:34 +0800 Subject: [PATCH 04/34] feat: `flamegraph` wrapper --- tools/perfrec-python/perf.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/tools/perfrec-python/perf.py b/tools/perfrec-python/perf.py index abdcfcfd..4a7ddd52 100644 --- a/tools/perfrec-python/perf.py +++ b/tools/perfrec-python/perf.py @@ -74,7 +74,7 @@ def analyze_folded_stack(folded_output): table_data = [] for func, count in results: percentage = (count / total_count) * 100 - table_data.append([func, count, f"{percentage:.2f}%"]) + table_data.append([limit_line(func, 50), count, f"{percentage:.2f}%"]) # Print the results using tabulate print("\nFunctions with more than 5% of total samples:\n") @@ -82,6 +82,21 @@ def analyze_folded_stack(folded_output): print(tabulate(table_data, headers=headers, tablefmt="grid")) +def limit_line(input: str, line_length: int) -> str: + if line_length >= 
len(input): + return input + limited_str = "" + if line_length > 0: + count = 0 + for c in input: + if count >= line_length: + limited_str += "\n" + count = 0 + limited_str += c + count += 1 + return limited_str + + def main(): parser = argparse.ArgumentParser(description="Generate a Flamegraph from perf.data") parser.add_argument("perf_data", help="Path to the perf.data file") -- Gitee From 5ea7b9af8995efeeae450dcc9085b8d7f70b3494 Mon Sep 17 00:00:00 2001 From: steepcurve Date: Tue, 16 Jul 2024 14:33:27 +0800 Subject: [PATCH 05/34] add fusion tracing --- tools/perfrec-python/fusion_tracing.py | 77 ++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 tools/perfrec-python/fusion_tracing.py diff --git a/tools/perfrec-python/fusion_tracing.py b/tools/perfrec-python/fusion_tracing.py new file mode 100644 index 00000000..4446cce6 --- /dev/null +++ b/tools/perfrec-python/fusion_tracing.py @@ -0,0 +1,77 @@ +import argparse +import re +from datetime import datetime + + +class MxRecEvent: + def __init__(self, log_line: str, event_name: str): + timestamp_s = get_timestamp(log_line) + duration_ms = get_duration(log_line, event_name) + self.timestamp_start_us = timestamp_s * 1e6 - float(duration_ms) * 1e3 + self.duration_us = float(duration_ms) * 1e3 + + +def get_timestamp(log_line: str) -> float: + pattern = r"\[(\d{4}/\d{1,2}/\d{1,2} \d{1,2}:\d{1,2}:\d{1,2}\.\d+)\]" + match = re.search(pattern, log_line) + if match: + date_time_str = match.group(1) + date_time_format = "%Y/%m/%d %H:%M:%S.%f" + # Parse the date-time string into a datetime object + date_time_obj = datetime.strptime(date_time_str, date_time_format) + # Convert the datetime object to a timestamp + return date_time_obj.timestamp() + else: + raise RuntimeError(f"There is no time in log: {log_line}") + + +def get_duration(log_line: str, event_name: str) -> int: + pattern = event_name + r".*:\s*(\d+)" + match = re.search(pattern, log_line) + if match: + duration_ms = match.group(1) + return int(duration_ms) + else: + raise RuntimeError(f"There is no event: {event_name}, log: {log_line}") + + +def get_process_id(log_line: str) -> int: + pattern = r"process_id:\s*(\d+)" + match = re.search(pattern, log_line) + if match: + process_id = match.group(1) + return int(process_id) + else: + raise RuntimeError(f"There is no process_id in log: {log_line}") + + +def get_thread_id(log_line: str) -> int: + pattern = r"thread_id:\s*(\d+)" + match = re.search(pattern, log_line) + if match: + thread_id = match.group(1) + return int(thread_id) + else: + raise RuntimeError(f"There is no thread_id in log: {log_line}") + + +def main(): + log = "[1,0]:[MxRec][2024/7/15 16:28:5.148569] some other contents getAndSendTensorsTC(ms): 166." 
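+    # Smoke test on a sample DEBUG line: MxRec logs carry a bracketed
+    # timestamp and an "<event>TC(ms): <duration>" suffix, parsed below.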
+ print(get_timestamp(log)) + print(get_duration(log, "getAndSendTensor")) + event = MxRecEvent(log, "getAndSendTensors") + print(event.timestamp_start_us, event.duration_us) + + parser = argparse.ArgumentParser(description="Generate CPU/NPU fusion tracing json.") + parser.add_argument("debug_log", help="MxRec DEBUG level log.") + args = parser.parse_args() + + thread_ids = set() + with open(args.debug_log) as log: + for line in log: + if "getAndSendTensorsTC" in line: + thread_ids.add(get_thread_id(line)) + print(thread_ids) + +if __name__ == "__main__": + main() -- Gitee From da38c520580ec650e1c6ca6d579642a9dd3bd376 Mon Sep 17 00:00:00 2001 From: steepcurve Date: Tue, 16 Jul 2024 17:18:42 +0800 Subject: [PATCH 06/34] add fusion tracing --- tools/perfrec-python/config.toml | 5 +++ tools/perfrec-python/fusion_tracing.py | 54 ++++++++++++++++---------- 2 files changed, 38 insertions(+), 21 deletions(-) create mode 100644 tools/perfrec-python/config.toml diff --git a/tools/perfrec-python/config.toml b/tools/perfrec-python/config.toml new file mode 100644 index 00000000..1bbc6aa9 --- /dev/null +++ b/tools/perfrec-python/config.toml @@ -0,0 +1,5 @@ +[mxrec] +key_process = ["getBatchData", "getAndProcess"] +process_emb_info = ["getAndSendTensors"] +lookup_swap_addr = ["lookupAddrs"] +embedding_recv = ["EmbeddingRecv", "EmbeddingUpdate", "SendH2DEmb"] diff --git a/tools/perfrec-python/fusion_tracing.py b/tools/perfrec-python/fusion_tracing.py index 4446cce6..4a27bb1f 100644 --- a/tools/perfrec-python/fusion_tracing.py +++ b/tools/perfrec-python/fusion_tracing.py @@ -1,14 +1,33 @@ import argparse import re +from collections import defaultdict from datetime import datetime +from typing import Dict, List + +import toml class MxRecEvent: def __init__(self, log_line: str, event_name: str): timestamp_s = get_timestamp(log_line) duration_ms = get_duration(log_line, event_name) + process_id = get_process_id(log_line) self.timestamp_start_us = timestamp_s * 1e6 - float(duration_ms) * 1e3 self.duration_us = float(duration_ms) * 1e3 + self.process_id = process_id + self.name = event_name + + +def extract_events( + log_path: str, event_names: Dict[str, str] +) -> Dict[str, List[MxRecEvent]]: + events = defaultdict(list) + with open(log_path) as log: + for line in log: + for name in event_names.keys(): + if name in line: + events[event_names[name]].append(MxRecEvent(line, name)) + return events def get_timestamp(log_line: str) -> float: @@ -45,33 +64,26 @@ def get_process_id(log_line: str) -> int: raise RuntimeError(f"There is no process_id in log: {log_line}") -def get_thread_id(log_line: str) -> int: - pattern = r"thread_id:\s*(\d+)" - match = re.search(pattern, log_line) - if match: - thread_id = match.group(1) - return int(thread_id) - else: - raise RuntimeError(f"There is no thread_id in log: {log_line}") +def read_config() -> Dict[str, str]: + config = toml.load("config.toml") + mxrec_config = defaultdict(str) + for pipe, event_list in config["mxrec"].items(): + for event in event_list: + mxrec_config[event] = pipe + return mxrec_config def main(): - log = "[1,0]:[MxRec][2024/7/15 16:28:5.148569] some other contents getAndSendTensorsTC(ms): 166." - print(get_timestamp(log)) - print(get_duration(log, "getAndSendTensor")) - event = MxRecEvent(log, "getAndSendTensors") - print(event.timestamp_start_us, event.duration_us) - - parser = argparse.ArgumentParser(description="Generate CPU/NPU fusion tracing json.") + parser = argparse.ArgumentParser( + description="Generate CPU/NPU fusion tracing json." 
+ ) parser.add_argument("debug_log", help="MxRec DEBUG level log.") args = parser.parse_args() - thread_ids = set() - with open(args.debug_log) as log: - for line in log: - if "getAndSendTensorsTC" in line: - thread_ids.add(get_thread_id(line)) - print(thread_ids) + log_path = args.debug_log + config = read_config() + print(extract_events(log_path, config)) + if __name__ == "__main__": main() -- Gitee From 8a864be1f414a35a9ccb6a1c838dd48636ee6304 Mon Sep 17 00:00:00 2001 From: steepcurve Date: Wed, 17 Jul 2024 16:09:59 +0800 Subject: [PATCH 07/34] add fusion tracing --- tools/perfrec-python/fusion_tracing.py | 137 +++++++++++++++++++++++-- 1 file changed, 129 insertions(+), 8 deletions(-) diff --git a/tools/perfrec-python/fusion_tracing.py b/tools/perfrec-python/fusion_tracing.py index 4a27bb1f..c4e16311 100644 --- a/tools/perfrec-python/fusion_tracing.py +++ b/tools/perfrec-python/fusion_tracing.py @@ -1,35 +1,82 @@ import argparse +import json import re from collections import defaultdict from datetime import datetime -from typing import Dict, List +from itertools import groupby +from operator import attrgetter +from typing import Any, Dict, List import toml class MxRecEvent: - def __init__(self, log_line: str, event_name: str): + def __init__(self, log_line: str, event_name: str, pipe_id: int): timestamp_s = get_timestamp(log_line) duration_ms = get_duration(log_line, event_name) process_id = get_process_id(log_line) self.timestamp_start_us = timestamp_s * 1e6 - float(duration_ms) * 1e3 self.duration_us = float(duration_ms) * 1e3 + self.timestamp_end_us = timestamp_s * 1e6 self.process_id = process_id self.name = event_name + self.pipe_id = pipe_id def extract_events( log_path: str, event_names: Dict[str, str] -) -> Dict[str, List[MxRecEvent]]: - events = defaultdict(list) +) -> Dict[int, Dict[str, List[MxRecEvent]]]: + events: Dict[int, Dict[str, List[MxRecEvent]]] = defaultdict( + lambda: defaultdict(list) + ) + pipe_names = get_pipes() + pipe_ids = defaultdict(int) + for i, pipe in enumerate(pipe_names): + pipe_ids[pipe] = i with open(log_path) as log: for line in log: - for name in event_names.keys(): + for name, pipe in event_names.items(): if name in line: - events[event_names[name]].append(MxRecEvent(line, name)) + event = MxRecEvent(line, name, pipe_ids[pipe]) + events[event.process_id][pipe].append(event) return events +def merge_multithread_timeline(events: List[MxRecEvent]) -> List[MxRecEvent]: + group_by_name = { + name: sorted(list(group), key=attrgetter("timestamp_start_us")) + for name, group in groupby(events, key=attrgetter("name")) + } + merged = list() + for events in group_by_name.values(): + if len(events) > 1: + tmp_merged = list() + while events: + event = events.pop(0) + if tmp_merged: + last_event: MxRecEvent = tmp_merged.pop() + if event.timestamp_start_us <= last_event.timestamp_end_us: + event.timestamp_start_us = min( + event.timestamp_start_us, last_event.timestamp_start_us + ) + event.timestamp_end_us = max( + event.timestamp_end_us, last_event.timestamp_end_us + ) + event.duration_us = ( + event.timestamp_end_us - event.timestamp_start_us + ) + tmp_merged.append(event) + else: + tmp_merged.append(last_event) + tmp_merged.append(event) + else: + tmp_merged.append(event) + merged.extend(tmp_merged) + else: + merged.extend(events) + return merged + + def get_timestamp(log_line: str) -> float: pattern = r"\[(\d{4}/\d{1,2}/\d{1,2} \d{1,2}:\d{1,2}:\d{1,2}\.\d+)\]" match = re.search(pattern, log_line) @@ -73,16 +120,90 @@ def read_config() -> Dict[str, str]: 
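+
+# Note: with the sample config.toml, read_config() yields a mapping from each
+# time-cost marker to its pipe, e.g. {"getBatchData": "key_process",
+# "getAndSendTensors": "process_emb_info", ...}.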
return mxrec_config +def get_pipes() -> List[str]: + config = toml.load("config.toml") + pipes = list() + for pipe in config["mxrec"].keys(): + pipes.append(pipe) + return pipes + + +class TracingMetaData: + def __init__(self, name: str, pid: int, tid: int, ph: str, args: Dict[str, Any]): + self.name = name + self.pid = pid + self.tid = tid + self.ph = ph + self.args = args + + +class TracingEvent: + def __init__(self, mxrec_event: MxRecEvent): + self.name = mxrec_event.name + self.pid = mxrec_event.process_id + self.tid = get_fake_tid(self.pid, mxrec_event.pipe_id) + self.ts = mxrec_event.timestamp_start_us + self.dur = mxrec_event.duration_us + self.ph = "X" + self.args = {} + + +def get_metadata(processes: List[int]) -> List[TracingMetaData]: + res = list() + pipes = get_pipes() + for i, pid in enumerate(processes): + metadata1 = TracingMetaData( + "process_name", pid, 0, "M", {"name": f"MxRec process {i}"} + ) + metadata2 = TracingMetaData( + "process_sort_index", pid, 0, "M", {"sort_index": i} + ) + res.append(metadata1) + res.append(metadata2) + for pipe_i, pipe in enumerate(pipes): + pipe_metadata1 = TracingMetaData( + "thread_name", + pid, + get_fake_tid(pid, pipe_i), + "M", + {"name": f"{pipe} {pid}"}, + ) + pipe_metadata2 = TracingMetaData( + "thread_sort_index", + pid, + get_fake_tid(pid, pipe_i), + "M", + {"sort_index": pipe_i}, + ) + res.append(pipe_metadata1) + res.append(pipe_metadata2) + return res + + +def get_fake_tid(pid: int, pipe_id: int) -> int: + return pid * 10 + pipe_id + + def main(): parser = argparse.ArgumentParser( description="Generate CPU/NPU fusion tracing json." ) - parser.add_argument("debug_log", help="MxRec DEBUG level log.") + parser.add_argument("debug_log", help="MxRec DEBUG level log flie path.") args = parser.parse_args() log_path = args.debug_log config = read_config() - print(extract_events(log_path, config)) + + mxrec_events = extract_events(log_path, config) + tracing = list() + tracing.extend(get_metadata(list(mxrec_events.keys()))) + for process in mxrec_events.values(): + for events in process.values(): + merged = merge_multithread_timeline(events) + tracing.extend([TracingEvent(event) for event in merged]) + + with open("mxrec_tracing.json", "w") as file: + json.dump(tracing, file, indent=4) if __name__ == "__main__": -- Gitee From ebe752a100dad5dd7dd66209bf58524b5c7e313d Mon Sep 17 00:00:00 2001 From: steepcurve Date: Wed, 17 Jul 2024 16:14:31 +0800 Subject: [PATCH 08/34] add fusion tracing --- tools/perfrec-python/fusion_tracing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perfrec-python/fusion_tracing.py b/tools/perfrec-python/fusion_tracing.py index c4e16311..527ae3f0 100644 --- a/tools/perfrec-python/fusion_tracing.py +++ b/tools/perfrec-python/fusion_tracing.py @@ -203,7 +203,7 @@ def main(): tracing.extend([TracingEvent(event) for event in merged]) with open("mxrec_tracing.json", "w") as file: - json.dump(tracing, file, indent=4) + json.dump(tracing, file, indent=4, default=lambda obj: obj.__dict__) if __name__ == "__main__": -- Gitee From 5ee0e9c34c967da671081354d3ce03eeeca491ae Mon Sep 17 00:00:00 2001 From: steepcurve Date: Thu, 18 Jul 2024 10:29:18 +0800 Subject: [PATCH 09/34] add fusion tracing --- tools/perfrec-python/fusion_tracing.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tools/perfrec-python/fusion_tracing.py b/tools/perfrec-python/fusion_tracing.py index 527ae3f0..278a8f3e 100644 --- a/tools/perfrec-python/fusion_tracing.py +++ 
b/tools/perfrec-python/fusion_tracing.py @@ -3,8 +3,6 @@ import json import re from collections import defaultdict from datetime import datetime -from itertools import groupby -from operator import attrgetter from typing import Any, Dict, List import toml @@ -43,10 +41,11 @@ def extract_events( def merge_multithread_timeline(events: List[MxRecEvent]) -> List[MxRecEvent]: - group_by_name = { - name: sorted(list(group), key=attrgetter("timestamp_start_us")) - for name, group in groupby(events, key=attrgetter("name")) - } + group_by_name: Dict[str, List[MxRecEvent]] = defaultdict(list) + for event in events: + group_by_name[event.name].append(event) + for es in group_by_name.values(): + es.sort(key=lambda x: x.timestamp_start_us) merged = list() for events in group_by_name.values(): if len(events) > 1: -- Gitee From dc5d1297387c5b8ca555fd12ffaec47e5ed1beaf Mon Sep 17 00:00:00 2001 From: steepcurve Date: Thu, 18 Jul 2024 10:49:23 +0800 Subject: [PATCH 10/34] add fusion tracing --- tools/perfrec-python/fusion_tracing.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tools/perfrec-python/fusion_tracing.py b/tools/perfrec-python/fusion_tracing.py index 278a8f3e..a8fddfaf 100644 --- a/tools/perfrec-python/fusion_tracing.py +++ b/tools/perfrec-python/fusion_tracing.py @@ -55,19 +55,18 @@ def merge_multithread_timeline(events: List[MxRecEvent]) -> List[MxRecEvent]: if tmp_merged: last_event: MxRecEvent = tmp_merged.pop() if event.timestamp_start_us <= last_event.timestamp_end_us: - event.timestamp_start_us = min( - event.timestamp_start_us, last_event.timestamp_start_us - ) + last_event.timestamp_end_us = event.timestamp_start_us event.timestamp_end_us = max( event.timestamp_end_us, last_event.timestamp_end_us ) + last_event.duration_us = ( + last_event.timestamp_end_us - last_event.timestamp_start_us + ) event.duration_us = ( event.timestamp_end_us - event.timestamp_start_us ) - tmp_merged.append(event) - else: - tmp_merged.append(last_event) - tmp_merged.append(event) + tmp_merged.append(last_event) + tmp_merged.append(event) else: tmp_merged.append(event) merged.extend(tmp_merged) -- Gitee From 2e1fd8cd96b4208b476e7aa30a37a400f9a8c5eb Mon Sep 17 00:00:00 2001 From: steepcurve Date: Thu, 18 Jul 2024 14:22:21 +0800 Subject: [PATCH 11/34] add fusion tracing --- tools/perfrec-python/fusion_tracing.py | 72 +++++++++++++------------- 1 file changed, 35 insertions(+), 37 deletions(-) diff --git a/tools/perfrec-python/fusion_tracing.py b/tools/perfrec-python/fusion_tracing.py index a8fddfaf..b3c54e00 100644 --- a/tools/perfrec-python/fusion_tracing.py +++ b/tools/perfrec-python/fusion_tracing.py @@ -5,6 +5,7 @@ from collections import defaultdict from datetime import datetime from typing import Any, Dict, List +import pandas as pd import toml @@ -21,7 +22,25 @@ class MxRecEvent: self.pipe_id = pipe_id -def extract_events( +class OpEvent: + def __init__( + self, + device_id: int, + op_name: str, + op_type: str, + task_type: str, + start_timestamp: float, + duration: float, + ): + self.device_id = device_id + self.op_name = op_name + self.op_type = op_type + self.task_type = task_type + self.start_timestamp = start_timestamp + self.duration = duration + + +def extract_mxrec_events( log_path: str, event_names: Dict[str, str] ) -> Dict[int, Dict[str, List[MxRecEvent]]]: events: Dict[int, Dict[str, List[MxRecEvent]]] = defaultdict( @@ -40,39 +59,19 @@ def extract_events( return events -def merge_multithread_timeline(events: List[MxRecEvent]) -> List[MxRecEvent]: - 
group_by_name: Dict[str, List[MxRecEvent]] = defaultdict(list) - for event in events: - group_by_name[event.name].append(event) - for es in group_by_name.values(): - es.sort(key=lambda x: x.timestamp_start_us) - merged = list() - for events in group_by_name.values(): - if len(events) > 1: - tmp_merged = list() - while events: - event = events.pop(0) - if tmp_merged: - last_event: MxRecEvent = tmp_merged.pop() - if event.timestamp_start_us <= last_event.timestamp_end_us: - last_event.timestamp_end_us = event.timestamp_start_us - event.timestamp_end_us = max( - event.timestamp_end_us, last_event.timestamp_end_us - ) - last_event.duration_us = ( - last_event.timestamp_end_us - last_event.timestamp_start_us - ) - event.duration_us = ( - event.timestamp_end_us - event.timestamp_start_us - ) - tmp_merged.append(last_event) - tmp_merged.append(event) - else: - tmp_merged.append(event) - merged.extend(tmp_merged) - else: - merged.extend(events) - return merged +def extract_op_events(op_summary_path: str) -> List[OpEvent]: + df = pd.read_csv(op_summary_path) + return [ + OpEvent( + row["Device_id"], + row["Op Name"], + row["Op Type"], + row["Task Type"], + row["Task Start Time(us)"], + row["Task Duration(us)"], + ) + for _, row in df.iterrows() + ] def get_timestamp(log_line: str) -> float: @@ -192,13 +191,12 @@ def main(): log_path = args.debug_log config = read_config() - mxrec_events = extract_events(log_path, config) + mxrec_events = extract_mxrec_events(log_path, config) tracing = list() tracing.extend(get_metadata(list(mxrec_events.keys()))) for process in mxrec_events.values(): for events in process.values(): - merged = merge_multithread_timeline(events) - tracing.extend([TracingEvent(event) for event in merged]) + tracing.extend([TracingEvent(event) for event in events]) with open("mxrec_tracing.json", "w") as file: json.dump(tracing, file, indent=4, default=lambda obj: obj.__dict__) -- Gitee From fd96f6b75ce34329fd32dbaa65d60fe390160897 Mon Sep 17 00:00:00 2001 From: steepcurve Date: Thu, 18 Jul 2024 16:08:33 +0800 Subject: [PATCH 12/34] add fusion tracing --- tools/perfrec-python/fusion_tracing.py | 89 ++++++++++++++++++++++---- 1 file changed, 77 insertions(+), 12 deletions(-) diff --git a/tools/perfrec-python/fusion_tracing.py b/tools/perfrec-python/fusion_tracing.py index b3c54e00..2392092b 100644 --- a/tools/perfrec-python/fusion_tracing.py +++ b/tools/perfrec-python/fusion_tracing.py @@ -1,9 +1,10 @@ import argparse import json +import os import re from collections import defaultdict from datetime import datetime -from typing import Any, Dict, List +from typing import Any, Dict, List, Tuple import pandas as pd import toml @@ -65,7 +66,7 @@ def extract_op_events(op_summary_path: str) -> List[OpEvent]: OpEvent( row["Device_id"], row["Op Name"], - row["Op Type"], + row["OP Type"], row["Task Type"], row["Task Start Time(us)"], row["Task Duration(us)"], @@ -108,7 +109,7 @@ def get_process_id(log_line: str) -> int: raise RuntimeError(f"There is no process_id in log: {log_line}") -def read_config() -> Dict[str, str]: +def read_mxrec_config() -> Dict[str, str]: config = toml.load("config.toml") mxrec_config = defaultdict(str) for pipe, event_list in config["mxrec"].items(): @@ -134,7 +135,7 @@ class TracingMetaData: self.args = args -class TracingEvent: +class TracingMxRecEvent: def __init__(self, mxrec_event: MxRecEvent): self.name = mxrec_event.name self.pid = mxrec_event.process_id @@ -145,8 +146,19 @@ class TracingEvent: self.args = {} +class TracingOpEvent: + def __init__(self, 
op_event: OpEvent, tid: int): + self.name = op_event.op_type + self.pid = get_op_pid(op_event) + self.tid = tid + self.ts = op_event.start_timestamp + self.dur = op_event.duration + self.ph = "X" + self.args = {"Op Name": op_event.op_name} + + def get_metadata(processes: List[int]) -> List[TracingMetaData]: - res = list() + metadata = list() pipes = get_pipes() for i, pid in enumerate(processes): metadata1 = TracingMetaData( @@ -155,8 +167,8 @@ def get_metadata(processes: List[int]) -> List[TracingMetaData]: metadata2 = TracingMetaData( "process_sort_index", pid, 0, "M", {"sort_index": i} ) - res.append(metadata1) - res.append(metadata2) + metadata.append(metadata1) + metadata.append(metadata2) for pipe_i, pipe in enumerate(pipes): pipe_metadata1 = TracingMetaData( "thread_name", @@ -172,31 +184,84 @@ def get_metadata(processes: List[int]) -> List[TracingMetaData]: "M", {"sort_index": pipe_i}, ) - res.append(pipe_metadata1) - res.append(pipe_metadata2) - return res + metadata.append(pipe_metadata1) + metadata.append(pipe_metadata2) + return metadata def get_fake_tid(pid: int, pipe_id: int) -> int: return pid * 10 + pipe_id +def get_op_pid(op_event: OpEvent) -> int: + return 100 + op_event.device_id + + +def get_op_tracing(path: str) -> Tuple[List[TracingMetaData], List[TracingOpEvent]]: + task_types = defaultdict(int) + pids = set() + tids = set() + metadata = list() + op_tracing = list() + + def new_process_metadata(pid, device_id): + metadata1 = TracingMetaData( + "process_name", pid, 0, "M", {"name": f"NPU {device_id}"} + ) + metadata2 = TracingMetaData( + "process_sort_index", pid, 0, "M", {"sort_index": pid} + ) + return [metadata1, metadata2] + + def new_thread_metadata(pid, tid, name): + metadata1 = TracingMetaData("thread_name", pid, tid, "M", {"name": f"{name}"}) + metadata2 = TracingMetaData( + "thread_sort_index", pid, tid, "M", {"sort_index": tid} + ) + return [metadata1, metadata2] + + for root, _, files in os.walk(path): + for file in files: + if file.startswith("op_summary") and file.endswith(".csv"): + file_path = os.path.join(root, file) + op_events = extract_op_events(file_path) + for event in op_events: + pid = get_op_pid(event) + if pid not in pids: + pids.add(pid) + metadata.extend(new_process_metadata(pid, event.device_id)) + if event.task_type not in task_types: + task_id = len(task_types) + task_types[event.task_type] = task_id + tid = get_fake_tid(pid, task_types[event.task_type]) + if tid not in tids: + tids.add(tid) + metadata.extend(new_thread_metadata(pid, tid, event.task_type)) + op_tracing.append(TracingOpEvent(event, tid)) + return (metadata, op_tracing) + + def main(): parser = argparse.ArgumentParser( description="Generate CPU/NPU fusion tracing json." 
) parser.add_argument("debug_log", help="MxRec DEBUG level log flie path.") + parser.add_argument("msprof_output_path", help="msprof output path.") args = parser.parse_args() log_path = args.debug_log - config = read_config() + config = read_mxrec_config() mxrec_events = extract_mxrec_events(log_path, config) tracing = list() tracing.extend(get_metadata(list(mxrec_events.keys()))) for process in mxrec_events.values(): for events in process.values(): - tracing.extend([TracingEvent(event) for event in events]) + tracing.extend([TracingMxRecEvent(event) for event in events]) + + op_metadata, op_tracing = get_op_tracing(args.msprof_output_path) + tracing.extend(op_metadata) + tracing.extend(op_tracing) with open("mxrec_tracing.json", "w") as file: json.dump(tracing, file, indent=4, default=lambda obj: obj.__dict__) -- Gitee From 345bfb3afc461a76b94c950df69b8af72e4b41fd Mon Sep 17 00:00:00 2001 From: steepcurve Date: Thu, 18 Jul 2024 19:28:51 +0800 Subject: [PATCH 13/34] add comments --- tools/perfrec-python/fusion_tracing.py | 119 +++++++++++++++++++++++++ tools/perfrec-python/perf.py | 31 ++++++- 2 files changed, 148 insertions(+), 2 deletions(-) diff --git a/tools/perfrec-python/fusion_tracing.py b/tools/perfrec-python/fusion_tracing.py index 2392092b..8301a241 100644 --- a/tools/perfrec-python/fusion_tracing.py +++ b/tools/perfrec-python/fusion_tracing.py @@ -11,6 +11,10 @@ import toml class MxRecEvent: + """ + Class to represent an MxRec event. + """ + def __init__(self, log_line: str, event_name: str, pipe_id: int): timestamp_s = get_timestamp(log_line) duration_ms = get_duration(log_line, event_name) @@ -24,6 +28,10 @@ class MxRecEvent: class OpEvent: + """ + Class to represent an Op event. + """ + def __init__( self, device_id: int, @@ -44,6 +52,16 @@ class OpEvent: def extract_mxrec_events( log_path: str, event_names: Dict[str, str] ) -> Dict[int, Dict[str, List[MxRecEvent]]]: + """ + Extracts MxRec events from the log file. + + Args: + log_path (str): Path to the log file. + event_names (Dict[str, str]): Dictionary mapping event names to pipe names. + + Returns: + Dict[int, Dict[str, List[MxRecEvent]]]: Extracted MxRec events grouped by process ID and pipe. + """ events: Dict[int, Dict[str, List[MxRecEvent]]] = defaultdict( lambda: defaultdict(list) ) @@ -61,6 +79,15 @@ def extract_mxrec_events( def extract_op_events(op_summary_path: str) -> List[OpEvent]: + """ + Extracts Op events from the CSV file. + + Args: + op_summary_path (str): Path to the op summary CSV file. + + Returns: + List[OpEvent]: List of extracted Op events. + """ df = pd.read_csv(op_summary_path) return [ OpEvent( @@ -76,6 +103,15 @@ def extract_op_events(op_summary_path: str) -> List[OpEvent]: def get_timestamp(log_line: str) -> float: + """ + Extracts the timestamp from a log line. + + Args: + log_line (str): A line from the log file. + + Returns: + float: The extracted timestamp as a float. + """ pattern = r"\[(\d{4}/\d{1,2}/\d{1,2} \d{1,2}:\d{1,2}:\d{1,2}\.\d+)\]" match = re.search(pattern, log_line) if match: @@ -90,6 +126,16 @@ def get_timestamp(log_line: str) -> float: def get_duration(log_line: str, event_name: str) -> int: + """ + Extracts the duration of an event from a log line. + + Args: + log_line (str): A line from the log file. + event_name (str): The name of the event. + + Returns: + int: The extracted duration in milliseconds. 
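+
+    Example (format taken from a sample MxRec DEBUG line):
+        >>> get_duration("[1,0]:[MxRec][2024/7/15 16:28:5.148569] getAndSendTensorsTC(ms): 166.", "getAndSendTensorsTC")
+        166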
+ """ pattern = event_name + r".*:\s*(\d+)" match = re.search(pattern, log_line) if match: @@ -100,6 +146,15 @@ def get_duration(log_line: str, event_name: str) -> int: def get_process_id(log_line: str) -> int: + """ + Extracts the process ID from a log line. + + Args: + log_line (str): A line from the log file. + + Returns: + int: The extracted process ID. + """ pattern = r"process_id:\s*(\d+)" match = re.search(pattern, log_line) if match: @@ -110,6 +165,12 @@ def get_process_id(log_line: str) -> int: def read_mxrec_config() -> Dict[str, str]: + """ + Reads the MxRec configuration from a TOML file. + + Returns: + Dict[str, str]: Dictionary mapping event names to pipe names. + """ config = toml.load("config.toml") mxrec_config = defaultdict(str) for pipe, event_list in config["mxrec"].items(): @@ -119,6 +180,12 @@ def read_mxrec_config() -> Dict[str, str]: def get_pipes() -> List[str]: + """ + Reads the pipe names from the configuration file. + + Returns: + List[str]: List of pipe names. + """ config = toml.load("config.toml") pipes = list() for pipe in config["mxrec"].keys(): @@ -127,6 +194,10 @@ def get_pipes() -> List[str]: class TracingMetaData: + """ + Class to represent metadata for tracing. + """ + def __init__(self, name: str, pid: int, tid: int, ph: str, args: Dict[str, Any]): self.name = name self.pid = pid @@ -136,6 +207,10 @@ class TracingMetaData: class TracingMxRecEvent: + """ + Class to represent a traced MxRec event. + """ + def __init__(self, mxrec_event: MxRecEvent): self.name = mxrec_event.name self.pid = mxrec_event.process_id @@ -147,6 +222,10 @@ class TracingMxRecEvent: class TracingOpEvent: + """ + Class to represent a traced Op event. + """ + def __init__(self, op_event: OpEvent, tid: int): self.name = op_event.op_type self.pid = get_op_pid(op_event) @@ -158,6 +237,15 @@ class TracingOpEvent: def get_metadata(processes: List[int]) -> List[TracingMetaData]: + """ + Generates metadata for tracing processes and threads. + + Args: + processes (List[int]): List of process IDs. + + Returns: + List[TracingMetaData]: List of tracing metadata. + """ metadata = list() pipes = get_pipes() for i, pid in enumerate(processes): @@ -190,14 +278,42 @@ def get_metadata(processes: List[int]) -> List[TracingMetaData]: def get_fake_tid(pid: int, pipe_id: int) -> int: + """ + Generates a fake thread ID based on process ID and pipe ID. + + Args: + pid (int): Process ID. + pipe_id (int): Pipe ID. + + Returns: + int: Fake thread ID. + """ return pid * 10 + pipe_id def get_op_pid(op_event: OpEvent) -> int: + """ + Gets the process ID for an Op event. + + Args: + op_event (OpEvent): An Op event. + + Returns: + int: Process ID. + """ return 100 + op_event.device_id def get_op_tracing(path: str) -> Tuple[List[TracingMetaData], List[TracingOpEvent]]: + """ + Generates tracing data for Op events. + + Args: + path (str): Path to the directory containing Op event summaries. + + Returns: + Tuple[List[TracingMetaData], List[TracingOpEvent]]: Metadata and tracing events. + """ task_types = defaultdict(int) pids = set() tids = set() @@ -242,6 +358,9 @@ def get_op_tracing(path: str) -> Tuple[List[TracingMetaData], List[TracingOpEven def main(): + """ + Main function to parse arguments and generate tracing JSON. + """ parser = argparse.ArgumentParser( description="Generate CPU/NPU fusion tracing json." 
) diff --git a/tools/perfrec-python/perf.py b/tools/perfrec-python/perf.py index 4a7ddd52..98584b68 100644 --- a/tools/perfrec-python/perf.py +++ b/tools/perfrec-python/perf.py @@ -6,7 +6,15 @@ from collections import defaultdict from tabulate import tabulate -def generate_flamegraph(perf_data, output_svg, flamegraph_path): +def generate_flamegraph(perf_data: str, output_svg: str, flamegraph_path: str) -> None: + """ + Generate a flamegraph from perf data. + + Args: + perf_data (str): Path to the perf.data file. + output_svg (str): Path to the output SVG file. + flamegraph_path (str): Path to the Flamegraph scripts directory. + """ # Ensure perf script is available try: subprocess.run(["perf", "--version"], check=True) @@ -46,7 +54,13 @@ def generate_flamegraph(perf_data, output_svg, flamegraph_path): analyze_folded_stack(folded_output) -def analyze_folded_stack(folded_output): +def analyze_folded_stack(folded_output: str) -> None: + """ + Analyzes the folded stack output to find functions with significant sample counts. + + Args: + folded_output (str): Path to the folded stack output file. + """ function_counts = defaultdict(int) total_count = 0 @@ -83,6 +97,16 @@ def analyze_folded_stack(folded_output): def limit_line(input: str, line_length: int) -> str: + """ + Limits the length of a line to a specified number of characters, adding line breaks if necessary. + + Args: + input (str): The input string. + line_length (int): The maximum line length. + + Returns: + str: The formatted string with line breaks. + """ if line_length >= len(input): return input limited_str = "" @@ -98,6 +122,9 @@ def limit_line(input: str, line_length: int) -> str: def main(): + """ + Main function to parse arguments and generate a flamegraph. + """ parser = argparse.ArgumentParser(description="Generate a Flamegraph from perf.data") parser.add_argument("perf_data", help="Path to the perf.data file") parser.add_argument("output_svg", help="Path to the output SVG file") -- Gitee From 2fb458b91d3f49de955ad0af1907cd0f89ea1fca Mon Sep 17 00:00:00 2001 From: steepcurve Date: Fri, 19 Jul 2024 10:20:28 +0800 Subject: [PATCH 14/34] fix bug --- tools/perfrec-python/fusion_tracing.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/tools/perfrec-python/fusion_tracing.py b/tools/perfrec-python/fusion_tracing.py index 8301a241..7944bf9f 100644 --- a/tools/perfrec-python/fusion_tracing.py +++ b/tools/perfrec-python/fusion_tracing.py @@ -65,6 +65,7 @@ def extract_mxrec_events( events: Dict[int, Dict[str, List[MxRecEvent]]] = defaultdict( lambda: defaultdict(list) ) + broken_line = list() pipe_names = get_pipes() pipe_ids = defaultdict(int) for i, pipe in enumerate(pipe_names): @@ -73,8 +74,15 @@ def extract_mxrec_events( for line in log: for name, pipe in event_names.items(): if name in line: - event = MxRecEvent(line, name, pipe_ids[pipe]) - events[event.process_id][pipe].append(event) + try: + event = MxRecEvent(line, name, pipe_ids[pipe]) + events[event.process_id][pipe].append(event) + except Exception: + broken_line.append(line) + if broken_line: + print("Warning: There are some log line broken") + for line in broken_line: + print(line) return events @@ -338,7 +346,11 @@ def get_op_tracing(path: str) -> Tuple[List[TracingMetaData], List[TracingOpEven for root, _, files in os.walk(path): for file in files: - if file.startswith("op_summary") and file.endswith(".csv"): + if ( + root.endswith("mindstudio_profiler_output") + and file.startswith("op_summary") + and file.endswith(".csv") + ): 
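+                # Only op_summary CSVs under msprof's mindstudio_profiler_output
+                # directory are parsed.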
file_path = os.path.join(root, file) op_events = extract_op_events(file_path) for event in op_events: -- Gitee From b69581ec3411b899440ff495f5cb4b757074854d Mon Sep 17 00:00:00 2001 From: steepcurve Date: Fri, 19 Jul 2024 10:50:56 +0800 Subject: [PATCH 15/34] fix bug --- tools/perfrec-python/fusion_tracing.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/perfrec-python/fusion_tracing.py b/tools/perfrec-python/fusion_tracing.py index 7944bf9f..888534b0 100644 --- a/tools/perfrec-python/fusion_tracing.py +++ b/tools/perfrec-python/fusion_tracing.py @@ -65,7 +65,7 @@ def extract_mxrec_events( events: Dict[int, Dict[str, List[MxRecEvent]]] = defaultdict( lambda: defaultdict(list) ) - broken_line = list() + broken_lines = list() pipe_names = get_pipes() pipe_ids = defaultdict(int) for i, pipe in enumerate(pipe_names): @@ -78,10 +78,10 @@ def extract_mxrec_events( event = MxRecEvent(line, name, pipe_ids[pipe]) events[event.process_id][pipe].append(event) except Exception: - broken_line.append(line) - if broken_line: - print("Warning: There are some log line broken") - for line in broken_line: + broken_lines.append(line) + if broken_lines: + print(f"Warning: There are {len(broken_lines)} broken log lines") + for line in broken_lines: print(line) return events -- Gitee From 873dbfc15ad8b9dfe405e7e8e96efc7b7a29abf4 Mon Sep 17 00:00:00 2001 From: steepcurve Date: Mon, 22 Jul 2024 14:39:50 +0800 Subject: [PATCH 16/34] npu optional --- tools/perfrec-python/fusion_tracing.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tools/perfrec-python/fusion_tracing.py b/tools/perfrec-python/fusion_tracing.py index 888534b0..a178c2a6 100644 --- a/tools/perfrec-python/fusion_tracing.py +++ b/tools/perfrec-python/fusion_tracing.py @@ -377,7 +377,7 @@ def main(): description="Generate CPU/NPU fusion tracing json." ) parser.add_argument("debug_log", help="MxRec DEBUG level log flie path.") - parser.add_argument("msprof_output_path", help="msprof output path.") + parser.add_argument("--msprof_output", help="msprof output path.", required=False) args = parser.parse_args() log_path = args.debug_log @@ -390,9 +390,11 @@ def main(): for events in process.values(): tracing.extend([TracingMxRecEvent(event) for event in events]) - op_metadata, op_tracing = get_op_tracing(args.msprof_output_path) - tracing.extend(op_metadata) - tracing.extend(op_tracing) + msprof_output_path = args.msprof_output + if msprof_output_path: + op_metadata, op_tracing = get_op_tracing(msprof_output_path) + tracing.extend(op_metadata) + tracing.extend(op_tracing) with open("mxrec_tracing.json", "w") as file: json.dump(tracing, file, indent=4, default=lambda obj: obj.__dict__) -- Gitee From 1f233a628f74520d9619b56ec40124298c315813 Mon Sep 17 00:00:00 2001 From: steepcurve Date: Mon, 22 Jul 2024 14:45:44 +0800 Subject: [PATCH 17/34] npu optional --- tools/perfrec-python/fusion_tracing.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perfrec-python/fusion_tracing.py b/tools/perfrec-python/fusion_tracing.py index a178c2a6..2c88c360 100644 --- a/tools/perfrec-python/fusion_tracing.py +++ b/tools/perfrec-python/fusion_tracing.py @@ -376,7 +376,9 @@ def main(): parser = argparse.ArgumentParser( description="Generate CPU/NPU fusion tracing json." 
) - parser.add_argument("debug_log", help="MxRec DEBUG level log flie path.") + parser.add_argument( + "--debug_log", help="MxRec DEBUG level log flie path.", required=True + ) parser.add_argument("--msprof_output", help="msprof output path.", required=False) args = parser.parse_args() -- Gitee From 00f7cd2008c6fb395c68cc87ca8578795271f64a Mon Sep 17 00:00:00 2001 From: steepcurve Date: Mon, 22 Jul 2024 15:28:25 +0800 Subject: [PATCH 18/34] write call stack to file --- tools/perfrec-python/perf.py | 57 +++++++++++++++++++++++++++++------- 1 file changed, 46 insertions(+), 11 deletions(-) diff --git a/tools/perfrec-python/perf.py b/tools/perfrec-python/perf.py index 98584b68..07ee2d97 100644 --- a/tools/perfrec-python/perf.py +++ b/tools/perfrec-python/perf.py @@ -54,6 +54,16 @@ def generate_flamegraph(perf_data: str, output_svg: str, flamegraph_path: str) - analyze_folded_stack(folded_output) +class CallStack: + def __init__(self): + self.count = 0 + self.call_stacks = [] + + def add_call_stacks(self, count: int, call_stack: str): + self.count += count + self.call_stacks.append(call_stack) + + def analyze_folded_stack(folded_output: str) -> None: """ Analyzes the folded stack output to find functions with significant sample counts. @@ -61,7 +71,8 @@ def analyze_folded_stack(folded_output: str) -> None: Args: folded_output (str): Path to the folded stack output file. """ - function_counts = defaultdict(int) + + function_counts = defaultdict(CallStack) total_count = 0 # Read the folded stack output @@ -71,24 +82,37 @@ def analyze_folded_stack(folded_output: str) -> None: " ", 1 ) # Use rsplit to handle function names with spaces count = int(parts[-1]) + call_stack_str = parts[0] stack = parts[0].split(";") - function_counts[stack[-1]] += count + function_counts[stack[-1]].add_call_stacks(count, call_stack_str) total_count += count # Filter and display functions with more than 5% total count threshold = total_count * 0.05 results = [ - (func, count) for func, count in function_counts.items() if count >= threshold + (func, call_stack) + for func, call_stack in function_counts.items() + if call_stack.count >= threshold ] # Sort results by count in descending order - results.sort(key=lambda x: x[1], reverse=True) + results.sort(key=lambda x: x[1].count, reverse=True) # Prepare data for tabulate + # Write call stacks to file table_data = [] - for func, count in results: - percentage = (count / total_count) * 100 - table_data.append([limit_line(func, 50), count, f"{percentage:.2f}%"]) + with open("call_stacks.txt", "w") as f: + for func, call_stack in results: + percentage = (call_stack.count / total_count) * 100 + table_data.append( + [limit_line(func, 50), call_stack.count, f"{percentage:.2f}%"] + ) + stacks = [stk for stk in call_stack.call_stacks] + f.writelines( + [f"func_name: {func}", f"percetage: {percentage:.2f}%", "call_stacks:"] + + stacks + + ["\n"] + ) # Print the results using tabulate print("\nFunctions with more than 5% of total samples:\n") @@ -125,11 +149,22 @@ def main(): """ Main function to parse arguments and generate a flamegraph. """ - parser = argparse.ArgumentParser(description="Generate a Flamegraph from perf.data") - parser.add_argument("perf_data", help="Path to the perf.data file") - parser.add_argument("output_svg", help="Path to the output SVG file") + parser = argparse.ArgumentParser( + description="Generate a Flamegraph from perf.data." 
+ ) + parser.add_argument( + "--perf_data", help="Path to the perf.data file.", required=True + ) + parser.add_argument( + "--output_svg", + help="Path to the output SVG file. (default: flamegraph.svg)", + required=False, + default="flamegraph.svg", + ) parser.add_argument( - "flamegraph_path", help="Path to the Flamegraph scripts directory" + "--flamegraph_path", + help="Path to the Flamegraph Perl scripts directory.", + required=True, ) args = parser.parse_args() -- Gitee From 289e406e150b84293636af12b1c6984631844a80 Mon Sep 17 00:00:00 2001 From: steepcurve Date: Mon, 22 Jul 2024 15:31:37 +0800 Subject: [PATCH 19/34] write call stack to file --- tools/perfrec-python/perf.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/perfrec-python/perf.py b/tools/perfrec-python/perf.py index 07ee2d97..d2fbb177 100644 --- a/tools/perfrec-python/perf.py +++ b/tools/perfrec-python/perf.py @@ -107,11 +107,15 @@ def analyze_folded_stack(folded_output: str) -> None: table_data.append( [limit_line(func, 50), call_stack.count, f"{percentage:.2f}%"] ) - stacks = [stk for stk in call_stack.call_stacks] + stacks = [stk + "\n" for stk in call_stack.call_stacks] f.writelines( - [f"func_name: {func}", f"percetage: {percentage:.2f}%", "call_stacks:"] + [ + f"func_name: {func}\n", + f"percetage: {percentage:.2f}%\n", + "call_stacks:\n", + ] + stacks - + ["\n"] + + ["\n\n"] ) # Print the results using tabulate -- Gitee From ff3751f6b40ca580f87b21d27d56b19a7c050e20 Mon Sep 17 00:00:00 2001 From: steepcurve Date: Mon, 22 Jul 2024 15:41:51 +0800 Subject: [PATCH 20/34] write call stack to file --- tools/perfrec-python/config.toml | 4 ++++ tools/perfrec-python/perf.py | 20 ++++++++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/tools/perfrec-python/config.toml b/tools/perfrec-python/config.toml index 1bbc6aa9..81330e34 100644 --- a/tools/perfrec-python/config.toml +++ b/tools/perfrec-python/config.toml @@ -3,3 +3,7 @@ key_process = ["getBatchData", "getAndProcess"] process_emb_info = ["getAndSendTensors"] lookup_swap_addr = ["lookupAddrs"] embedding_recv = ["EmbeddingRecv", "EmbeddingUpdate", "SendH2DEmb"] + +[perf] +threshold = 0.05 +ignores = ["[libc.so.6]"] diff --git a/tools/perfrec-python/perf.py b/tools/perfrec-python/perf.py index d2fbb177..3f675934 100644 --- a/tools/perfrec-python/perf.py +++ b/tools/perfrec-python/perf.py @@ -2,8 +2,10 @@ import argparse import os import subprocess from collections import defaultdict +from typing import List from tabulate import tabulate +import toml def generate_flamegraph(perf_data: str, output_svg: str, flamegraph_path: str) -> None: @@ -87,12 +89,14 @@ def analyze_folded_stack(folded_output: str) -> None: function_counts[stack[-1]].add_call_stacks(count, call_stack_str) total_count += count + config = read_config() + # Filter and display functions with more than 5% total count - threshold = total_count * 0.05 + threshold = total_count * config.threshold results = [ (func, call_stack) for func, call_stack in function_counts.items() - if call_stack.count >= threshold + if call_stack.count >= threshold and func not in config.ignores ] # Sort results by count in descending order @@ -149,6 +153,18 @@ def limit_line(input: str, line_length: int) -> str: return limited_str +class PerfConfig: + def __init__(self, threshold: int, ignores: List[str]): + self.threshold = threshold + self.ignores = set(ignores) + + +def read_config() -> PerfConfig: + config = toml.load("config.toml") + perf_config = config["perf"] + return 
PerfConfig(perf_config["threshold"], perf_config["ignores"])
+
+
 def main():
     """
     Main function to parse arguments and generate a flamegraph.
--
Gitee

From 9c8c9cc85275d55299fab2bb7b46ede7bca24200 Mon Sep 17 00:00:00 2001
From: steepcurve
Date: Mon, 22 Jul 2024 15:52:13 +0800
Subject: [PATCH 21/34] add comment

---
 tools/perfrec-python/perf.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tools/perfrec-python/perf.py b/tools/perfrec-python/perf.py
index 3f675934..83d9b3c5 100644
--- a/tools/perfrec-python/perf.py
+++ b/tools/perfrec-python/perf.py
@@ -160,6 +160,12 @@ class PerfConfig:
 
 
 def read_config() -> PerfConfig:
+    """
+    Reads configs related to `perf` from the configuration file.
+
+    Returns:
+        PerfConfig: Configuration class.
+    """
     config = toml.load("config.toml")
     perf_config = config["perf"]
     return PerfConfig(perf_config["threshold"], perf_config["ignores"])
--
Gitee

From 5419190015413f4335d69adeca325264d61a4ddc Mon Sep 17 00:00:00 2001
From: steepcurve
Date: Mon, 22 Jul 2024 16:16:10 +0800
Subject: [PATCH 22/34] add comment

---
 tools/perfrec-python/fusion_tracing.py | 52 +++++++++++++-------------
 tools/perfrec-python/perf.py           |  4 ++
 2 files changed, 29 insertions(+), 27 deletions(-)

diff --git a/tools/perfrec-python/fusion_tracing.py b/tools/perfrec-python/fusion_tracing.py
index 2c88c360..9eb2f8e0 100644
--- a/tools/perfrec-python/fusion_tracing.py
+++ b/tools/perfrec-python/fusion_tracing.py
@@ -10,6 +10,20 @@ import pandas as pd
 import toml
 
 
+class MxRecConfig:
+    """
+    Configuration from `config.toml`.
+    """
+
+    def __init__(self, pipes: Dict[str, List[str]]):
+        self.pipes = pipes
+        self.func_to_pipe = defaultdict(str)
+        for pipe_name, event_list in self.pipes.items():
+            for event in event_list:
+                self.func_to_pipe[event] = pipe_name
+        self.pipe_names = list(pipes.keys())
+
+
 class MxRecEvent:
     """
     Class to represent an MxRec event.
@@ -50,14 +64,14 @@ class OpEvent:
 
 
 def extract_mxrec_events(
-    log_path: str, event_names: Dict[str, str]
+    log_path: str, config: MxRecConfig
 ) -> Dict[int, Dict[str, List[MxRecEvent]]]:
     """
     Extracts MxRec events from the log file.
 
     Args:
         log_path (str): Path to the log file.
-        event_names (Dict[str, str]): Dictionary mapping event names to pipe names.
+        config (MxRecConfig): Configuration holding the event-to-pipe mapping and pipe names.
 
     Returns:
         Dict[int, Dict[str, List[MxRecEvent]]]: Extracted MxRec events grouped by process ID and pipe.
@@ -66,7 +80,8 @@ def extract_mxrec_events(
         lambda: defaultdict(list)
     )
     broken_lines = list()
-    pipe_names = get_pipes()
+    event_names = config.func_to_pipe
+    pipe_names = config.pipe_names
     pipe_ids = defaultdict(int)
     for i, pipe in enumerate(pipe_names):
         pipe_ids[pipe] = i
@@ -172,33 +187,15 @@ def get_process_id(log_line: str) -> int:
         raise RuntimeError(f"There is no process_id in log: {log_line}")
 
 
-def read_mxrec_config() -> Dict[str, str]:
+def read_mxrec_config() -> MxRecConfig:
     """
    Reads the MxRec configuration from a TOML file.
 
     Returns:
-        Dict[str, str]: Dictionary mapping event names to pipe names.
-    """
-    config = toml.load("config.toml")
-    mxrec_config = defaultdict(str)
-    for pipe, event_list in config["mxrec"].items():
-        for event in event_list:
-            mxrec_config[event] = pipe
-    return mxrec_config
-
-
-def get_pipes() -> List[str]:
-    """
-    Reads the pipe names from the configuration file.
-
-    Returns:
-        List[str]: List of pipe names.
+        MxRecConfig: Configuration class.
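+
+    Example (assuming the sample config.toml in this directory):
+        >>> read_mxrec_config().func_to_pipe["getBatchData"]
+        'key_process'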
""" config = toml.load("config.toml") - pipes = list() - for pipe in config["mxrec"].keys(): - pipes.append(pipe) - return pipes + return MxRecConfig(config["mxrec"]) class TracingMetaData: @@ -244,18 +241,19 @@ class TracingOpEvent: self.args = {"Op Name": op_event.op_name} -def get_metadata(processes: List[int]) -> List[TracingMetaData]: +def get_metadata(processes: List[int], config: MxRecConfig) -> List[TracingMetaData]: """ Generates metadata for tracing processes and threads. Args: processes (List[int]): List of process IDs. + config (MxRecConfig): Configuration class. Returns: List[TracingMetaData]: List of tracing metadata. """ metadata = list() - pipes = get_pipes() + pipes = config.pipe_names for i, pid in enumerate(processes): metadata1 = TracingMetaData( "process_name", pid, 0, "M", {"name": f"MxRec process {i}"} @@ -387,7 +385,7 @@ def main(): mxrec_events = extract_mxrec_events(log_path, config) tracing = list() - tracing.extend(get_metadata(list(mxrec_events.keys()))) + tracing.extend(get_metadata(list(mxrec_events.keys()), config)) for process in mxrec_events.values(): for events in process.values(): tracing.extend([TracingMxRecEvent(event) for event in events]) diff --git a/tools/perfrec-python/perf.py b/tools/perfrec-python/perf.py index 83d9b3c5..04f69fa4 100644 --- a/tools/perfrec-python/perf.py +++ b/tools/perfrec-python/perf.py @@ -154,6 +154,10 @@ def limit_line(input: str, line_length: int) -> str: class PerfConfig: + """ + Configuration from `config.toml`. + """ + def __init__(self, threshold: int, ignores: List[str]): self.threshold = threshold self.ignores = set(ignores) -- Gitee From 365987a122b864df4185beea1b41f2785a1b0e45 Mon Sep 17 00:00:00 2001 From: steepcurve Date: Mon, 22 Jul 2024 20:29:55 +0800 Subject: [PATCH 23/34] cleancode --- tools/perfrec-python/fusion_tracing.py | 9 ++-- tools/perfrec-python/perf.py | 61 ++++++++++++++++---------- 2 files changed, 45 insertions(+), 25 deletions(-) diff --git a/tools/perfrec-python/fusion_tracing.py b/tools/perfrec-python/fusion_tracing.py index 9eb2f8e0..7f192195 100644 --- a/tools/perfrec-python/fusion_tracing.py +++ b/tools/perfrec-python/fusion_tracing.py @@ -1,5 +1,6 @@ import argparse import json +import logging import os import re from collections import defaultdict @@ -95,9 +96,9 @@ def extract_mxrec_events( except Exception: broken_lines.append(line) if broken_lines: - print(f"Warning: There are {len(broken_lines)} broken log lines") + logging.warning(f"There are {len(broken_lines)} broken log lines") for line in broken_lines: - print(line) + logging.warning(line) return events @@ -371,6 +372,7 @@ def main(): """ Main function to parse arguments and generate tracing JSON. """ + logging.basicConfig(level=logging.INFO) parser = argparse.ArgumentParser( description="Generate CPU/NPU fusion tracing json." 
)
@@ -396,7 +398,8 @@ def main():
     tracing.extend(op_metadata)
     tracing.extend(op_tracing)
 
-    with open("mxrec_tracing.json", "w") as file:
+    fd = os.open("mxrec_tracing.json", os.O_WRONLY | os.O_CREAT, 0o644)
+    with os.fdopen(fd, "w") as file:
         json.dump(tracing, file, indent=4, default=lambda obj: obj.__dict__)
 
 
diff --git a/tools/perfrec-python/perf.py b/tools/perfrec-python/perf.py
index 04f69fa4..31113c7b 100644
--- a/tools/perfrec-python/perf.py
+++ b/tools/perfrec-python/perf.py
@@ -1,14 +1,17 @@
 import argparse
+import logging
 import os
 import subprocess
 from collections import defaultdict
 from typing import List
 
-from tabulate import tabulate
 import toml
+from tabulate import tabulate
 
 
-def generate_flamegraph(perf_data: str, output_svg: str, flamegraph_path: str) -> None:
+def generate_flamegraph(
+    perf_bin: str, perf_data: str, output_svg: str, flamegraph_path: str
+) -> None:
     """
     Generate a flamegraph from perf data.
 
@@ -19,9 +22,9 @@ def generate_flamegraph(perf_data: str, output_svg: str, flamegraph_path: str) -
     """
     # Ensure perf script is available
     try:
-        subprocess.run(["perf", "--version"], check=True)
+        subprocess.run([perf_bin, "--version"], check=True)
     except subprocess.CalledProcessError:
-        print("Error: perf is not installed or not in PATH.")
+        logging.error("perf is not installed or not in PATH.")
         return
 
     # Ensure Flamegraph scripts are available
@@ -31,26 +34,28 @@ def generate_flamegraph(perf_data: str, output_svg: str, flamegraph_path: str) -
     if not os.path.isfile(stackcollapse_path) or not os.path.isfile(
         flamegraph_script_path
     ):
-        print(
-            f"Error: Flamegraph scripts not found in the provided directory {flamegraph_path}."
+        logging.error(
+            f"Flamegraph scripts not found in the provided directory {flamegraph_path}."
         )
         return
 
     # Generate the folded stack output
     folded_output = perf_data + ".folded"
-    with open(folded_output, "w") as f:
+    fd = os.open(folded_output, os.O_WRONLY | os.O_CREAT, 0o644)
+    with os.fdopen(fd, "w") as f:
         script_output = subprocess.run(
-            ["perf", "script", "-i", perf_data], check=True, stdout=subprocess.PIPE
+            [perf_bin, "script", "-i", perf_data], check=True, stdout=subprocess.PIPE
         )
         subprocess.run(
             [stackcollapse_path], check=True, input=script_output.stdout, stdout=f
         )
 
     # Generate the flamegraph
-    with open(output_svg, "w") as f:
+    fd_svg = os.open(output_svg, os.O_WRONLY | os.O_CREAT, 0o644)
+    with os.fdopen(fd_svg, "w") as f:
         subprocess.run([flamegraph_script_path, folded_output], check=True, stdout=f)
 
-    print(f"Flamegraph generated at {output_svg}")
+    logging.info(f"Flamegraph generated at {output_svg}")
 
     # Analyze the folded stack output
     analyze_folded_stack(folded_output)
@@ -105,11 +110,14 @@ def analyze_folded_stack(folded_output: str) -> None:
     # Prepare data for tabulate
     # Write call stacks to file
     table_data = []
-    with open("call_stacks.txt", "w") as f:
+    fd_call_stacks = os.open("call_stacks.txt", os.O_WRONLY | os.O_CREAT, 0o644)
+    with os.fdopen(fd_call_stacks, "w") as f:
         for func, call_stack in results:
-            percentage = (call_stack.count / total_count) * 100
+            percentage = (
+                (call_stack.count / total_count) * 100 if total_count > 0 else 0
+            )
             table_data.append(
-                [limit_line(func, 50), call_stack.count, f"{percentage:.2f}%"]
+                [limit_str_per_line(func, 50), call_stack.count, f"{percentage:.2f}%"]
             )
             stacks = [stk + "\n" for stk in call_stack.call_stacks]
             f.writelines(
@@ -123,12 +131,12 @@ def analyze_folded_stack(folded_output: str) -> None:
     )
 
     # Print the results using tabulate
-    print("\nFunctions with more than 5% of total samples:\n")
+    logging.info("\nFunctions with more than 5% of total samples:\n")
     headers = ["Function", "Count", "Percentage"]
-    print(tabulate(table_data, headers=headers, tablefmt="grid"))
+    logging.info(tabulate(table_data, headers=headers, tablefmt="grid"))
 
 
-def limit_line(input: str, line_length: int) -> str:
+def limit_str_per_line(input: str, line_length: int) -> str:
     """
     Limits the length of a line to a specified number of characters, adding line breaks if necessary.
 
@@ -179,26 +187,35 @@ def main():
     """
     Main function to parse arguments and generate a flamegraph.
     """
+    logging.basicConfig(level=logging.INFO)
     parser = argparse.ArgumentParser(
         description="Generate a Flamegraph from perf.data."
     )
     parser.add_argument(
         "--perf_data", help="Path to the perf.data file.", required=True
     )
+    parser.add_argument(
+        "--flamegraph_path",
+        help="Path to the Flamegraph Perl scripts directory.",
+        required=True,
+    )
+    parser.add_argument(
+        "--perf_bin",
+        help="Path to perf executable binary file. (default: perf)",
+        required=False,
+        default="perf",
+    )
     parser.add_argument(
         "--output_svg",
         help="Path to the output SVG file. (default: flamegraph.svg)",
         required=False,
        default="flamegraph.svg",
    )
-    parser.add_argument(
-        "--flamegraph_path",
-        help="Path to the Flamegraph Perl scripts directory.",
-        required=True,
-    )
     args = parser.parse_args()
 
-    generate_flamegraph(args.perf_data, args.output_svg, args.flamegraph_path)
+    generate_flamegraph(
+        args.perf_bin, args.perf_data, args.output_svg, args.flamegraph_path
+    )
 
 
 if __name__ == "__main__":
-- 
Gitee

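Note on the patch above: it runs the `perf script | stackcollapse-perf.pl` stage through `subprocess` with a fully buffered intermediate (`stdout=subprocess.PIPE`). For very large `perf.data` captures, the same two-stage pipeline can be chained with `Popen` so the decoded samples stream between the two processes instead of being held in memory. A minimal sketch, assuming only that `perf` and the FlameGraph script exist at the given paths (names are placeholders, not part of the patch):

```python
import subprocess

def fold_stacks_streaming(perf_bin, perf_data, stackcollapse, out_path):
    # Chain perf script -> stackcollapse-perf.pl without buffering the
    # decoded samples in Python memory.
    with open(out_path, "w") as out:
        script = subprocess.Popen(
            [perf_bin, "script", "-i", perf_data], stdout=subprocess.PIPE
        )
        collapse = subprocess.Popen(
            [stackcollapse], stdin=script.stdout, stdout=out
        )
        script.stdout.close()  # let EOF propagate when perf script exits
        if collapse.wait() != 0 or script.wait() != 0:
            raise RuntimeError("perf -> stackcollapse pipeline failed")
```
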
From bd5c38eb4342d44c9485d3ced9988796a9acc7b8 Mon Sep 17 00:00:00 2001
From: steepcurve
Date: Tue, 23 Jul 2024 09:05:14 +0800
Subject: [PATCH 24/34] cleancode

---
 tools/perfrec-python/config.toml       | 18 ++++++++++++
 tools/perfrec-python/fusion_tracing.py | 19 ++++++++++++-
 tools/perfrec-python/perf.py           | 39 +++++++++++++++++++------
 3 files changed, 65 insertions(+), 11 deletions(-)

diff --git a/tools/perfrec-python/config.toml b/tools/perfrec-python/config.toml
index 81330e34..90187313 100644
--- a/tools/perfrec-python/config.toml
+++ b/tools/perfrec-python/config.toml
@@ -1,9 +1,27 @@
+# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
 [mxrec]
+# pipe name and time cost name
 key_process = ["getBatchData", "getAndProcess"]
 process_emb_info = ["getAndSendTensors"]
 lookup_swap_addr = ["lookupAddrs"]
 embedding_recv = ["EmbeddingRecv", "EmbeddingUpdate", "SendH2DEmb"]
 
 [perf]
+# filter percentage of time cost
 threshold = 0.05
+# ignore function list
 ignores = ["[libc.so.6]"]
diff --git a/tools/perfrec-python/fusion_tracing.py b/tools/perfrec-python/fusion_tracing.py
index 7f192195..94365fa9 100644
--- a/tools/perfrec-python/fusion_tracing.py
+++ b/tools/perfrec-python/fusion_tracing.py
@@ -1,3 +1,20 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
 import argparse
 import json
 import logging
@@ -96,7 +113,7 @@ def extract_mxrec_events(
         except Exception:
             broken_lines.append(line)
     if broken_lines:
-        logging.warning(f"There are {len(broken_lines)} broken log lines")
+        logging.warning("There are %d broken log lines", len(broken_lines))
         for line in broken_lines:
             logging.warning(line)
     return events
diff --git a/tools/perfrec-python/perf.py b/tools/perfrec-python/perf.py
index 31113c7b..9b6e0b8e 100644
--- a/tools/perfrec-python/perf.py
+++ b/tools/perfrec-python/perf.py
@@ -1,3 +1,20 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
 import argparse
 import logging
 import os
 import subprocess
@@ -35,7 +52,8 @@ def generate_flamegraph(
         flamegraph_script_path
     ):
         logging.error(
-            f"Flamegraph scripts not found in the provided directory {flamegraph_path}."
+ "Flamegraph scripts not found in the provided directory %s.", + flamegraph_path, ) return @@ -55,7 +73,7 @@ def generate_flamegraph( with os.fdopen(fd_svg, "w") as f: subprocess.run([flamegraph_script_path, folded_output], check=True, stdout=f) - logging.info(f"Flamegraph generated at {output_svg}") + logging.info("Flamegraph generated at %s", output_svg) # Analyze the folded stack output analyze_folded_stack(folded_output) @@ -113,11 +131,12 @@ def analyze_folded_stack(folded_output: str) -> None: fd_call_stacks = os.open("call_stacks.txt", os.O_WRONLY | os.O_CREAT, 0o644) with os.fdopen(fd_call_stacks, "w") as f: for func, call_stack in results: - percentage = ( - (call_stack.count / total_count) * 100 if total_count > 0 else 0 - ) + if total_count > 0: + percentage = (call_stack.count / total_count) * 100 + else: + percentage = 0 table_data.append( - [limit_str_per_line(func, 50), call_stack.count, f"{percentage:.2f}%"] + [limit_line(func, 50), call_stack.count, f"{percentage:.2f}%"] ) stacks = [stk + "\n" for stk in call_stack.call_stacks] f.writelines( @@ -136,7 +155,7 @@ def analyze_folded_stack(folded_output: str) -> None: logging.info(tabulate(table_data, headers=headers, tablefmt="grid")) -def limit_str_per_line(input: str, line_length: int) -> str: +def limit_line(input_content: str, line_length: int) -> str: """ Limits the length of a line to a specified number of characters, adding line breaks if necessary. @@ -147,12 +166,12 @@ def limit_str_per_line(input: str, line_length: int) -> str: Returns: str: The formatted string with line breaks. """ - if line_length >= len(input): - return input + if line_length >= len(input_content): + return input_content limited_str = "" if line_length > 0: count = 0 - for c in input: + for c in input_content: if count >= line_length: limited_str += "\n" count = 0 -- Gitee From dc8e6e9f82ac5d8a0a6c6791c86f1b886c1e56d7 Mon Sep 17 00:00:00 2001 From: steepcurve Date: Tue, 23 Jul 2024 09:27:26 +0800 Subject: [PATCH 25/34] cleancode --- tools/perfrec-python/perf.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/perfrec-python/perf.py b/tools/perfrec-python/perf.py index 9b6e0b8e..ca6ea4a4 100644 --- a/tools/perfrec-python/perf.py +++ b/tools/perfrec-python/perf.py @@ -131,10 +131,9 @@ def analyze_folded_stack(folded_output: str) -> None: fd_call_stacks = os.open("call_stacks.txt", os.O_WRONLY | os.O_CREAT, 0o644) with os.fdopen(fd_call_stacks, "w") as f: for func, call_stack in results: - if total_count > 0: - percentage = (call_stack.count / total_count) * 100 - else: - percentage = 0 + percentage = ( + (call_stack.count / total_count) * 100 if total_count != 0 else 0 + ) table_data.append( [limit_line(func, 50), call_stack.count, f"{percentage:.2f}%"] ) -- Gitee From 32678e4af61c69e5c8713148c29560429b1eeb92 Mon Sep 17 00:00:00 2001 From: steepcurve Date: Tue, 23 Jul 2024 10:37:27 +0800 Subject: [PATCH 26/34] add README.md --- tools/perfrec-python/README.md | 64 ++++++++++++++++++++++++++++++++++ tools/perfrec-python/perf.py | 3 +- 2 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 tools/perfrec-python/README.md diff --git a/tools/perfrec-python/README.md b/tools/perfrec-python/README.md new file mode 100644 index 00000000..ccb47949 --- /dev/null +++ b/tools/perfrec-python/README.md @@ -0,0 +1,64 @@ +## perf.py +``` +usage: perf.py [-h] --perf_data PERF_DATA --flamegraph_path FLAMEGRAPH_PATH + [--perf_bin PERF_BIN] [--output_svg OUTPUT_SVG] + +Generate a Flamegraph from perf.data. 
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --perf_data PERF_DATA
+                        Path to the perf.data file.
+  --flamegraph_path FLAMEGRAPH_PATH
+                        Path to the Flamegraph Perl scripts directory.
+  --perf_bin PERF_BIN   Path to perf executable binary file. (default: perf)
+  --output_svg OUTPUT_SVG
+                        Path to the output SVG file. (default: flamegraph.svg)
+```
+#### Usage example
+```bash
+python perf.py --perf_data perf.data --flamegraph_path /ws/FlameGraph
+```
+#### Optional configuration
+```toml
+# config.toml
+
+[perf]
+# filter percentage of time cost
+threshold = 0.05
+# ignore function list
+ignores = ["[libc.so.6]"]
+```
+
+## fusion_tracing.py
+```
+usage: fusion_tracing.py [-h] --debug_log DEBUG_LOG
+                         [--msprof_output MSPROF_OUTPUT]
+
+Generate CPU/NPU fusion tracing json.
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --debug_log DEBUG_LOG
+                        MxRec DEBUG level log file path.
+  --msprof_output MSPROF_OUTPUT
+                        msprof output path.
+```
+#### Usage example
+```bash
+# CPU only
+python fusion_tracing.py --debug_log ../../example/demo/little_demo/temp.log
+# CPU + NPU
+python fusion_tracing.py --debug_log ../../example/demo/little_demo/temp.log --msprof_output ../../example/demo/little_demo/msprof
+```
+#### Optional configuration
+```toml
+# config.toml
+
+[mxrec]
+# pipe name and time cost name
+key_process = ["getBatchData", "getAndProcess"]
+process_emb_info = ["getAndSendTensors"]
+lookup_swap_addr = ["lookupAddrs"]
+embedding_recv = ["EmbeddingRecv", "EmbeddingUpdate", "SendH2DEmb"]
+```
diff --git a/tools/perfrec-python/perf.py b/tools/perfrec-python/perf.py
index ca6ea4a4..d1f56279 100644
--- a/tools/perfrec-python/perf.py
+++ b/tools/perfrec-python/perf.py
@@ -19,6 +19,7 @@ import argparse
 import logging
 import os
 import subprocess
+import sys
 from collections import defaultdict
 from typing import List
 
@@ -151,7 +152,7 @@ def analyze_folded_stack(folded_output: str) -> None:
     # Print the results using tabulate
     logging.info("\nFunctions with more than 5% of total samples:\n")
     headers = ["Function", "Count", "Percentage"]
-    logging.info(tabulate(table_data, headers=headers, tablefmt="grid"))
+    sys.stdout.write(tabulate(table_data, headers=headers, tablefmt="grid") + "\n")
-- 
Gitee


From 1a263f32ab13f41fd61f8eab4cfc046188487222 Mon Sep 17 00:00:00 2001
From: steepcurve
Date: Tue, 23 Jul 2024 10:42:38 +0800
Subject: [PATCH 27/34] fix bug and cleancode

---
 tools/perfrec-python/fusion_tracing.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perfrec-python/fusion_tracing.py b/tools/perfrec-python/fusion_tracing.py
index 94365fa9..830803a4 100644
--- a/tools/perfrec-python/fusion_tracing.py
+++ b/tools/perfrec-python/fusion_tracing.py
@@ -36,7 +36,7 @@ class MxRecConfig:
     def __init__(self, pipes: Dict[str, List[str]]):
         self.pipes = pipes
         self.func_to_pipe = defaultdict(str)
-        for pipe_name, event_list in self.pipes:
+        for pipe_name, event_list in self.pipes.items():
             for event in event_list:
                 self.func_to_pipe[event] = pipe_name
         self.pipe_names = [name for name in pipes.keys()]
@@ -394,7 +394,7 @@ def main():
         description="Generate CPU/NPU fusion tracing json."
) parser.add_argument( - "--debug_log", help="MxRec DEBUG level log flie path.", required=True + "--debug_log", help="MxRec DEBUG level log file path.", required=True ) parser.add_argument("--msprof_output", help="msprof output path.", required=False) args = parser.parse_args() -- Gitee From 7d5ded96939ddd6b240e6b024bc6dc00916ad2ab Mon Sep 17 00:00:00 2001 From: steepcurve Date: Tue, 23 Jul 2024 16:33:47 +0800 Subject: [PATCH 28/34] cleancode --- tools/perfrec-python/README.md | 6 +- tools/perfrec-python/config.toml | 6 +- tools/perfrec-python/fusion_tracing.py | 120 ++++++++++++------------- tools/perfrec-python/perf.py | 39 ++++---- 4 files changed, 85 insertions(+), 86 deletions(-) diff --git a/tools/perfrec-python/README.md b/tools/perfrec-python/README.md index ccb47949..2e7da2e4 100644 --- a/tools/perfrec-python/README.md +++ b/tools/perfrec-python/README.md @@ -24,9 +24,9 @@ python perf.py --perf_data perf.data --flamegraph_path /ws/FlameGraph # config.toml [perf] -# filter percentage of time cost +# Filter percentage of time cost threshold = 0.05 -# ignore function list +# Ignore function list ignores = ["[libc.so.6]"] ``` @@ -56,7 +56,7 @@ python fusion_tracing.py --debug_log ../../example/demo/little_demo/temp.log --m # config.toml [mxrec] -# pipe name and time cost name +# Pipe name and time cost name key_process = ["getBatchData", "getAndProcess"] process_emb_info = ["getAndSendTensors"] lookup_swap_addr = ["lookupAddrs"] diff --git a/tools/perfrec-python/config.toml b/tools/perfrec-python/config.toml index 90187313..8e15fd1d 100644 --- a/tools/perfrec-python/config.toml +++ b/tools/perfrec-python/config.toml @@ -14,14 +14,14 @@ # ============================================================================== [mxrec] -# pipe name and time cost name +# Pipe name and time cost name key_process = ["getBatchData", "getAndProcess"] process_emb_info = ["getAndSendTensors"] lookup_swap_addr = ["lookupAddrs"] embedding_recv = ["EmbeddingRecv", "EmbeddingUpdate", "SendH2DEmb"] [perf] -# filter percentage of time cost +# Filter percentage of time cost threshold = 0.05 -# ignore function list +# Ignore function list ignores = ["[libc.so.6]"] diff --git a/tools/perfrec-python/fusion_tracing.py b/tools/perfrec-python/fusion_tracing.py index 830803a4..0ed4a9fd 100644 --- a/tools/perfrec-python/fusion_tracing.py +++ b/tools/perfrec-python/fusion_tracing.py @@ -16,6 +16,7 @@ # ============================================================================== import argparse +from dataclasses import dataclass import json import logging import os @@ -51,34 +52,26 @@ class MxRecEvent: timestamp_s = get_timestamp(log_line) duration_ms = get_duration(log_line, event_name) process_id = get_process_id(log_line) - self.timestamp_start_us = timestamp_s * 1e6 - float(duration_ms) * 1e3 - self.duration_us = float(duration_ms) * 1e3 + self.timestamp_start_us = timestamp_s * 1e6 - duration_ms * 1e3 + self.duration_us = duration_ms * 1e3 self.timestamp_end_us = timestamp_s * 1e6 self.process_id = process_id self.name = event_name self.pipe_id = pipe_id +@dataclass class OpEvent: """ Class to represent an Op event. 
""" - def __init__( - self, - device_id: int, - op_name: str, - op_type: str, - task_type: str, - start_timestamp: float, - duration: float, - ): - self.device_id = device_id - self.op_name = op_name - self.op_type = op_type - self.task_type = task_type - self.start_timestamp = start_timestamp - self.duration = duration + device_id: int + op_name: str + op_type: str + task_type: str + start_timestamp: float + duration: float def extract_mxrec_events( @@ -105,13 +98,12 @@ def extract_mxrec_events( pipe_ids[pipe] = i with open(log_path) as log: for line in log: - for name, pipe in event_names.items(): - if name in line: - try: - event = MxRecEvent(line, name, pipe_ids[pipe]) - events[event.process_id][pipe].append(event) - except Exception: - broken_lines.append(line) + for name, pipe in filter(lambda item: item[0] in line, event_names.items()): + try: + event = MxRecEvent(line, name, pipe_ids[pipe]) + events[event.process_id][pipe].append(event) + except RuntimeError: + broken_lines.append(line) if broken_lines: logging.warning("There are %d broken log lines", len(broken_lines)) for line in broken_lines: @@ -155,18 +147,17 @@ def get_timestamp(log_line: str) -> float: """ pattern = r"\[(\d{4}/\d{1,2}/\d{1,2} \d{1,2}:\d{1,2}:\d{1,2}\.\d+)\]" match = re.search(pattern, log_line) - if match: - date_time_str = match.group(1) - date_time_format = "%Y/%m/%d %H:%M:%S.%f" - # Parse the date-time string into a datetime object - date_time_obj = datetime.strptime(date_time_str, date_time_format) - # Convert the datetime object to a timestamp - return date_time_obj.timestamp() - else: - raise RuntimeError(f"There is no time in log: {log_line}") + if not match: + raise RuntimeError(f"there is no time in log: {log_line}") + date_time_str = match.group(1) + date_time_format = "%Y/%m/%d %H:%M:%S.%f" + # Parse the date-time string into a datetime object + date_time_obj = datetime.strptime(date_time_str, date_time_format) + # Convert the datetime object to a timestamp + return date_time_obj.timestamp() -def get_duration(log_line: str, event_name: str) -> int: +def get_duration(log_line: str, event_name: str) -> float: """ Extracts the duration of an event from a log line. @@ -179,11 +170,10 @@ def get_duration(log_line: str, event_name: str) -> int: """ pattern = event_name + r".*:\s*(\d+)" match = re.search(pattern, log_line) - if match: - duration_ms = match.group(1) - return int(duration_ms) - else: - raise RuntimeError(f"There is no event: {event_name}, log: {log_line}") + if not match: + raise RuntimeError(f"there is no event: {event_name}, log: {log_line}") + duration_ms = match.group(1) + return float(duration_ms) def get_process_id(log_line: str) -> int: @@ -196,13 +186,12 @@ def get_process_id(log_line: str) -> int: Returns: int: The extracted process ID. """ - pattern = r"process_id:\s*(\d+)" + pattern = r"\[(\d+)\]" match = re.search(pattern, log_line) - if match: - process_id = match.group(1) - return int(process_id) - else: - raise RuntimeError(f"There is no process_id in log: {log_line}") + if not match: + raise RuntimeError(f"there is no process_id in log: {log_line}") + process_id = match.group(1) + return int(process_id) def read_mxrec_config() -> MxRecConfig: @@ -212,21 +201,25 @@ def read_mxrec_config() -> MxRecConfig: Returns: MxRecCofig: Configuration class. 
""" - config = toml.load("config.toml") - return MxRecConfig(config["mxrec"]) + try: + config = toml.load("config.toml") + return MxRecConfig(config["mxrec"]) + except toml.TomlDecodeError: + logging.error("Can not open or load the config.toml.") + exit(1) +@dataclass class TracingMetaData: """ Class to represent metadata for tracing. """ - def __init__(self, name: str, pid: int, tid: int, ph: str, args: Dict[str, Any]): - self.name = name - self.pid = pid - self.tid = tid - self.ph = ph - self.args = args + name: str + pid: int + tid: int + ph: str + args: Dict[str, Any] class TracingMxRecEvent: @@ -325,6 +318,7 @@ def get_op_pid(op_event: OpEvent) -> int: Returns: int: Process ID. """ + # add 100 avoiding confict with cpu pid(rand_id) return 100 + op_event.device_id @@ -370,19 +364,23 @@ def get_op_tracing(path: str) -> Tuple[List[TracingMetaData], List[TracingOpEven file_path = os.path.join(root, file) op_events = extract_op_events(file_path) for event in op_events: - pid = get_op_pid(event) - if pid not in pids: - pids.add(pid) - metadata.extend(new_process_metadata(pid, event.device_id)) + process_id = get_op_pid(event) + if process_id not in pids: + pids.add(process_id) + metadata.extend( + new_process_metadata(process_id, event.device_id) + ) if event.task_type not in task_types: task_id = len(task_types) task_types[event.task_type] = task_id - tid = get_fake_tid(pid, task_types[event.task_type]) + tid = get_fake_tid(process_id, task_types[event.task_type]) if tid not in tids: tids.add(tid) - metadata.extend(new_thread_metadata(pid, tid, event.task_type)) + metadata.extend( + new_thread_metadata(process_id, tid, event.task_type) + ) op_tracing.append(TracingOpEvent(event, tid)) - return (metadata, op_tracing) + return metadata, op_tracing def main(): @@ -415,7 +413,7 @@ def main(): tracing.extend(op_metadata) tracing.extend(op_tracing) - fd = os.open("mxrec_tracing.json", os.O_WRONLY | os.O_CREAT, 0o644) + fd = os.open("mxrec_tracing.json", os.O_WRONLY | os.O_CREAT, 0o640) with os.fdopen(fd, "w") as file: json.dump(tracing, file, indent=4, default=lambda obj: obj.__dict__) diff --git a/tools/perfrec-python/perf.py b/tools/perfrec-python/perf.py index d1f56279..4ff149a1 100644 --- a/tools/perfrec-python/perf.py +++ b/tools/perfrec-python/perf.py @@ -19,7 +19,6 @@ import argparse import logging import os import subprocess -import sys from collections import defaultdict from typing import List @@ -60,7 +59,7 @@ def generate_flamegraph( # Generate the folded stack output folded_output = perf_data + ".folded" - fd = os.open(folded_output, os.O_WRONLY | os.O_CREAT, 0o644) + fd = os.open(folded_output, os.O_WRONLY | os.O_CREAT, 0o640) with os.fdopen(fd, "w") as f: script_output = subprocess.run( [perf_bin, "script", "-i", perf_data], check=True, stdout=subprocess.PIPE @@ -70,7 +69,7 @@ def generate_flamegraph( ) # Generate the flamegraph - fd_svg = os.open(output_svg, os.O_WRONLY | os.O_CREAT, 0o644) + fd_svg = os.open(output_svg, os.O_WRONLY | os.O_CREAT, 0o640) with os.fdopen(fd_svg, "w") as f: subprocess.run([flamegraph_script_path, folded_output], check=True, stdout=f) @@ -102,6 +101,8 @@ def analyze_folded_stack(folded_output: str) -> None: total_count = 0 # Read the folded stack output + # Line of folded stack example: + # python3.7;[libascendalog.so];access;__sys_trace_return;prepare_creds 10101010 with open(folded_output, "r") as f: for line in f: parts = line.strip().rsplit( @@ -129,7 +130,7 @@ def analyze_folded_stack(folded_output: str) -> None: # Prepare data for tabulate # 
Write call stacks to file table_data = [] - fd_call_stacks = os.open("call_stacks.txt", os.O_WRONLY | os.O_CREAT, 0o644) + fd_call_stacks = os.open("call_stacks.txt", os.O_WRONLY | os.O_CREAT, 0o640) with os.fdopen(fd_call_stacks, "w") as f: for func, call_stack in results: percentage = ( @@ -142,7 +143,7 @@ def analyze_folded_stack(folded_output: str) -> None: f.writelines( [ f"func_name: {func}\n", - f"percetage: {percentage:.2f}%\n", + f"percentage: {percentage:.2f}%\n", "call_stacks:\n", ] + stacks @@ -150,9 +151,9 @@ def analyze_folded_stack(folded_output: str) -> None: ) # Print the results using tabulate - logging.info("\nFunctions with more than 5% of total samples:\n") + logging.info("\nFunctions with more than 5% of total samples:") headers = ["Function", "Count", "Percentage"] - sys.stdout.write(tabulate(table_data, headers=headers, tablefmt="grid") + "\n") + logging.info("\n" + tabulate(table_data, headers=headers, tablefmt="grid")) def limit_line(input_content: str, line_length: int) -> str: @@ -160,7 +161,7 @@ def limit_line(input_content: str, line_length: int) -> str: Limits the length of a line to a specified number of characters, adding line breaks if necessary. Args: - input (str): The input string. + input_content (str): The input string. line_length (int): The maximum line length. Returns: @@ -170,13 +171,10 @@ def limit_line(input_content: str, line_length: int) -> str: return input_content limited_str = "" if line_length > 0: - count = 0 - for c in input_content: - if count >= line_length: - limited_str += "\n" - count = 0 - limited_str += c - count += 1 + limited_str = "\n".join( + input_content[i : i + line_length] + for i in range(len(input_content), line_length) + ) return limited_str @@ -185,7 +183,7 @@ class PerfConfig: Configuration from `config.toml`. """ - def __init__(self, threshold: int, ignores: List[str]): + def __init__(self, threshold: float = 0.05, ignores: List[str] = []): self.threshold = threshold self.ignores = set(ignores) @@ -197,9 +195,12 @@ def read_config() -> PerfConfig: Returns: PerfConfig: Configuration class. """ - config = toml.load("config.toml") - perf_config = config["perf"] - return PerfConfig(perf_config["threshold"], perf_config["ignores"]) + try: + config = toml.load("config.toml") + perf_config = config["perf"] + return PerfConfig(perf_config["threshold"], perf_config["ignores"]) + except toml.TomlDecodeError: + return PerfConfig() def main(): -- Gitee From 251f0b6d644dc129f6d0cb242bbd3062ba08ca58 Mon Sep 17 00:00:00 2001 From: steepcurve Date: Tue, 23 Jul 2024 16:48:25 +0800 Subject: [PATCH 29/34] add README --- tools/perfrec-python/README.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tools/perfrec-python/README.md b/tools/perfrec-python/README.md index 2e7da2e4..ddc7e114 100644 --- a/tools/perfrec-python/README.md +++ b/tools/perfrec-python/README.md @@ -16,6 +16,23 @@ optional arguments: Path to the output SVG file. (default: flamegraph.svg) ``` #### 使用示例 + +参考以下脚本使用`perf`采集数据。 +```bash +pid=$(top -b -n 1 | head -n 8 | tail -n 1 | awk '{print $1}') +if [ -z "$pid" ];then + echo "未获取到进程ID" + exit 1 +fi +perf record -F 99 -p $pid -a -g -- sleep 60 +if [ $? 
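A quick standalone check of the stride-slicing idiom that the rewritten `limit_line` above relies on (a sketch for illustration, not project code): slicing with a step equal to the target width splits a long symbol name into fixed-width rows for the tabulate output.

```python
def wrap_fixed_width(text: str, width: int) -> str:
    # Split text into width-sized chunks and join them with newlines,
    # mirroring the stride-based slicing used by limit_line.
    if width <= 0 or width >= len(text):
        return text
    return "\n".join(text[i : i + width] for i in range(0, len(text), width))

assert wrap_fixed_width("abcdefgh", 3) == "abc\ndef\ngh"
assert wrap_fixed_width("short", 50) == "short"
```
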
From 251f0b6d644dc129f6d0cb242bbd3062ba08ca58 Mon Sep 17 00:00:00 2001
From: steepcurve
Date: Tue, 23 Jul 2024 16:48:25 +0800
Subject: [PATCH 29/34] add README

---
 tools/perfrec-python/README.md | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/tools/perfrec-python/README.md b/tools/perfrec-python/README.md
index 2e7da2e4..ddc7e114 100644
--- a/tools/perfrec-python/README.md
+++ b/tools/perfrec-python/README.md
@@ -16,6 +16,23 @@ optional arguments:
                         Path to the output SVG file. (default: flamegraph.svg)
 ```
 #### Usage example
+
+Use the following script to collect data with `perf`:
+```bash
+pid=$(top -b -n 1 | head -n 8 | tail -n 1 | awk '{print $1}')
+if [ -z "$pid" ];then
+    echo "Failed to get the process ID"
+    exit 1
+fi
+perf record -F 99 -p $pid -a -g -- sleep 60
+if [ $? -ne 0 ]; then
+    echo "perf record failed"
+    exit 1
+fi
+echo "perf.data collection finished"
+```
+
+Then use this tool to generate the flamegraph and the time-cost analysis of functions:
 ```bash
 python perf.py --perf_data perf.data --flamegraph_path /ws/FlameGraph
-- 
Gitee


From 048ebbd8d23a550edc09e68232c3115d5a05140e Mon Sep 17 00:00:00 2001
From: steepcurve
Date: Tue, 23 Jul 2024 16:59:08 +0800
Subject: [PATCH 30/34] add shell=False

---
 tools/perfrec-python/perf.py | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/tools/perfrec-python/perf.py b/tools/perfrec-python/perf.py
index 4ff149a1..316b415f 100644
--- a/tools/perfrec-python/perf.py
+++ b/tools/perfrec-python/perf.py
@@ -39,7 +39,7 @@ def generate_flamegraph(
     """
     # Ensure perf script is available
     try:
-        subprocess.run([perf_bin, "--version"], check=True)
+        subprocess.run([perf_bin, "--version"], shell=False, check=True)
     except subprocess.CalledProcessError:
         logging.error("perf is not installed or not in PATH.")
         return
@@ -62,16 +62,25 @@ def generate_flamegraph(
     fd = os.open(folded_output, os.O_WRONLY | os.O_CREAT, 0o640)
     with os.fdopen(fd, "w") as f:
         script_output = subprocess.run(
-            [perf_bin, "script", "-i", perf_data],
+            [perf_bin, "script", "-i", perf_data],
+            shell=False,
             check=True,
             stdout=subprocess.PIPE,
         )
         subprocess.run(
-            [stackcollapse_path], check=True, input=script_output.stdout, stdout=f
+            [stackcollapse_path],
+            shell=False,
+            check=True,
+            input=script_output.stdout,
+            stdout=f,
         )
 
     # Generate the flamegraph
     fd_svg = os.open(output_svg, os.O_WRONLY | os.O_CREAT, 0o640)
     with os.fdopen(fd_svg, "w") as f:
-        subprocess.run([flamegraph_script_path, folded_output], check=True, stdout=f)
+        subprocess.run(
+            [flamegraph_script_path, folded_output], shell=False, check=True, stdout=f
+        )
 
     logging.info("Flamegraph generated at %s", output_svg)
-- 
Gitee


From ed53a2a11ae39771aa2d511bb7504f14a8f7e2ad Mon Sep 17 00:00:00 2001
From: steepcurve
Date: Tue, 23 Jul 2024 17:15:01 +0800
Subject: [PATCH 31/34] cleancode

---
 tools/perfrec-python/fusion_tracing.py | 11 +++++++----
 tools/perfrec-python/perf.py           |  4 ++--
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/tools/perfrec-python/fusion_tracing.py b/tools/perfrec-python/fusion_tracing.py
index 0ed4a9fd..861dee58 100644
--- a/tools/perfrec-python/fusion_tracing.py
+++ b/tools/perfrec-python/fusion_tracing.py
@@ -16,14 +16,14 @@
 # ==============================================================================
 
 import argparse
-from dataclasses import dataclass
 import json
 import logging
 import os
 import re
 from collections import defaultdict
+from dataclasses import dataclass
 from datetime import datetime
-from typing import Any, Dict, List, Tuple
+from typing import Any, Dict, List, Optional, Tuple
 
 import pandas as pd
 import toml
@@ -194,7 +194,7 @@ def get_process_id(log_line: str) -> int:
     return int(process_id)
 
 
-def read_mxrec_config() -> MxRecConfig:
+def read_mxrec_config() -> Optional[MxRecConfig]:
     """
     Reads the MxRec configuration from a TOML file.
@@ -206,7 +206,7 @@ def read_mxrec_config() -> MxRecConfig: return MxRecConfig(config["mxrec"]) except toml.TomlDecodeError: logging.error("Can not open or load the config.toml.") - exit(1) + return None @dataclass @@ -399,6 +399,9 @@ def main(): log_path = args.debug_log config = read_mxrec_config() + if not config: + logging.error("Can not read config.toml, it will exit unsuccessfully.") + exit(1) mxrec_events = extract_mxrec_events(log_path, config) tracing = list() diff --git a/tools/perfrec-python/perf.py b/tools/perfrec-python/perf.py index 316b415f..3885ec96 100644 --- a/tools/perfrec-python/perf.py +++ b/tools/perfrec-python/perf.py @@ -162,7 +162,7 @@ def analyze_folded_stack(folded_output: str) -> None: # Print the results using tabulate logging.info("\nFunctions with more than 5% of total samples:") headers = ["Function", "Count", "Percentage"] - logging.info("\n" + tabulate(table_data, headers=headers, tablefmt="grid")) + logging.info("\n%s", tabulate(table_data, headers=headers, tablefmt="grid")) def limit_line(input_content: str, line_length: int) -> str: @@ -192,7 +192,7 @@ class PerfConfig: Configuration from `config.toml`. """ - def __init__(self, threshold: float = 0.05, ignores: List[str] = []): + def __init__(self, threshold: float = 0.05, ignores: List[str] = list()): self.threshold = threshold self.ignores = set(ignores) -- Gitee From e9736ed84c63ed97635e86176fef1f7db02d08ee Mon Sep 17 00:00:00 2001 From: steepcurve Date: Tue, 23 Jul 2024 18:49:48 +0800 Subject: [PATCH 32/34] cleancode --- tools/perfrec-python/fusion_tracing.py | 18 +++++++++--------- tools/perfrec-python/perf.py | 8 ++++---- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/tools/perfrec-python/fusion_tracing.py b/tools/perfrec-python/fusion_tracing.py index 861dee58..f244c8b1 100644 --- a/tools/perfrec-python/fusion_tracing.py +++ b/tools/perfrec-python/fusion_tracing.py @@ -23,7 +23,7 @@ import re from collections import defaultdict from dataclasses import dataclass from datetime import datetime -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, List, Tuple import pandas as pd import toml @@ -194,7 +194,7 @@ def get_process_id(log_line: str) -> int: return int(process_id) -def read_mxrec_config() -> Optional[MxRecConfig]: +def read_mxrec_config() -> MxRecConfig: """ Reads the MxRec configuration from a TOML file. 
@@ -205,8 +205,7 @@ def read_mxrec_config() -> Optional[MxRecConfig]: config = toml.load("config.toml") return MxRecConfig(config["mxrec"]) except toml.TomlDecodeError: - logging.error("Can not open or load the config.toml.") - return None + raise RuntimeError("can not load config.toml") @dataclass @@ -398,14 +397,15 @@ def main(): args = parser.parse_args() log_path = args.debug_log - config = read_mxrec_config() - if not config: + tracing = list() + try: + config = read_mxrec_config() + mxrec_events = extract_mxrec_events(log_path, config) + tracing.extend(get_metadata(list(mxrec_events.keys()), config)) + except RuntimeError: logging.error("Can not read config.toml, it will exit unsuccessfully.") exit(1) - mxrec_events = extract_mxrec_events(log_path, config) - tracing = list() - tracing.extend(get_metadata(list(mxrec_events.keys()), config)) for process in mxrec_events.values(): for events in process.values(): tracing.extend([TracingMxRecEvent(event) for event in events]) diff --git a/tools/perfrec-python/perf.py b/tools/perfrec-python/perf.py index 3885ec96..34f688e9 100644 --- a/tools/perfrec-python/perf.py +++ b/tools/perfrec-python/perf.py @@ -192,9 +192,9 @@ class PerfConfig: Configuration from `config.toml`. """ - def __init__(self, threshold: float = 0.05, ignores: List[str] = list()): - self.threshold = threshold + def __init__(self, ignores: List[str], threshold: float = 0.05): self.ignores = set(ignores) + self.threshold = threshold def read_config() -> PerfConfig: @@ -207,9 +207,9 @@ def read_config() -> PerfConfig: try: config = toml.load("config.toml") perf_config = config["perf"] - return PerfConfig(perf_config["threshold"], perf_config["ignores"]) + return PerfConfig(perf_config["ignores"], perf_config["threshold"]) except toml.TomlDecodeError: - return PerfConfig() + return PerfConfig(ignores=[]) def main(): -- Gitee From e7de3ffe7ddd774bad7a5a74f7c2d75c9365a9de Mon Sep 17 00:00:00 2001 From: steepcurve Date: Tue, 23 Jul 2024 19:56:47 +0800 Subject: [PATCH 33/34] cleancode --- tools/perfrec-python/fusion_tracing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perfrec-python/fusion_tracing.py b/tools/perfrec-python/fusion_tracing.py index f244c8b1..49900004 100644 --- a/tools/perfrec-python/fusion_tracing.py +++ b/tools/perfrec-python/fusion_tracing.py @@ -204,8 +204,8 @@ def read_mxrec_config() -> MxRecConfig: try: config = toml.load("config.toml") return MxRecConfig(config["mxrec"]) - except toml.TomlDecodeError: - raise RuntimeError("can not load config.toml") + except toml.TomlDecodeError as e: + raise RuntimeError("can not load config.toml") from e @dataclass -- Gitee From 766372cf303051535c1cc5fda4e8f7748f1244b9 Mon Sep 17 00:00:00 2001 From: steepcurve Date: Thu, 25 Jul 2024 15:06:27 +0800 Subject: [PATCH 34/34] fix overwrite --- tools/perfrec-python/fusion_tracing.py | 2 +- tools/perfrec-python/perf.py | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tools/perfrec-python/fusion_tracing.py b/tools/perfrec-python/fusion_tracing.py index 49900004..72978924 100644 --- a/tools/perfrec-python/fusion_tracing.py +++ b/tools/perfrec-python/fusion_tracing.py @@ -416,7 +416,7 @@ def main(): tracing.extend(op_metadata) tracing.extend(op_tracing) - fd = os.open("mxrec_tracing.json", os.O_WRONLY | os.O_CREAT, 0o640) + fd = os.open("mxrec_tracing.json", os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o640) with os.fdopen(fd, "w") as file: json.dump(tracing, file, indent=4, default=lambda obj: obj.__dict__) diff --git 
index 34f688e9..e9dadb3b 100644
--- a/tools/perfrec-python/perf.py
+++ b/tools/perfrec-python/perf.py
@@ -25,6 +25,8 @@ from typing import List
 import toml
 from tabulate import tabulate
 
+NEW_FILE_FLAG = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
+
 
 def generate_flamegraph(
     perf_bin: str, perf_data: str, output_svg: str, flamegraph_path: str
@@ -59,7 +61,7 @@ def generate_flamegraph(
     # Generate the folded stack output
     folded_output = perf_data + ".folded"
-    fd = os.open(folded_output, os.O_WRONLY | os.O_CREAT, 0o640)
+    fd = os.open(folded_output, NEW_FILE_FLAG, 0o640)
     with os.fdopen(fd, "w") as f:
         script_output = subprocess.run(
             [perf_bin, "script", "-i", perf_data],
@@ -76,7 +78,7 @@ def generate_flamegraph(
     # Generate the flamegraph
-    fd_svg = os.open(output_svg, os.O_WRONLY | os.O_CREAT, 0o640)
+    fd_svg = os.open(output_svg, NEW_FILE_FLAG, 0o640)
     with os.fdopen(fd_svg, "w") as f:
         subprocess.run(
             [flamegraph_script_path, folded_output], shell=False, check=True, stdout=f
@@ -139,7 +141,7 @@ def analyze_folded_stack(folded_output: str) -> None:
     # Prepare data for tabulate
     # Write call stacks to file
     table_data = []
-    fd_call_stacks = os.open("call_stacks.txt", os.O_WRONLY | os.O_CREAT, 0o640)
+    fd_call_stacks = os.open("call_stacks.txt", NEW_FILE_FLAG, 0o640)
     with os.fdopen(fd_call_stacks, "w") as f:
-- 
Gitee
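
A closing note on the "fix overwrite" patch above: opening an existing file with only `os.O_WRONLY | os.O_CREAT` does not truncate it, so rewriting a shorter payload leaves the old tail in place, which is how a regenerated `mxrec_tracing.json` or SVG could end up corrupted. A minimal sketch of the failure mode (illustrative only, with a hypothetical scratch file; not part of the series):

```python
import os

path = "demo.txt"  # hypothetical scratch file

fd = os.open(path, os.O_WRONLY | os.O_CREAT, 0o640)
with os.fdopen(fd, "w") as f:
    f.write("0123456789")  # first run writes 10 bytes

fd = os.open(path, os.O_WRONLY | os.O_CREAT, 0o640)  # no O_TRUNC
with os.fdopen(fd, "w") as f:
    f.write("abc")  # second run overwrites only the first 3 bytes

with open(path) as f:
    assert f.read() == "abc3456789"  # stale tail survives

# Adding os.O_TRUNC (the series' NEW_FILE_FLAG) empties the file first,
# so the second write would leave exactly "abc".
```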