# Copyright 2017-2023 Lawrence Livermore National Security, LLC and other
# Hatchet Project Developers. See the top-level LICENSE file for details.
#
# SPDX-License-Identifier: MIT
import re
import os
import glob
import pandas as pd
import hatchet.graphframe
from hatchet.node import Node
from hatchet.graph import Graph
from hatchet.frame import Frame
class TAUReader:
    """Read in a profile generated using TAU.

    The reader walks ``dirname`` recursively and collects every file whose
    name matches ``profile.*``. Each directory that contains profiles is
    treated as one metric; the i-th (sorted) file of every directory is
    assumed to describe the same rank/thread, so the files are processed in
    lock-step. The rank is taken from the third-from-last dot-separated
    field of the file name and the thread from the last one.

    Typical use::

        graphframe = TAUReader("path/to/tau/output").read()
    """

    # Matches a data line of a profile file, e.g.
    #   ".TAU application => foo()" 31 0 155019 155019 0 GROUP="TAU_DEFAULT"
    # group(1) is the (call)path, group(2) the space separated values.
    _PROFILE_LINE_RE = re.compile(r"\"(.*)\"\s(.*)\sG")
    # Matches the first double-quoted string of a line (the callpath).
    _QUOTED_NAME_RE = re.compile(r"\"(.*?)\"")

    def __init__(self, dirname):
        """Store the profile directory and initialise the parsing state.

        Arguments:
            dirname (str): directory that (recursively) contains the
                ``profile.*`` files written by TAU.
        """
        self.dirname = dirname

        # One dict per (node, rank, thread); becomes a dataframe row.
        self.node_dicts = []
        # Maps a callpath (tuple of names) to its hatchet Node.
        self.callpath_to_node = {}
        self.rank_thread_to_data = {}
        self.filepath_to_data = {}
        self.inc_metrics = []
        self.exc_metrics = []
        self.columns = []

        # Set to True by create_graph() once a non-zero rank/thread is seen.
        self.multiple_ranks = False
        self.multiple_threads = False

    def create_node_dict(
        self,
        node,
        columns,
        metric_values,
        name,
        filename,
        module,
        start_line,
        end_line,
        rank,
        thread,
    ):
        """Build the dataframe row (as a dict) for one node on one rank/thread.

        Arguments:
            node: hatchet node the row belongs to.
            columns (list): column names; ``columns[0]`` is skipped, so
                ``metric_values[i]`` is stored under ``columns[i + 1]``.
            metric_values (list): metric values in column order.
            name (str): display name of the node.
            filename (str or None): source file, if known.
            module (str or None): module, if known.
            start_line: first source line (anything accepted by ``int``).
            end_line: last source line (anything accepted by ``int``).
            rank (int): rank the values were measured on.
            thread (int): thread the values were measured on.

        Returns:
            (dict): the row, keyed by column name.
        """
        node_dict = {
            "node": node,
            "rank": rank,
            "thread": thread,
            "name": name,
            "file": filename,
            "module": module,
            "line": int(start_line),
            "end_line": int(end_line),
        }
        for i, value in enumerate(metric_values):
            node_dict[columns[i + 1]] = value
        return node_dict

    @staticmethod
    def _detect_symbol(node_info):
        """Return the separator (" C ", " [@] " or "") found in a node label."""
        if " C " in node_info:
            return " C "
        if " [@] " in node_info:
            return " [@] "
        return ""

    @staticmethod
    def _to_number(text):
        """Parse ``text`` as int, falling back to float for non-integral values."""
        try:
            return int(text)
        except ValueError:
            return float(text)

    @staticmethod
    def _get_name_file_module(is_parent, node_info, symbol):
        """Get the name, file and module of a node from its label.

        Example label: [UNWIND] <file> [@] <name> [{<file_or_module>} {<line>}]
        There are several label formats in TAU; example formats are given
        in the comments of each branch.

        Arguments:
            is_parent (bool): if True only the name is extracted; file and
                module stay None.
            node_info (str): the label of the node as it appears in the
                callpath.
            symbol (str): " [@] ", " C " or "" (see ``_detect_symbol``).

        Returns:
            (list): ``[name, file, module]``; file and module may be None.
        """
        name, file, module = None, None, None

        if symbol == " [@] ":
            node_info = node_info.split(symbol)
            if is_parent:
                # We just need to take the name if it is a parent.
                name = "[UNWIND] " + node_info[1].split()[0]
            else:
                file = node_info[0].split()[1]
                if "[{" in node_info[1]:
                    # Sometimes we see file and module information inside of [{}]
                    # Example: [UNWIND] <file> [@] <name> [{<file_or_module>} {<line>}]
                    name_and_module = node_info[1].split(" [{")
                    module = name_and_module[1].split()[0].strip("}")
                else:
                    # Example: [UNWIND] <file> [@] <name> <module>
                    name_and_module = node_info[1].split()
                    module = name_and_module[1]

                # If the module string is contained in the file string:
                # assign None to file if it's a .so,
                # assign None to module if it's a .c.
                if module in file:
                    if ".so" in file:
                        file = None
                    if ".c" in module:
                        module = None
                name = "[UNWIND] " + name_and_module[0]
        elif symbol == " C ":
            # " C " means it's a C function.
            node_info = node_info.split(symbol)
            name = node_info[0]
            if not is_parent and "[{" in node_info[1]:
                # Example: <name> C [{<file>} {<line>}]
                file = node_info[1].split()[0].strip("}[{")
        elif "[{" in node_info:
            # There is neither " C " nor " [@] ".
            # Example: [<type>] <name> [{<file>} {<line>}]
            node_info = node_info.split(" [{")
            name = node_info[0]
            if not is_parent:
                file = node_info[1].split()[0].strip("}{")
        else:
            # Example 1: [<type>] <name> <module>
            # Example 2: [<type>] <name>
            # Example 3: <name>
            name = node_info
            node_info = node_info.split()
            # Only the first example carries a module. A label such as
            # "[CONTEXT] .TAU application" also has three tokens, so we
            # additionally require a "/" (a path) to be present.
            if len(node_info) == 3 and "/" in name:
                name = node_info[0] + " " + node_info[1]
                if not is_parent:
                    module = node_info[2]

        return [name, file, module]

    @staticmethod
    def _get_line_numbers(node_info):
        """Get the start and end line of a node from its label.

        Arguments:
            node_info (str): node label, e.g.
                ``StrToInt [{lulesh-util.cc} {13,1}-{29,1}]``.

        Returns:
            (list): ``[start_line, end_line]``. Parsed values are returned as
            strings (the caller converts them with ``int``); ``0`` is used
            when the label carries no usable line information.
        """
        start_line, end_line = 0, 0

        # There should be [{}] symbols if there is line number information.
        if "[{" not in node_info:
            return [start_line, end_line]

        match = re.search(r"\{.*\}\]", node_info)
        tokens = match.group(0).split() if match else []
        if len(tokens) < 2:
            # Only "[{<file>}]" (or a malformed label): no line information.
            return [start_line, end_line]

        line_numbers = tokens[1].strip("}]").replace("{", "")
        if not line_numbers:
            # Empty braces, e.g. "[{} {}]".
            return [start_line, end_line]

        start_line = line_numbers
        if "-" in line_numbers:
            # Sometimes there is "-" between start line and end line.
            # Example: {341,1}-{396,1}
            line_numbers = line_numbers.split("-")
            start_line = line_numbers[0].split(",")[0]
            end_line = line_numbers[1].split(",")[0]
        elif "," in line_numbers:
            # Sometimes we don't have "-".
            # Example: {15,0}
            parts = line_numbers.split(",")
            start_line = parts[0]
            end_line = parts[1]
        return [start_line, end_line]

    def _create_parent(self, child_node, parent_callpath):
        """Link ``child_node`` to its parent, creating missing ancestors.

        In TAU output, sometimes we see a node as a parent in the callpath
        before we see it as a leaf node. In this case, we need to create a
        hatchet node for the parent. We can't create a node_dict for the
        parent because we don't know its metric values when we first see it
        in a callpath.

        Example: a => b => c "<c_metric_values>"
        Here, if we haven't seen 'b' before, we create it when we create 'c'.

        The loop walks up the callpath, creating nodes, until it reaches an
        ancestor that already exists in ``self.callpath_to_node``.

        Arguments:
            child_node: node that needs a parent.
            parent_callpath (tuple): callpath of the parent of ``child_node``.
        """
        while True:
            parent_node = self.callpath_to_node.get(parent_callpath)
            if parent_node is not None:
                # Reached an existing ancestor: link and stop.
                parent_node.add_child(child_node)
                child_node.add_parent(parent_node)
                return

            parent_info = parent_callpath[-1]
            parent_name = self._get_name_file_module(
                True, parent_info, self._detect_symbol(parent_info)
            )[0]
            parent_node = Node(Frame({"type": "function", "name": parent_name}), None)
            self.callpath_to_node[parent_callpath] = parent_node
            parent_node.add_child(child_node)
            child_node.add_parent(parent_node)

            # Continue one level up with the freshly created parent.
            child_node, parent_callpath = parent_node, parent_callpath[:-1]

    def _construct_column_list(self, first_rank_filenames):
        """Construct the column list and fill exc_metrics / inc_metrics.

        Only the second line (metadata) of every metric file of one rank is
        read.

        Arguments:
            first_rank_filenames (tuple): all metric files of one rank.

        Returns:
            (list): column names: the leading columns of the header of the
            first file followed by ``<metric>`` and ``<metric> (inc)`` for
            every file.
        """
        columns = []
        for file_index, filename in enumerate(first_rank_filenames):
            with open(filename, "r") as f:
                # Skip the first line: "192 templated_functions_MULTI_TIME"
                next(f)
                # No need to check if the metadata is the same for all metric files.
                metadata = next(f)

            # Get first three columns from # Name Calls Subrs Excl Incl ProfileCalls #
            # ProfileCalls is removed since it is typically set to 0 and not used.
            # We only do this once since these column names are the same for all files.
            if file_index == 0:
                columns.extend(
                    re.match(r"\#\s(.*)\s\#", metadata).group(1).split(" ")[:-3]
                )

            # Example metric_name: "PAPI_L2_TCM"
            # TODO: Decide if Calls and Subrs should be inc or exc metrics
            metric_name = re.search(r"<value>(.*?)<\/value>", metadata).group(1)
            if metric_name in ("CPU_TIME", "TIME"):
                metric_name = "time"
            # NOTE: a former branch for metric_name == "Name" only contained
            # a comparison (no assignment) and therefore never had an effect;
            # it is intentionally not turned into an assignment because a
            # metric called "name" would collide with the node-name column.

            columns.extend([metric_name, metric_name + " (inc)"])
            self.exc_metrics.append(metric_name)
            self.inc_metrics.append(metric_name + " (inc)")
        return columns

    def create_graph(self):
        """Parse all profile files and build the call graph.

        Fills ``self.columns``, ``self.node_dicts``, ``self.callpath_to_node``,
        ``self.exc_metrics``, ``self.inc_metrics`` and the
        ``multiple_ranks`` / ``multiple_threads`` flags.

        Returns:
            (list): root nodes of the graph.

        Raises:
            AssertionError: if the metric files of one rank do not contain
                the same root or the same callpath on the same line.
        """
        # dirpath -> path of a directory, string
        profile_filenames = []
        for dirpath, _dirnames, _files in os.walk(self.dirname):
            profiles_in_dir = glob.glob(dirpath + "/profile.*")
            if profiles_in_dir:
                # sort input files in each directory in the same order
                profile_filenames.append(sorted(profiles_in_dir))

        # Store all files in a list of tuples.
        # Each tuple stores all the metric files of a rank.
        # We process one rank at a time.
        # Example: [(metric1/profile.x.0.0, metric2/profile.x.0.0), ...]
        profile_filenames = list(zip(*profile_filenames))

        # Get column information from the metric files of a rank.
        self.columns = self._construct_column_list(profile_filenames[0])

        list_roots = []
        # Ranks/threads are compared against 0: any non-zero id means
        # that there is more than one rank/thread.
        prev_rank, prev_thread = 0, 0

        # Example filenames_per_rank: (metric1/profile.x.0.0 ...)
        for filenames_per_rank in profile_filenames:
            file_info = filenames_per_rank[0].split(".")
            rank, thread = int(file_info[-3]), int(file_info[-1])
            if not self.multiple_ranks:
                self.multiple_ranks = rank != prev_rank
            if not self.multiple_threads:
                self.multiple_threads = thread != prev_thread

            # Each file holds a different metric for this rank/thread.
            # Store the lines after the two header lines; the context
            # manager makes sure every file is closed again.
            file_data = []
            for filename in filenames_per_rank:
                with open(filename, "r") as profile_file:
                    file_data.append(profile_file.readlines()[2:])

            # Get the root information from the first file.
            # Example: ".TAU application" 1 1 272 15755429 0 GROUP="TAU_DEFAULT"
            root_line = self._PROFILE_LINE_RE.match(file_data[0][0])
            root_name = root_line.group(1).strip(" ")
            # convert it to a tuple to use it as a key in callpath_to_node
            root_callpath = tuple([root_name])
            # Don't include the value for ProfileCalls.
            root_values = [
                self._to_number(value) for value in root_line.group(2).split(" ")[:-1]
            ]

            # After the first file, only get Excl and Incl metric values
            # from the other files since the other columns are the same.
            # We assume each metric file of a rank has the same root.
            first_file_root_name = self._QUOTED_NAME_RE.search(file_data[0][0]).group(1)
            for f_index in range(1, len(file_data)):
                other_root_name = self._QUOTED_NAME_RE.search(
                    file_data[f_index][0]
                ).group(1)
                # Raised explicitly (instead of using an assert statement)
                # so that the check is not stripped under "python -O".
                # TODO: We need to find a solution if this throws an error.
                if first_file_root_name != other_root_name:
                    raise AssertionError(
                        "Metric files for a rank has different roots.\n"
                        + "File: "
                        + filenames_per_rank[f_index]
                        + "\nLine: 2"
                    )
                other_root_line = self._PROFILE_LINE_RE.match(file_data[f_index][0])
                root_values.extend(
                    self._to_number(value)
                    for value in other_root_line.group(2).split(" ")[2:4]
                )

            # Check if the root exists in other ranks.
            root_node = self.callpath_to_node.get(root_callpath)
            if root_node is None:
                # Create the root node since it doesn't exist
                root_node = Node(Frame({"name": root_name, "type": "function"}), None)
                # Store callpaths to identify nodes
                self.callpath_to_node[root_callpath] = root_node
                list_roots.append(root_node)

            self.node_dicts.append(
                self.create_node_dict(
                    root_node,
                    self.columns,
                    root_values,
                    root_name,
                    None,
                    None,
                    0,
                    0,
                    rank,
                    thread,
                )
            )

            # Start from the line after root.
            # Iterate over only the first metric file of a rank since the
            # lines should be exactly the same across all metric files of a
            # rank. The same "line_index" is used for the other metric files.
            for line_index in range(1, len(file_data[0])):
                line = file_data[0][line_index]

                # We only parse the lines that have the "=>" symbol which
                # shows the callpath info. We just skip the other lines.
                if "=>" not in line:
                    continue

                # Example: ".TAU application => foo() => bar()" 31 0 155019 155019 0 GROUP="TAU_SAMPLE|TAU_CALLPATH"
                callpath_line = self._PROFILE_LINE_RE.match(line)
                # callpath: (".TAU application", "foo()", "bar()")
                callpath = tuple(
                    name.strip(" ") for name in callpath_line.group(1).split("=>")
                )
                # Example leaf_name: StrToInt [{lulesh-util.cc} {13,1}-{29,1}]
                leaf_name = callpath[-1]
                parent_callpath = callpath[:-1]

                # Don't include the value for ProfileCalls.
                # metric_values: 31 0 155019 155019
                metric_values = list(
                    map(float, callpath_line.group(2).split(" ")[:-1])
                )

                # Get start and end line information
                start_line, end_line = self._get_line_numbers(leaf_name)
                # Get name, file, and module information using the leaf name
                # and the symbol in it
                name, file, module = self._get_name_file_module(
                    False, leaf_name, self._detect_symbol(leaf_name)
                )

                first_file_callpath_line = self._QUOTED_NAME_RE.search(line).group(1)
                # After the first file, only get Excl and Incl metric values
                # from the other files.
                for f_index in range(1, len(file_data)):
                    other_line = file_data[f_index][line_index]
                    other_file_callpath_line = self._QUOTED_NAME_RE.search(
                        other_line
                    ).group(1)
                    # We assume metric files of a rank have the exact same lines.
                    # Only difference should be the Incl and Excl metric values.
                    # TODO: We should find a solution if this raises an error.
                    if first_file_callpath_line != other_file_callpath_line:
                        raise AssertionError(
                            "Lines across metric files for a rank are not the same.\n"
                            + "File: "
                            + filenames_per_rank[f_index]
                            + "\nLine: "
                            + str(line_index + 3)
                        )
                    other_callpath_line = self._PROFILE_LINE_RE.match(other_line)
                    metric_values.extend(
                        map(float, other_callpath_line.group(2).split(" ")[2:4])
                    )

                leaf_node = self.callpath_to_node.get(callpath)
                if leaf_node is None:
                    # Create the node since it doesn't exist
                    leaf_node = Node(Frame({"type": "function", "name": name}), None)
                    self.callpath_to_node[callpath] = leaf_node
                    # Link it to its parent; missing ancestors are created.
                    self._create_parent(leaf_node, parent_callpath)

                self.node_dicts.append(
                    self.create_node_dict(
                        leaf_node,
                        self.columns,
                        metric_values,
                        name,
                        file,
                        module,
                        start_line,
                        end_line,
                        rank,
                        thread,
                    )
                )

        return list_roots

    def read(self):
        """Read the TAU profile file to extract the calling context tree.

        Returns:
            (GraphFrame): graph and dataframe built from the profiles, with
            "time (inc)" as the default metric.
        """
        # Add all nodes and roots.
        roots = self.create_graph()

        # Create a graph object once all nodes have been added.
        graph = Graph(roots)
        graph.enumerate_traverse()

        dataframe = pd.DataFrame.from_dict(data=self.node_dicts)

        # Set indices according to rank/thread numbers.
        if self.multiple_ranks and self.multiple_threads:
            indices = ["node", "rank", "thread"]
        elif self.multiple_ranks:
            dataframe.drop(columns=["thread"], inplace=True)
            indices = ["node", "rank"]
        elif self.multiple_threads:
            dataframe.drop(columns=["rank"], inplace=True)
            indices = ["node", "thread"]
        else:
            indices = ["node"]

        dataframe.set_index(indices, inplace=True)
        dataframe.sort_index(inplace=True)

        # Fill the missing ranks
        # After unstacking and iterating over rows, there
        # will be "NaN" values for some ranks. Find the first
        # rank that has a notna value and use it for other rows/ranks
        # of the multiindex.
        # TODO: iterrows() is not the best way to iterate over rows.
        if self.multiple_ranks or self.multiple_threads:
            dataframe = dataframe.unstack()
            for idx, row in dataframe.iterrows():
                # There is always a valid name for an index.
                # Take that valid name and assign to other ranks/rows.
                name = row["name"][row["name"].first_valid_index()]
                dataframe.loc[idx, "name"] = name

                # Sometimes there is no file information.
                if row["file"].first_valid_index() is not None:
                    file = row["file"][row["file"].first_valid_index()]
                    dataframe.loc[idx, "file"] = file

                # Sometimes there is no module information.
                if row["module"].first_valid_index() is not None:
                    module = row["module"][row["module"].first_valid_index()]
                    dataframe.loc[idx, "module"] = module

            # Fill the rest with 0
            dataframe.fillna(0, inplace=True)

            # Stack the dataframe
            dataframe = dataframe.stack()

        default_metric = "time (inc)"

        return hatchet.graphframe.GraphFrame(
            graph, dataframe, self.exc_metrics, self.inc_metrics, default_metric
        )