Source code for hatchet.readers.tau_reader

# Copyright 2017-2023 Lawrence Livermore National Security, LLC and other
# Hatchet Project Developers. See the top-level LICENSE file for details.
#
# SPDX-License-Identifier: MIT

import re
import os
import glob
import pandas as pd
import hatchet.graphframe
from hatchet.node import Node
from hatchet.graph import Graph
from hatchet.frame import Frame


class TAUReader:
    """Read in a profile generated using TAU."""

    def __init__(self, dirname):
        self.dirname = dirname
        self.node_dicts = []
        self.callpath_to_node = {}
        self.rank_thread_to_data = {}
        self.filepath_to_data = {}
        self.inc_metrics = []
        self.exc_metrics = []
        self.columns = []
        self.multiple_ranks = False
        self.multiple_threads = False
    def create_node_dict(
        self,
        node,
        columns,
        metric_values,
        name,
        filename,
        module,
        start_line,
        end_line,
        rank,
        thread,
    ):
        node_dict = {
            "node": node,
            "rank": rank,
            "thread": thread,
            "name": name,
            "file": filename,
            "module": module,
            "line": int(start_line),
            "end_line": int(end_line),
        }

        for i in range(len(metric_values)):
            node_dict[columns[i + 1]] = metric_values[i]

        return node_dict
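    # Illustrative example (not taken from a real TAU output): for a sampled leaf
    # such as 'StrToInt [{lulesh-util.cc} {13,1}-{29,1}]' measured with a single
    # TIME metric, create_node_dict returns a dictionary of roughly this shape.
    # The metric values below are made up for illustration only:
    #
    #   {
    #       "node": <Node StrToInt>, "rank": 0, "thread": 0,
    #       "name": "StrToInt", "file": "lulesh-util.cc", "module": None,
    #       "line": 13, "end_line": 29,
    #       "Calls": 31.0, "Subrs": 0.0, "time": 155019.0, "time (inc)": 155019.0,
    #   }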
    def create_graph(self):
        def _get_name_file_module(is_parent, node_info, symbol):
            """This function gets the name, file, and module information for a
            node using the corresponding line in the output file.
            Example line: [UNWIND] <file> [@] <name> [{<file_or_module>} {<line>}]
            There are several line formats in TAU and this function gets the
            node information considering all these formats, for which examples
            are given below.
            """
            name, file, module = None, None, None
            # There are several different formats in TAU outputs.
            # There might be file, line, and module information.
            # The following if-else block covers all possible output
            # formats. Example formats are given in comments.
            if symbol == " [@] ":
                # Check if there is a [@] symbol.
                node_info = node_info.split(symbol)
                # We don't need file and module information if it's a parent node.
                if not is_parent:
                    file = node_info[0].split()[1]
                    if "[{" in node_info[1]:
                        # Sometimes we see file and module information inside of [{}]
                        # Example: [UNWIND] <file> [@] <name> [{<file_or_module>} {<line>}]
                        name_and_module = node_info[1].split(" [{")
                        module = name_and_module[1].split()[0].strip("}")
                    else:
                        # Example: [UNWIND] <file> [@] <name> <module>
                        name_and_module = node_info[1].split()
                        module = name_and_module[1]

                    # Check if module is in file.
                    # Assign None to file if it's .so.
                    # Assign None to module if it's .c.
                    if module in file:
                        if ".so" in file:
                            file = None
                        if ".c" in module:
                            module = None
                    name = "[UNWIND] " + name_and_module[0]
                else:
                    # We just need to take the name if it is a parent.
                    name = "[UNWIND] " + node_info[1].split()[0]
            elif symbol == " C ":
                # Check if there is a C symbol.
                # "C" symbol means it's a C function.
                node_info = node_info.split(symbol)
                name = node_info[0]
                # We don't need file and module information if it's a parent node.
                if not is_parent:
                    if "[{" in node_info[1]:
                        # Example: <name> C [{<file>} {<line>}]
                        node_info = node_info[1].split()
                        file = node_info[0].strip("}[{")
            else:
                if "[{" in node_info:
                    # If there isn't C or [@]
                    # Example: [<type>] <name> [{} {}]
                    node_info = node_info.split(" [{")
                    name = node_info[0]
                    # We don't need file and module information if it's a parent node.
                    if not is_parent:
                        file = node_info[1].split()[0].strip("}{")
                else:
                    # Example 1: [<type>] <name> <module>
                    # Example 2: [<type>] <name>
                    # Example 3: <name>
                    name = node_info
                    node_info = node_info.split()
                    # We need to take module information from the first example.
                    # Another example is "[CONTEXT] .TAU application", which contradicts
                    # the first example. So we check if there is a "/" symbol, which
                    # will show the module information in this case.
                    if len(node_info) == 3 and "/" in name:
                        name = node_info[0] + " " + node_info[1]
                        # We don't need file and module information if it's a parent node.
                        if not is_parent:
                            module = node_info[2]
            return [name, file, module]

        def _get_line_numbers(node_info):
            start_line, end_line = 0, 0
            # There should be [{}] symbols if there is line number information.
            if "[{" in node_info:
                tmp_module_or_file_line = (
                    re.search(r"\{.*\}\]", node_info).group(0).split()
                )
                line_numbers = tmp_module_or_file_line[1].strip("}]").replace("{", "")
                start_line = line_numbers
                if "-" in line_numbers:
                    # Sometimes there is "-" between start line and end line.
                    # Example: {341,1}-{396,1}
                    line_numbers = line_numbers.split("-")
                    start_line = line_numbers[0].split(",")[0]
                    end_line = line_numbers[1].split(",")[0]
                else:
                    if "," in line_numbers:
                        # Sometimes we don't have "-".
                        # Example: {15,0}
                        start_line = line_numbers.split(",")[0]
                        end_line = line_numbers.split(",")[1]
            return [start_line, end_line]

        def _create_parent(child_node, parent_callpath):
            """In TAU output, sometimes we see a node as a parent in the
            callpath before we see it as a leaf node. In this case, we need to
            create a hatchet node for the parent.

            We can't create a node_dict for the parent because we don't know
            its metric values when we first see it in a callpath.

            Example: a => b => c "<c_metric_values>"
            Here, if we haven't seen 'b' before, we should create it when we
            create 'c'.

            This function recursively creates parent nodes in a callpath until
            it reaches the already existing parent in that callpath.
            """
            parent_node = self.callpath_to_node.get(parent_callpath)

            # Return if we arrive at an existing parent.
            # Else create a parent and add the parent/child links.
            if parent_node is not None:
                parent_node.add_child(child_node)
                child_node.add_parent(parent_node)
                return
            else:
                grand_parent_callpath = parent_callpath[:-1]
                parent_info = parent_callpath[-1]
                parent_name = ""

                if " C " in parent_info:
                    parent_name = _get_name_file_module(True, parent_info, " C ")[0]
                elif " [@] " in parent_info:
                    parent_name = _get_name_file_module(True, parent_info, " [@] ")[0]
                else:
                    parent_name = _get_name_file_module(True, parent_info, "")[0]

                parent_node = Node(
                    Frame({"type": "function", "name": parent_name}), None
                )
                self.callpath_to_node[parent_callpath] = parent_node

                parent_node.add_child(child_node)
                child_node.add_parent(parent_node)
                _create_parent(parent_node, grand_parent_callpath)

        def _construct_column_list(first_rank_filenames):
            """This function constructs columns, exc_metrics, and inc_metrics
            using all metric files of a rank. It gets all the metric files of
            a rank as a tuple and only loads the second line (metadata) of
            these files.
            """
            columns = []
            for file_index in range(len(first_rank_filenames)):
                with open(first_rank_filenames[file_index], "r") as f:
                    # Skip the first line: "192 templated_functions_MULTI_TIME"
                    next(f)
                    # No need to check if the metadata is the same for all metric files.
                    metadata = next(f)

                    # Get the first three columns from
                    # Name Calls Subrs Excl Incl ProfileCalls
                    #
                    # ProfileCalls is removed since it is typically set to 0 and not used.
                    # We only do this once since these column names are the same for all files.
                    if file_index == 0:
                        columns.extend(
                            re.match(r"\#\s(.*)\s\#", metadata).group(1).split(" ")[:-3]
                        )

                    # Example metric_name: "PAPI_L2_TCM"
                    # TODO: Decide if Calls and Subrs should be inc or exc metrics
                    metric_name = re.search(r"<value>(.*?)<\/value>", metadata).group(1)
                    if metric_name == "CPU_TIME" or metric_name == "TIME":
                        metric_name = "time"
                    elif metric_name == "Name":
                        metric_name = "name"
                    columns.extend([metric_name, metric_name + " (inc)"])
                    self.exc_metrics.append(metric_name)
                    self.inc_metrics.append(metric_name + " (inc)")
            return columns

        # dirpath -> returns path of a directory, string
        # dirnames -> returns directory names, list
        # files -> returns filenames in a directory, list
        profile_filenames = []
        for dirpath, dirnames, files in os.walk(self.dirname):
            profiles_in_dir = glob.glob(dirpath + "/profile.*")
            if profiles_in_dir:
                # sort input files in each directory in the same order
                profile_filenames.append(sorted(profiles_in_dir))

        # Store all files in a list of tuples.
        # Each tuple stores all the metric files of a rank.
        # We process one rank at a time.
        # Example: [(metric1/profile.x.0.0, metric2/profile.x.0.0), ...]
        profile_filenames = list(zip(*profile_filenames))

        # Get column information from the metric files of a rank.
        self.columns = _construct_column_list(profile_filenames[0])

        list_roots = []
        prev_rank, prev_thread = 0, 0
        # Example filenames_per_rank: (metric1/profile.x.0.0 ...)
        for filenames_per_rank in profile_filenames:
            file_info = filenames_per_rank[0].split(".")
            rank, thread = int(file_info[-3]), int(file_info[-1])
            if not self.multiple_ranks:
                self.multiple_ranks = True if rank != prev_rank else False
            if not self.multiple_threads:
                self.multiple_threads = True if thread != prev_thread else False

            # Load all files, each of which represents a different metric, for a
            # rank or a thread. If there are 2 metrics, load metric1/profile.x.0.0
            # and metric2/profile.x.0.0.
            file_data = []
            for f_index in range(len(filenames_per_rank)):
                # Store the lines after the metadata.
                with open(filenames_per_rank[f_index], "r") as f:
                    file_data.append(f.readlines()[2:])

            # Get the root information from only the first file to compare it
            # with the others.
            # Example: ".TAU application" 1 1 272 15755429 0 GROUP="TAU_DEFAULT"
            root_line = re.match(r"\"(.*)\"\s(.*)\sG", file_data[0][0])
            root_name = root_line.group(1).strip(" ")
            # Convert it to a tuple to use it as a key in the callpath_to_node dictionary.
            root_callpath = tuple([root_name])
            root_values = list(map(int, root_line.group(2).split(" ")[:-1]))

            # After the first profile.0.0.0, only get Excl and Incl metric values
            # from other files since the other columns will be the same.
            # We assume each metric file of a rank has the same root.
            first_file_root_name = re.search(r"\"(.*?)\"", file_data[0][0]).group(1)
            for f_index in range(1, len(file_data)):
                root_name = re.search(r"\"(.*?)\"", file_data[f_index][0]).group(1)
                # The assert statement below throws an error if the roots are not
                # the same across the metric files.
                # TODO: We need to find a solution if this throws an error.
                assert first_file_root_name == root_name, (
                    "Metric files for a rank have different roots.\n"
                    + "File: "
                    + filenames_per_rank[f_index]
                    + "\nLine: 2"
                )
                root_line = re.match(r"\"(.*)\"\s(.*)\sG", file_data[f_index][0])
                root_values.extend(list(map(int, root_line.group(2).split(" ")[2:4])))

            # Check if the root exists in other ranks.
            # Note that we assume the root is the same for all metric files of a rank.
            if root_callpath not in self.callpath_to_node:
                # Create the root node since it doesn't exist.
                root_node = Node(Frame({"name": root_name, "type": "function"}), None)

                # Store callpaths to identify nodes.
                self.callpath_to_node[root_callpath] = root_node
                list_roots.append(root_node)
            else:
                # Don't create a new node since it was created earlier.
                root_node = self.callpath_to_node.get(root_callpath)

            node_dict = self.create_node_dict(
                root_node,
                self.columns,
                root_values,
                root_name,
                None,
                None,
                0,
                0,
                rank,
                thread,
            )
            self.node_dicts.append(node_dict)

            # Start from the line after the root.
            # Iterate over only the first metric file of a rank
            # since the lines should be exactly the same across
            # all metric files of a rank.
            # Use the same "line_index" for the other metric files of a rank.
            for line_index in range(1, len(file_data[0])):
                line = file_data[0][line_index]
                metric_values = []
                # We only parse the lines that have the "=>" symbol, which shows
                # the callpath info. We just skip the other lines.
                if "=>" in line:
                    # Example: ".TAU application => foo() => bar()" 31 0 155019 155019 0 GROUP="TAU_SAMPLE|TAU_CALLPATH"
                    callpath_line_regex = re.match(r"\"(.*)\"\s(.*)\sG", line)
                    # callpath: ".TAU application => foo() => bar()"
                    callpath = [
                        name.strip(" ")
                        for name in callpath_line_regex.group(1).split("=>")
                    ]
                    # Example leaf_name: StrToInt [{lulesh-util.cc} {13,1}-{29,1}]
                    leaf_name = callpath[-1]
                    callpath = tuple(callpath)
                    parent_callpath = callpath[:-1]
                    # Don't include the value for ProfileCalls.
                    # metric_values: 31 0 155019 155019
                    metric_values = list(
                        map(float, callpath_line_regex.group(2).split(" ")[:-1])
                    )

                    # Get start and end line information.
                    leaf_line_numbers = _get_line_numbers(leaf_name)
                    # Get name, file, and module information using the leaf name
                    # and the symbol in it.
                    if " C " in leaf_name:
                        leaf_name_file_module = _get_name_file_module(
                            False, leaf_name, " C "
                        )
                    elif " [@] " in leaf_name:
                        leaf_name_file_module = _get_name_file_module(
                            False, leaf_name, " [@] "
                        )
                    else:
                        leaf_name_file_module = _get_name_file_module(
                            False, leaf_name, ""
                        )

                    # Example: ".TAU application => foo() => bar()" 31 0 155019...
                    first_file_callpath_line = re.search(
                        r"\"(.*?)\"", file_data[0][line_index]
                    ).group(1)

                    # After the first profile.x.0.0, only get Excl and Incl metric
                    # values from other files.
                    for f_index in range(1, len(file_data)):
                        other_file_callpath_line = re.search(
                            r"\"(.*?)\"", file_data[f_index][line_index]
                        ).group(1)
                        # We assume the metric files of a rank have exactly the same lines.
                        # The only difference should be the Incl and Excl metric values.
                        # TODO: We should find a solution if this raises an error.
                        assert first_file_callpath_line == other_file_callpath_line, (
                            "Lines across metric files for a rank are not the same.\n"
                            + "File: "
                            + filenames_per_rank[f_index]
                            + "\nLine: "
                            + str(line_index + 3)
                        )

                        # Get the information from the same line, "line_index", in each file.
                        callpath_line_regex = re.match(
                            r"\"(.*)\"\s(.*)\sG", file_data[f_index][line_index]
                        )
                        metric_values.extend(
                            map(float, callpath_line_regex.group(2).split(" ")[2:4])
                        )

                    leaf_node = self.callpath_to_node.get(callpath)
                    # Check if that node was created earlier.
                    if leaf_node is None:
                        # Create the node since it doesn't exist.
                        leaf_node = Node(
                            Frame(
                                {"type": "function", "name": leaf_name_file_module[0]}
                            ),
                            None,
                        )
                        self.callpath_to_node[callpath] = leaf_node

                        # Get its parent from its callpath.
                        parent_node = self.callpath_to_node.get(parent_callpath)
                        if parent_node is None:
                            # Create the parent if it doesn't exist.
                            _create_parent(leaf_node, parent_callpath)
                        else:
                            parent_node.add_child(leaf_node)
                            leaf_node.add_parent(parent_node)

                    node_dict = self.create_node_dict(
                        leaf_node,
                        self.columns,
                        metric_values,
                        # name
                        leaf_name_file_module[0],
                        # file
                        leaf_name_file_module[1],
                        # module
                        leaf_name_file_module[2],
                        # start line
                        leaf_line_numbers[0],
                        # end line
                        leaf_line_numbers[1],
                        rank,
                        thread,
                    )
                    self.node_dicts.append(node_dict)

        return list_roots
    def read(self):
        """Read the TAU profile file to extract the calling context tree."""
        # Add all nodes and roots.
        roots = self.create_graph()
        # Create a graph object once all nodes have been added.
        graph = Graph(roots)
        graph.enumerate_traverse()

        dataframe = pd.DataFrame.from_dict(data=self.node_dicts)

        indices = []
        # Set indices according to rank/thread numbers.
        if self.multiple_ranks and self.multiple_threads:
            indices = ["node", "rank", "thread"]
        elif self.multiple_ranks:
            dataframe.drop(columns=["thread"], inplace=True)
            indices = ["node", "rank"]
        elif self.multiple_threads:
            dataframe.drop(columns=["rank"], inplace=True)
            indices = ["node", "thread"]
        else:
            indices = ["node"]

        dataframe.set_index(indices, inplace=True)
        dataframe.sort_index(inplace=True)

        # Fill the missing ranks.
        # After unstacking and iterating over rows, there will be "NaN"
        # values for some ranks. Find the first rank that has a non-NaN
        # value and use it for the other rows/ranks of the multiindex.
        # TODO: iterrows() is not the best way to iterate over rows.
        if self.multiple_ranks or self.multiple_threads:
            dataframe = dataframe.unstack()

            for idx, row in dataframe.iterrows():
                # There is always a valid name for an index.
                # Take that valid name and assign it to the other ranks/rows.
                name = row["name"][row["name"].first_valid_index()]
                dataframe.loc[idx, "name"] = name

                # Sometimes there is no file information.
                if row["file"].first_valid_index() is not None:
                    file = row["file"][row["file"].first_valid_index()]
                    dataframe.loc[idx, "file"] = file

                # Sometimes there is no module information.
                if row["module"].first_valid_index() is not None:
                    module = row["module"][row["module"].first_valid_index()]
                    dataframe.loc[idx, "module"] = module

            # Fill the rest with 0.
            dataframe.fillna(0, inplace=True)

            # Stack the dataframe.
            dataframe = dataframe.stack()

        default_metric = "time (inc)"

        return hatchet.graphframe.GraphFrame(
            graph, dataframe, self.exc_metrics, self.inc_metrics, default_metric
        )
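

# Minimal usage sketch, not part of the reader itself: build a GraphFrame from a
# directory of TAU profiles. The directory name "tau_profiles" is hypothetical;
# it is expected to contain one subdirectory per metric, each holding
# profile.<node>.<context>.<thread> files.
if __name__ == "__main__":
    gf = TAUReader("tau_profiles").read()
    print(gf.dataframe.head())
    print(gf.tree())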