Source code for pm4py.algo.discovery.inductive.variants.im.data_structures.subtree_plain

'''
    This file is part of PM4Py (More Info: https://pm4py.fit.fraunhofer.de).

    PM4Py is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    PM4Py is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with PM4Py.  If not, see <https://www.gnu.org/licenses/>.
'''
from copy import copy
import pkgutil

from pm4py.algo.discovery.dfg.utils.dfg_utils import get_activities_from_dfg, \
    infer_start_activities, infer_end_activities
from pm4py.algo.discovery.dfg.utils.dfg_utils import get_ingoing_edges, get_outgoing_edges
from pm4py.algo.discovery.dfg.utils.dfg_utils import negate, get_activities_self_loop, transform_dfg_to_directed_nx_graph
from pm4py.algo.discovery.dfg.variants import native as dfg_inst
from pm4py.algo.filtering.dfg.dfg_filtering import clean_dfg_based_on_noise_thresh
from pm4py.algo.discovery.inductive.variants.im.util import base_case, fall_through
from pm4py import util as pmutil
from pm4py.algo.discovery.inductive.variants.im.util import splitting as split
from pm4py.algo.discovery.inductive.util import parallel_cut_utils, detection_utils, cut_detection
from pm4py.statistics.attributes.log import get as attributes_get
from pm4py.statistics.end_activities.log import get as end_activities_get
from pm4py.statistics.start_activities.log import get as start_activities_get
from pm4py.util import exec_utils
from pm4py.objects.log.util import filtering_utils
import logging
from pm4py.util import constants
from enum import Enum


[docs]class Parameters(Enum): ACTIVITY_KEY = constants.PARAMETER_CONSTANT_ACTIVITY_KEY START_TIMESTAMP_KEY = constants.PARAMETER_CONSTANT_START_TIMESTAMP_KEY TIMESTAMP_KEY = constants.PARAMETER_CONSTANT_TIMESTAMP_KEY CASE_ID_KEY = constants.PARAMETER_CONSTANT_CASEID_KEY NOISE_THRESHOLD = "noiseThreshold" EMPTY_TRACE_KEY = "empty_trace" ONCE_PER_TRACE_KEY = "once_per_trace" CONCURRENT_KEY = "concurrent" STRICT_TAU_LOOP_KEY = "strict_tau_loop" TAU_LOOP_KEY = "tau_loop"
[docs]class SubtreePlain(object): def __init__(self, log, dfg, master_dfg, initial_dfg, activities, counts, rec_depth, noise_threshold=0, start_activities=None, end_activities=None, initial_start_activities=None, initial_end_activities=None, parameters=None, real_init=True): """ Constructor Parameters ----------- dfg Directly follows graph of this subtree master_dfg Original DFG initial_dfg Referral directly follows graph that should be taken in account adding hidden/loop transitions activities Activities of this subtree counts Shared variable rec_depth Current recursion depth """ if real_init: self.master_dfg = copy(master_dfg) self.initial_dfg = copy(initial_dfg) self.counts = counts self.rec_depth = rec_depth self.noise_threshold = noise_threshold self.start_activities = start_activities if self.start_activities is None: self.start_activities = [] self.end_activities = end_activities if self.end_activities is None: self.end_activities = [] self.initial_start_activities = initial_start_activities if self.initial_start_activities is None: self.initial_start_activities = infer_start_activities(master_dfg) self.initial_end_activities = initial_end_activities if self.initial_end_activities is None: self.initial_end_activities = infer_end_activities(master_dfg) self.second_iteration = None self.activities = None self.dfg = None self.outgoing = None self.ingoing = None self.self_loop_activities = None self.initial_ingoing = None self.initial_outgoing = None self.activities_direction = None self.activities_dir_list = None self.negated_dfg = None self.negated_activities = None self.negated_outgoing = None self.negated_ingoing = None self.detected_cut = None self.children = None self.must_insert_skip = False self.log = log self.inverted_dfg = None self.original_log = log self.initialize_tree(dfg, log, initial_dfg, activities, parameters=parameters) def __deepcopy__(self, memodict={}): """ def __init__(self, log, dfg, master_dfg, initial_dfg, activities, counts, rec_depth, noise_threshold=0, start_activities=None, end_activities=None, initial_start_activities=None, initial_end_activities=None, parameters=None, real_init=False): :param memodict: :return: """ S = SubtreePlain(None, None, None, None, None, None, None, real_init=False) S.master_dfg = self.master_dfg S.initial_dfg = self.initial_dfg S.counts = self.counts S.rec_depth = self.rec_depth S.noise_threshold = self.noise_threshold S.start_activities = self.start_activities S.end_activities = self.end_activities S.initial_start_activities = self.initial_start_activities S.initial_end_activities = self.initial_end_activities S.second_iteration = self.second_iteration S.activities = self.activities S.dfg = self.dfg S.outgoing = self.outgoing S.ingoing = self.ingoing S.self_loop_activities = self.self_loop_activities S.initial_ingoing = self.initial_ingoing S.initial_outgoing = self.initial_outgoing S.activities_direction = self.activities_direction S.activities_dir_list = self.activities_dir_list S.negated_dfg = self.negated_dfg S.negated_activities = self.negated_activities S.negated_outgoing = self.negated_outgoing S.negated_ingoing = self.negated_ingoing S.detected_cut = self.detected_cut S.children = self.children S.must_insert_skip = self.must_insert_skip S.log = self.log S.inverted_dfg = self.inverted_dfg S.original_log = self.original_log try: S.parameters = self.parameters except: pass return S
[docs] def initialize_tree(self, dfg, log, initial_dfg, activities, second_iteration=False, end_call=True, parameters=None): """ Initialize the tree Parameters ----------- dfg Directly follows graph of this subtree log the event log initial_dfg Referral directly follows graph that should be taken in account adding hidden/loop transitions activities Activities of this subtree second_iteration Boolean that indicates if we are executing this method for the second time """ self.second_iteration = second_iteration if activities is None: self.activities = get_activities_from_dfg(dfg) else: self.activities = copy(activities) if second_iteration: self.dfg = clean_dfg_based_on_noise_thresh(self.dfg, self.activities, self.noise_threshold) else: self.dfg = copy(dfg) self.initial_dfg = initial_dfg self.outgoing = get_outgoing_edges(self.dfg) self.ingoing = get_ingoing_edges(self.dfg) self.self_loop_activities = get_activities_self_loop(self.dfg) self.initial_outgoing = get_outgoing_edges(self.initial_dfg) self.initial_ingoing = get_ingoing_edges(self.initial_dfg) self.negated_dfg = negate(self.dfg) self.negated_activities = get_activities_from_dfg(self.negated_dfg) self.negated_outgoing = get_outgoing_edges(self.negated_dfg) self.negated_ingoing = get_ingoing_edges(self.negated_dfg) self.detected_cut = None self.children = [] self.log = log self.original_log = log self.parameters = parameters self.detect_cut(second_iteration=False, parameters=parameters)
[docs] def create_dfg(self, parameters=None): if parameters is None: parameters = {} dfg = [(k, v) for k, v in dfg_inst.apply(self.log, parameters=parameters).items() if v > 0] return dfg
[docs] def is_followed_by(self, dfg, activity_a, activity_b): """ check if Activity A is followed by Activity B in the dfg of self, returns bool. """ for i in range(0, len(dfg)): if (activity_a, activity_b) == dfg[i][0]: return True return False
[docs] def detect_xor(self, conn_components): """ Detects xor cut Parameters -------------- conn_components Connected components this_nx_graph NX graph calculated on the DFG strongly_connected_components Strongly connected components """ if not self.contains_empty_trace() and len(conn_components) > 1: return [True, conn_components] return [False, []]
[docs] def detect_concurrent(self): if self.contains_empty_trace(): return [False, []] inverted_dfg = [] # create an inverted dfg, the connected components of this dfg are the split for a in self.activities: for b in self.activities: if a != b: if not self.is_followed_by(self.dfg, a, b) or not self.is_followed_by(self.dfg, b, a): if ((a, b), 1) not in inverted_dfg: inverted_dfg.append(((a, b), 1)) inverted_dfg.append(((b, a), 1)) self.inverted_dfg = inverted_dfg new_ingoing = get_ingoing_edges(inverted_dfg) new_outgoing = get_outgoing_edges(inverted_dfg) conn = detection_utils.get_connected_components(new_ingoing, new_outgoing, self.activities) if len(conn) > 1: conn = parallel_cut_utils.check_par_cut(conn, self.ingoing, self.outgoing) if len(conn) > 1: if parallel_cut_utils.check_sa_ea_for_each_branch(conn, self.start_activities, self.end_activities): return [True, conn] return [False, []]
[docs] def get_index_of_x_in_list(self, x, li): for i in range(0, len(li)): if li[i] == x: return i
[docs] def find_set_with_x(self, x, list_of_sets): for i in range(0, len(list_of_sets)): if x in list_of_sets[i]: return i
[docs] def contains_empty_trace(self): contains = False for trace in self.log: if len(trace) == 0: contains = True return contains
[docs] def detect_loop(self): # p0 is part of return value, it contains the partition of activities # write all start and end activities in p1 if self.contains_empty_trace(): return [False, []] p1 = [] for act in self.start_activities: if act not in p1: p1.append(act) for act in self.end_activities: if act not in p1: p1.append(act) # create new dfg without the transitions to start and end activities new_dfg = copy(self.dfg) copy_dfg = copy(new_dfg) for ele in copy_dfg: if ele[0][0] in p1 or ele[0][1] in p1: new_dfg.remove(ele) current_activities = {} iterate_order = [] for element in self.activities: if element not in p1: current_activities.update({element: 1}) iterate_order.append(element) # create an overview of what activity has which outgoing connections outgoing = [list() for _ in range(len(iterate_order))] for i in range(0, len(iterate_order)): current_act = iterate_order[i] for element in new_dfg: if element[0][0] == current_act: if element[0][1] in iterate_order: outgoing[i].append(element[0][1]) # built p2, ..., pn : acts_not_assigned_to_set = copy(iterate_order) p = [list() for _ in range(len(iterate_order))] max_set = iterate_order max_set_found = False for i in range(0, len(iterate_order)): act = iterate_order[i] if act in acts_not_assigned_to_set: p[i] = [act] acts_not_assigned_to_set.remove(act) added = True while added: added = False # check if max set is already found count = 0 for li in p: if len(li) != 0: count += 1 if count == len(max_set): max_set_found = True break # if max set is not found, continue adding acts for act_b in p[i]: index_of_act_b_in_outgoing = self.get_index_of_x_in_list(act_b, iterate_order) for outgoing_act in outgoing[index_of_act_b_in_outgoing]: if outgoing_act in acts_not_assigned_to_set: acts_not_assigned_to_set.remove(outgoing_act) p[i].append(outgoing_act) added = True break if added: break if max_set_found: break for li in p: if len(li) == 0: p.remove(li) # p0 = get_connected_components(new_ingoing, new_outgoing, current_activities) p.insert(0, p1) p0 = p iterable_dfg = [] for i in range(0, len(self.dfg)): iterable_dfg.append(self.dfg[i][0]) # p0 is like P1,P2,...,Pn in line 3 on page 190 of the IM Thesis # check for subsets in p0 that have connections to and end or from a start activity p0_copy = [] for int_el in p0: p0_copy.append(int_el) for element in p0_copy: # for every set in p0 removed = False if element in p0 and element != p0[0]: for act in element: # for every activity in this set for e in self.end_activities: # for every end activity if e not in self.start_activities: if (act, e) in iterable_dfg: # check if connected # is there an element in dfg pointing from any act in a subset of p0 to an end activity for activ in element: if activ not in p0[0]: p0[0].append(activ) if element in p0: p0.remove(element) # remove subsets that are connected to an end activity removed = True break if removed: break for s in self.start_activities: if s not in self.end_activities: if not removed: if (s, act) in iterable_dfg: for acti in element: if acti not in p0[0]: p0[0].append(acti) if element in p0: p0.remove(element) # remove subsets that are connected to an end activity removed = True break else: break if removed: break iterable_dfg = list() for i in range(0, len(self.dfg)): iterable_dfg.append(self.dfg[i][0]) p0_copy = [] for int_el in p0: p0_copy.append(int_el) for element in p0: if element in p0 and element != p0[0]: for act in element: for e in self.end_activities: if (e, act) in iterable_dfg: # get those act, that are connected from an end activity for e2 in self.end_activities: # check, if the act is connected from all end activities if (e2, act) not in iterable_dfg: for acti in element: if acti not in p0[0]: p0[0].append(acti) if element in p0: p0.remove(element) # remove subsets that are connected to an end activity break for s in self.start_activities: if (act, s) in iterable_dfg: # same as above (in this case for activities connected to # a start activity) for s2 in self.start_activities: if (act, s2) not in iterable_dfg: for acti in element: if acti not in p0[0]: p0[0].append(acti) if element in p0: p0.remove(element) # remove subsets that are connected to an end activity break index_to_delete = [] for i in range(0, len(p0)): if not p0[i]: index_to_delete.insert(0, i) for index in index_to_delete: p0.remove(p0[index]) if len(p0) > 1: return [True, p0] else: return [False, []]
[docs] def check_for_cut(self, test_log, deleted_activity=None, parameters=None): if pkgutil.find_loader("networkx"): import networkx as nx if deleted_activity is not None: del self.activities[deleted_activity] if parameters is None: parameters = {} dfg = [(k, v) for k, v in dfg_inst.apply(test_log, parameters=parameters).items() if v > 0] self.dfg = dfg self.outgoing = get_outgoing_edges(self.dfg) self.ingoing = get_ingoing_edges(self.dfg) self.log = test_log conn_components = detection_utils.get_connected_components(self.ingoing, self.outgoing, self.activities) this_nx_graph = transform_dfg_to_directed_nx_graph(self.dfg, activities=self.activities) strongly_connected_components = [list(x) for x in nx.strongly_connected_components(this_nx_graph)] # search for cut and return true as soon as a cut is found: xor_cut = self.detect_xor(conn_components) if xor_cut[0]: return True else: sequence_cut = cut_detection.detect_sequential_cut(self, self.dfg, strongly_connected_components) if sequence_cut[0]: return True else: parallel_cut = self.detect_concurrent() if parallel_cut[0]: return True else: loop_cut = self.detect_loop() if loop_cut[0]: return True else: return False else: msg = "networkx is not available. inductive miner cannot be used!" logging.error(msg) raise Exception(msg)
[docs] def detect_cut(self, second_iteration=False, parameters=None): if pkgutil.find_loader("networkx"): import networkx as nx if parameters is None: parameters = {} activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, pmutil.xes_constants.DEFAULT_NAME_KEY) # check base cases: empty_log = base_case.empty_log(self.log) single_activity = base_case.single_activity(self.log, activity_key) if empty_log: self.detected_cut = 'empty_log' elif single_activity: self.detected_cut = 'single_activity' # if no base cases are found, search for a cut: else: conn_components = detection_utils.get_connected_components(self.ingoing, self.outgoing, self.activities) this_nx_graph = transform_dfg_to_directed_nx_graph(self.dfg, activities=self.activities) strongly_connected_components = [list(x) for x in nx.strongly_connected_components(this_nx_graph)] xor_cut = self.detect_xor(conn_components) # the following part searches for a cut in the current log # if a cut is found, the log is split according to the cut, the resulting logs are saved in new_logs # recursion is used on all the logs in new_logs if xor_cut[0]: logging.debug("xor_cut") self.detected_cut = 'concurrent' new_logs = split.split_xor(xor_cut[1], self.log, activity_key) for i in range(len(new_logs)): new_logs[i] = filtering_utils.keep_one_trace_per_variant(new_logs[i], parameters=parameters) for l in new_logs: new_dfg = [(k, v) for k, v in dfg_inst.apply(l, parameters=parameters).items() if v > 0] activities = attributes_get.get_attribute_values(l, activity_key) start_activities = list( start_activities_get.get_start_activities(l, parameters=parameters).keys()) end_activities = list( end_activities_get.get_end_activities(l, parameters=parameters).keys()) self.children.append( SubtreePlain(l, new_dfg, self.master_dfg, self.initial_dfg, activities, self.counts, self.rec_depth + 1, noise_threshold=self.noise_threshold, start_activities=start_activities, end_activities=end_activities, initial_start_activities=self.initial_start_activities, initial_end_activities=self.initial_end_activities, parameters=parameters)) else: sequence_cut = cut_detection.detect_sequential_cut(self, self.dfg, strongly_connected_components) if sequence_cut[0]: logging.debug("sequence_cut") new_logs = split.split_sequence(sequence_cut[1], self.log, activity_key) for i in range(len(new_logs)): new_logs[i] = filtering_utils.keep_one_trace_per_variant(new_logs[i], parameters=parameters) self.detected_cut = "sequential" for l in new_logs: new_dfg = [(k, v) for k, v in dfg_inst.apply(l, parameters=parameters).items() if v > 0] activities = attributes_get.get_attribute_values(l, activity_key) start_activities = list( start_activities_get.get_start_activities(l, parameters=parameters).keys()) end_activities = list( end_activities_get.get_end_activities(l, parameters=parameters).keys()) self.children.append( SubtreePlain(l, new_dfg, self.master_dfg, self.initial_dfg, activities, self.counts, self.rec_depth + 1, noise_threshold=self.noise_threshold, start_activities=start_activities, end_activities=end_activities, initial_start_activities=self.initial_start_activities, initial_end_activities=self.initial_end_activities, parameters=parameters)) else: parallel_cut = self.detect_concurrent() if parallel_cut[0]: logging.debug("parallel_cut") new_logs = split.split_parallel(parallel_cut[1], self.log, activity_key) for i in range(len(new_logs)): new_logs[i] = filtering_utils.keep_one_trace_per_variant(new_logs[i], parameters=parameters) self.detected_cut = "parallel" for l in new_logs: new_dfg = [(k, v) for k, v in dfg_inst.apply(l, parameters=parameters).items() if v > 0] activities = attributes_get.get_attribute_values(l, activity_key) start_activities = list( start_activities_get.get_start_activities(l, parameters=parameters).keys()) end_activities = list( end_activities_get.get_end_activities(l, parameters=parameters).keys()) self.children.append( SubtreePlain(l, new_dfg, self.master_dfg, self.initial_dfg, activities, self.counts, self.rec_depth + 1, noise_threshold=self.noise_threshold, start_activities=start_activities, end_activities=end_activities, initial_start_activities=self.initial_start_activities, initial_end_activities=self.initial_end_activities, parameters=parameters)) else: loop_cut = self.detect_loop() if loop_cut[0]: logging.debug("loop_cut") new_logs = split.split_loop(loop_cut[1], self.log, activity_key) for i in range(len(new_logs)): new_logs[i] = filtering_utils.keep_one_trace_per_variant(new_logs[i], parameters=parameters) self.detected_cut = "loopCut" for l in new_logs: new_dfg = [(k, v) for k, v in dfg_inst.apply(l, parameters=parameters).items() if v > 0] activities = attributes_get.get_attribute_values(l, activity_key) start_activities = list( start_activities_get.get_start_activities(l, parameters=parameters).keys()) end_activities = list( end_activities_get.get_end_activities(l, parameters=parameters).keys()) self.children.append( SubtreePlain(l, new_dfg, self.master_dfg, self.initial_dfg, activities, self.counts, self.rec_depth + 1, noise_threshold=self.noise_threshold, start_activities=start_activities, end_activities=end_activities, initial_start_activities=self.initial_start_activities, initial_end_activities=self.initial_end_activities, parameters=parameters)) # if the code gets to this point, there is no base_case and no cut found in the log # therefore, we now apply fall through: else: self.apply_fall_through(parameters) else: msg = "networkx is not available. inductive miner cannot be used!" logging.error(msg) raise Exception(msg)
# this is called at the end of detect_cut, if no cut was found and a fallthrough needs to be applied
[docs] def apply_fall_through(self, parameters=None): if parameters is None: parameters = {} activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, pmutil.xes_constants.DEFAULT_NAME_KEY) # set flags for fall_throughs, base case is True (enabled) use_empty_trace = (Parameters.EMPTY_TRACE_KEY not in parameters) or parameters[ Parameters.EMPTY_TRACE_KEY] use_act_once_per_trace = (Parameters.ONCE_PER_TRACE_KEY not in parameters) or parameters[ Parameters.ONCE_PER_TRACE_KEY] use_act_concurrent = (Parameters.CONCURRENT_KEY not in parameters) or parameters[ Parameters.CONCURRENT_KEY] use_strict_tau_loop = (Parameters.STRICT_TAU_LOOP_KEY not in parameters) or parameters[ Parameters.STRICT_TAU_LOOP_KEY] use_tau_loop = (Parameters.TAU_LOOP_KEY not in parameters) or parameters[Parameters.TAU_LOOP_KEY] if use_empty_trace: empty_trace, new_log = fall_through.empty_trace(self.log) # if an empty trace is found, the empty trace fallthrough applies # else: empty_trace = False if empty_trace: logging.debug("empty_trace") activites_left = [] for trace in new_log: for act in trace: if act[activity_key] not in activites_left: activites_left.append(act[activity_key]) self.detected_cut = 'empty_trace' new_dfg = [(k, v) for k, v in dfg_inst.apply(new_log, parameters=parameters).items() if v > 0] activities = attributes_get.get_attribute_values(new_log, activity_key) start_activities = list( start_activities_get.get_start_activities(new_log, parameters=self.parameters).keys()) end_activities = list(end_activities_get.get_end_activities(new_log, parameters=self.parameters).keys()) self.children.append( SubtreePlain(new_log, new_dfg, self.master_dfg, self.initial_dfg, activities, self.counts, self.rec_depth + 1, noise_threshold=self.noise_threshold, start_activities=start_activities, end_activities=end_activities, initial_start_activities=self.initial_start_activities, initial_end_activities=self.initial_end_activities, parameters=parameters)) else: if use_act_once_per_trace: activity_once, new_log, small_log = fall_through.act_once_per_trace(self.log, self.activities, activity_key) small_log = filtering_utils.keep_one_trace_per_variant(small_log, parameters=parameters) else: activity_once = False if use_act_once_per_trace and activity_once: self.detected_cut = 'parallel' # create two new dfgs as we need them to append to self.children later new_dfg = [(k, v) for k, v in dfg_inst.apply(new_log, parameters=parameters).items() if v > 0] activities = attributes_get.get_attribute_values(new_log, activity_key) small_dfg = [(k, v) for k, v in dfg_inst.apply(small_log, parameters=parameters).items() if v > 0] small_activities = attributes_get.get_attribute_values(small_log, activity_key) self.children.append( SubtreePlain(small_log, small_dfg, self.master_dfg, self.initial_dfg, small_activities, self.counts, self.rec_depth + 1, noise_threshold=self.noise_threshold, initial_start_activities=self.initial_start_activities, initial_end_activities=self.initial_end_activities, parameters=parameters)) # continue with the recursion on the new log start_activities = list( start_activities_get.get_start_activities(new_log, parameters=self.parameters).keys()) end_activities = list( end_activities_get.get_end_activities(new_log, parameters=self.parameters).keys()) self.children.append( SubtreePlain(new_log, new_dfg, self.master_dfg, self.initial_dfg, activities, self.counts, self.rec_depth + 1, noise_threshold=self.noise_threshold, start_activities=start_activities, end_activities=end_activities, initial_start_activities=self.initial_start_activities, initial_end_activities=self.initial_end_activities, parameters=parameters)) else: if use_act_concurrent: activity_concurrent, new_log, small_log, activity_left_out = fall_through.activity_concurrent(self, self.log, self.activities, activity_key, parameters=parameters) small_log = filtering_utils.keep_one_trace_per_variant(small_log, parameters=parameters) else: activity_concurrent = False if use_act_concurrent and activity_concurrent: self.detected_cut = 'parallel' # create two new dfgs on to append later new_dfg = [(k, v) for k, v in dfg_inst.apply(new_log, parameters=parameters).items() if v > 0] activities = attributes_get.get_attribute_values(new_log, activity_key) small_dfg = [(k, v) for k, v in dfg_inst.apply(small_log, parameters=parameters).items() if v > 0] small_activities = attributes_get.get_attribute_values(small_log, activity_key) # append the concurrent activity as leaf: self.children.append( SubtreePlain(small_log, small_dfg, self.master_dfg, self.initial_dfg, small_activities, self.counts, self.rec_depth + 1, noise_threshold=self.noise_threshold, initial_start_activities=self.initial_start_activities, initial_end_activities=self.initial_end_activities, parameters=parameters)) # continue with the recursion on the new log: start_activities = list( start_activities_get.get_start_activities(new_log, parameters=self.parameters).keys()) end_activities = list( end_activities_get.get_end_activities(new_log, parameters=self.parameters).keys()) self.children.append( SubtreePlain(new_log, new_dfg, self.master_dfg, self.initial_dfg, activities, self.counts, self.rec_depth + 1, noise_threshold=self.noise_threshold, start_activities=start_activities, end_activities=end_activities, initial_start_activities=self.initial_start_activities, initial_end_activities=self.initial_end_activities, parameters=parameters)) else: if use_strict_tau_loop: strict_tau_loop, new_log = fall_through.strict_tau_loop(self.log, self.start_activities, self.end_activities, activity_key) new_log = filtering_utils.keep_one_trace_per_variant(new_log, parameters=parameters) else: strict_tau_loop = False if use_strict_tau_loop and strict_tau_loop: activites_left = [] for trace in new_log: for act in trace: if act[activity_key] not in activites_left: activites_left.append(act[activity_key]) self.detected_cut = 'strict_tau_loop' new_dfg = [(k, v) for k, v in dfg_inst.apply(new_log, parameters=parameters).items() if v > 0] activities = attributes_get.get_attribute_values(new_log, activity_key) start_activities = list( start_activities_get.get_start_activities(new_log, parameters=self.parameters).keys()) end_activities = list( end_activities_get.get_end_activities(new_log, parameters=self.parameters).keys()) self.children.append( SubtreePlain(new_log, new_dfg, self.master_dfg, self.initial_dfg, activities, self.counts, self.rec_depth + 1, noise_threshold=self.noise_threshold, start_activities=start_activities, end_activities=end_activities, initial_start_activities=self.initial_start_activities, initial_end_activities=self.initial_end_activities, parameters=parameters)) else: if use_tau_loop: tau_loop, new_log = fall_through.tau_loop(self.log, self.start_activities, activity_key) new_log = filtering_utils.keep_one_trace_per_variant(new_log, parameters=parameters) else: tau_loop = False if use_tau_loop and tau_loop: activites_left = [] for trace in new_log: for act in trace: if act[activity_key] not in activites_left: activites_left.append(act[activity_key]) self.detected_cut = 'tau_loop' new_dfg = [(k, v) for k, v in dfg_inst.apply(new_log, parameters=parameters).items() if v > 0] activities = attributes_get.get_attribute_values(new_log, activity_key) start_activities = list(start_activities_get.get_start_activities(new_log, parameters=self.parameters).keys()) end_activities = list( end_activities_get.get_end_activities(new_log, parameters=self.parameters).keys()) self.children.append( SubtreePlain(new_log, new_dfg, self.master_dfg, self.initial_dfg, activities, self.counts, self.rec_depth + 1, noise_threshold=self.noise_threshold, start_activities=start_activities, end_activities=end_activities, initial_start_activities=self.initial_start_activities, initial_end_activities=self.initial_end_activities, parameters=parameters)) else: logging.debug("flower model") activites_left = [] for trace in self.log: for act in trace: if act[activity_key] not in activites_left: activites_left.append(act[activity_key]) self.detected_cut = 'flower'
# apply flower fall through as last option:
[docs]def make_tree(log, dfg, master_dfg, initial_dfg, activities, c, recursion_depth, noise_threshold, start_activities, end_activities, initial_start_activities, initial_end_activities, parameters=None): tree = SubtreePlain(log, dfg, master_dfg, initial_dfg, activities, c, recursion_depth, noise_threshold, start_activities, end_activities, initial_start_activities, initial_end_activities, parameters=parameters) return tree