'''
This file is part of PM4Py (More Info: https://pm4py.fit.fraunhofer.de).
PM4Py is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
PM4Py is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with PM4Py. If not, see <https://www.gnu.org/licenses/>.
'''
import pkgutil
from pm4py import util as pmutil
from pm4py.algo.discovery.dfg.variants import native as dfg_inst
from pm4py.algo.discovery.inductive.util import shared_constants
from pm4py.algo.discovery.inductive.util import tree_consistency
from pm4py.algo.discovery.inductive.util.petri_el_count import Counts
from pm4py.algo.discovery.inductive.variants.im.util import get_tree_repr_implain
from pm4py.algo.discovery.inductive.variants.im_f.data_structures import subtree_infrequent as subtree
from pm4py.objects.conversion.log import converter
from pm4py.objects.conversion.process_tree import converter as tree_to_petri
from pm4py.objects.log.obj import EventLog
from pm4py.objects.log.util import filtering_utils
from pm4py.objects.process_tree.utils import generic
from pm4py.objects.process_tree.utils.generic import tree_sort
from pm4py.statistics.attributes.log import get as attributes_get
from pm4py.statistics.end_activities.log import get as end_activities_get
from pm4py.statistics.start_activities.log import get as start_activities_get
from pm4py.util import exec_utils
from pm4py.util import variants_util
from pm4py.util import xes_constants
import deprecation
from pm4py.util import constants
from enum import Enum
[docs]class Parameters(Enum):
ACTIVITY_KEY = constants.PARAMETER_CONSTANT_ACTIVITY_KEY
START_TIMESTAMP_KEY = constants.PARAMETER_CONSTANT_START_TIMESTAMP_KEY
TIMESTAMP_KEY = constants.PARAMETER_CONSTANT_TIMESTAMP_KEY
CASE_ID_KEY = constants.PARAMETER_CONSTANT_CASEID_KEY
NOISE_THRESHOLD = "noiseThreshold"
EMPTY_TRACE_KEY = "empty_trace"
ONCE_PER_TRACE_KEY = "once_per_trace"
CONCURRENT_KEY = "concurrent"
STRICT_TAU_LOOP_KEY = "strict_tau_loop"
TAU_LOOP_KEY = "tau_loop"
[docs]def apply(log, parameters):
"""
Apply the IM_F algorithm to a log obtaining a Petri net along with an initial and final marking
Parameters
-----------
log
Log
parameters
Parameters of the algorithm, including:
Parameters.ACTIVITY_KEY -> attribute of the log to use as activity name
(default concept:name)
Returns
-----------
net
Petri net
initial_marking
Initial marking
final_marking
Final marking
"""
if pkgutil.find_loader("pandas"):
import pandas as pd
from pm4py.statistics.variants.pandas import get as variants_get
if type(log) is pd.DataFrame:
vars = variants_get.get_variants_count(log, parameters=parameters)
return apply_variants(vars, parameters=parameters)
log = converter.apply(log, parameters=parameters)
net, initial_marking, final_marking = tree_to_petri.apply(apply_tree(log, parameters))
return net, initial_marking, final_marking
[docs]def apply_variants(variants, parameters=None):
"""
Apply the IM_F algorithm to a dictionary of variants, obtaining a Petri net along with an initial and final marking
Parameters
-----------
variants
Variants
parameters
Parameters of the algorithm, including:
Parameters.ACTIVITY_KEY -> attribute of the log to use as activity name
(default concept:name)
Returns
-----------
net
Petri net
initial_marking
Initial marking
final_marking
Final marking
"""
net, im, fm = tree_to_petri.apply(apply_tree_variants(variants, parameters=parameters))
return net, im, fm
[docs]@deprecation.deprecated('2.2.10', '3.0.0', details='use newer IM implementation (IM_CLEAN)')
def apply_tree(log, parameters):
"""
Apply the IM_FF algorithm to a log obtaining a process tree
Parameters
----------
log
Log
parameters
Parameters of the algorithm, including:
Parameters.ACTIVITY_KEY -> attribute of the log to use as activity name
(default concept:name)
Returns
----------
process_tree
Process tree
"""
if parameters is None:
parameters = {}
if pkgutil.find_loader("pandas"):
import pandas as pd
from pm4py.statistics.variants.pandas import get as variants_get
if type(log) is pd.DataFrame:
vars = variants_get.get_variants_count(log, parameters=parameters)
return apply_tree_variants(vars, parameters=parameters)
activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters,
pmutil.xes_constants.DEFAULT_NAME_KEY)
log = converter.apply(log, parameters=parameters)
# keep only the activity attribute (since the others are not used)
log = filtering_utils.keep_only_one_attribute_per_event(log, activity_key)
noise_threshold = exec_utils.get_param_value(Parameters.NOISE_THRESHOLD, parameters,
shared_constants.NOISE_THRESHOLD_IMF)
dfg = [(k, v) for k, v in dfg_inst.apply(log, parameters=parameters).items() if v > 0]
c = Counts()
activities = attributes_get.get_attribute_values(log, activity_key)
start_activities = list(start_activities_get.get_start_activities(log, parameters=parameters).keys())
end_activities = list(end_activities_get.get_end_activities(log, parameters=parameters).keys())
contains_empty_traces = False
traces_length = [len(trace) for trace in log]
if traces_length:
contains_empty_traces = min([len(trace) for trace in log]) == 0
# set the threshold parameter based on f and the max value in the dfg:
max_value = 0
for key, value in dfg:
if value > max_value:
max_value = value
threshold = noise_threshold * max_value
recursion_depth = 0
sub = subtree.make_tree(log, dfg, dfg, dfg, activities, c, recursion_depth, noise_threshold, threshold,
start_activities, end_activities,
start_activities, end_activities, parameters=parameters)
process_tree = get_tree_repr_implain.get_repr(sub, 0, contains_empty_traces=contains_empty_traces)
# Ensures consistency to the parent pointers in the process tree
tree_consistency.fix_parent_pointers(process_tree)
# Fixes a 1 child XOR that is added when single-activities flowers are found
tree_consistency.fix_one_child_xor_flower(process_tree)
# folds the process tree (to simplify it in case fallthroughs/filtering is applied)
process_tree = generic.fold(process_tree)
# sorts the process tree to ensure consistency in different executions of the algorithm
tree_sort(process_tree)
return process_tree
[docs]def apply_tree_variants(variants, parameters=None):
"""
Apply the IM_F algorithm to a dictionary of variants obtaining a process tree
Parameters
----------
variants
Variants
parameters
Parameters of the algorithm, including:
Parameters.ACTIVITY_KEY -> attribute of the log to use as activity name
(default concept:name)
Returns
----------
process_tree
Process tree
"""
log = EventLog()
activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)
var_keys = list(variants.keys())
for var in var_keys:
val = variants[var]
if type(val) is list:
val = len(val)
for i in range(val):
trace = variants_util.variant_to_trace(var, parameters=parameters)
log.append(trace)
return apply_tree(log, parameters=parameters)
[docs]def apply_infrequent_petrinet(tree):
return tree_to_petri.apply(tree)