'''
This file is part of PM4Py (More Info: https://pm4py.fit.fraunhofer.de).
PM4Py is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
PM4Py is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with PM4Py. If not, see <https://www.gnu.org/licenses/>.
'''
import numpy as np
from pm4py.util import xes_constants as xes
from pm4py.util import constants
import deprecation
# Key of the ``parameters`` dict of the default-representation functions: when
# its value is truthy, the activity key is added to the string event attributes.
ENABLE_ACTIVITY_DEF_REPRESENTATION = "enable_activity_def_representation"
# Key of the ``parameters`` dict of the default-representation functions: when
# its value is truthy, successions of the activity key between consecutive
# events are encoded as additional features.
ENABLE_SUCC_DEF_REPRESENTATION = "enable_succ_def_representation"
def get_string_trace_attribute_rep(trace, trace_attribute):
    """
    Get a representation of the feature name associated to a string trace attribute value

    Parameters
    ------------
    trace
        Trace of the log (its ``attributes`` mapping is read)
    trace_attribute
        Attribute of the trace to consider

    Returns
    ------------
    rep
        Feature name of the form ``trace:<attribute>@<value>``; the value part is
        ``UNDEFINED`` when the trace does not carry the attribute
    """
    attributes = trace.attributes
    if trace_attribute in attributes:
        value = str(attributes[trace_attribute])
    else:
        # traces lacking the attribute all share one placeholder feature
        value = "UNDEFINED"
    return "trace:" + str(trace_attribute) + "@" + value
def get_all_string_trace_attribute_values(log, trace_attribute):
    """
    Get all string trace attribute values representations for a log

    Parameters
    ------------
    log
        Trace log
    trace_attribute
        Attribute of the trace to consider

    Returns
    ------------
    list
        Sorted list of the distinct feature names obtained for the attribute
        over all the traces of the log
    """
    # a set removes duplicates; sorted() already returns a new list
    return sorted({get_string_trace_attribute_rep(trace, trace_attribute) for trace in log})
def get_string_event_attribute_rep(event, event_attribute):
    """
    Get a representation of the feature name associated to a string event attribute value

    Parameters
    ------------
    event
        Single event of a trace
    event_attribute
        Event attribute to consider

    Returns
    ------------
    rep
        Feature name of the form ``event:<attribute>@<value>``

    Notes
    ------------
    The attribute is looked up with ``event[event_attribute]``, so the caller
    has to make sure the event carries it.
    """
    value = str(event[event_attribute])
    return "event:" + str(event_attribute) + "@" + value
def get_values_event_attribute_for_trace(trace, event_attribute):
    """
    Get all the representations for the events of a trace associated to a string event attribute values

    Parameters
    -------------
    trace
        Trace of the log
    event_attribute
        Event attribute to consider

    Returns
    -------------
    values
        Set of all feature names present for the given attribute in the given trace;
        a single ``event:<attribute>@UNDEFINED`` entry when no event carries it
    """
    values_trace = {
        get_string_event_attribute_rep(event, event_attribute)
        for event in trace
        if event_attribute in event
    }
    if not values_trace:
        # no event of the trace has the attribute: fall back to the placeholder
        values_trace.add("event:" + str(event_attribute) + "@UNDEFINED")
    return values_trace
def get_all_string_event_attribute_values(log, event_attribute):
    """
    Get all the representations for all the traces of the log associated to a string event attribute values

    Parameters
    ------------
    log
        Trace log
    event_attribute
        Event attribute to consider

    Returns
    ------------
    values
        Sorted list of all feature names present for the given attribute in the given log
    """
    values = set()
    for trace in log:
        # update in place instead of allocating a new set per trace
        values.update(get_values_event_attribute_for_trace(trace, event_attribute))
    return sorted(values)
def get_string_event_attribute_succession_rep(event1, event2, event_attribute):
    """
    Get a representation of the feature name associated to the succession of the
    values of a string event attribute in two events

    Parameters
    ------------
    event1
        First event of the succession
    event2
        Second event of the succession
    event_attribute
        Event attribute to consider

    Returns
    ------------
    rep
        Feature name of the form ``succession:<attribute>@<value1>#<value2>``

    Notes
    ------------
    Both events are indexed with ``event_attribute``, so the caller has to make
    sure the attribute is present in each of them.
    """
    first_value = str(event1[event_attribute])
    second_value = str(event2[event_attribute])
    return "succession:" + str(event_attribute) + "@" + first_value + "#" + second_value
def get_values_event_attribute_succession_for_trace(trace, event_attribute):
    """
    Get all the representations for the events of a trace associated to a string event attribute succession values

    Parameters
    -------------
    trace
        Trace of the log
    event_attribute
        Event attribute to consider

    Returns
    -------------
    values
        Set of all feature names present for the given attribute succession in the
        given trace; a single ``succession:<attribute>@UNDEFINED`` entry when no
        pair of consecutive events carries the attribute in both events
    """
    values_trace = set()
    # walk over every pair of consecutive events (i, i + 1)
    for i in range(len(trace) - 1):
        event1, event2 = trace[i], trace[i + 1]
        if event_attribute in event1 and event_attribute in event2:
            values_trace.add(get_string_event_attribute_succession_rep(event1, event2, event_attribute))
    if not values_trace:
        values_trace.add("succession:" + str(event_attribute) + "@UNDEFINED")
    return values_trace
def get_all_string_event_succession_attribute_values(log, event_attribute):
    """
    Get all the representations for all the traces of the log associated to a string event attribute succession values

    Parameters
    ------------
    log
        Trace log
    event_attribute
        Event attribute to consider

    Returns
    ------------
    values
        Sorted list of all feature names present for the given attribute succession in the given log
    """
    values = set()
    for trace in log:
        # update in place instead of allocating a new set per trace
        values.update(get_values_event_attribute_succession_for_trace(trace, event_attribute))
    return sorted(values)
def get_numeric_trace_attribute_rep(trace_attribute):
    """
    Get the feature name associated to a numeric trace attribute

    Parameters
    ------------
    trace_attribute
        Name of the trace attribute

    Returns
    ------------
    feature_name
        Name of the feature, of the form ``trace:<attribute>``
    """
    # str() keeps this helper consistent with the string-attribute representations
    return "trace:" + str(trace_attribute)
def get_numeric_trace_attribute_value(trace, trace_attribute):
    """
    Get the value of a numeric trace attribute from a given trace

    Parameters
    ------------
    trace
        Trace of the log (its ``attributes`` mapping is read)
    trace_attribute
        Name of the trace attribute

    Returns
    ------------
    value
        Value of the numeric trace attribute for the given trace

    Raises
    ------------
    Exception
        If the trace does not carry the attribute
    """
    attributes = trace.attributes
    if trace_attribute in attributes:
        return attributes[trace_attribute]
    # str() so that a non-string attribute name still yields this message
    raise Exception("at least a trace without trace attribute: " + str(trace_attribute))
def get_numeric_event_attribute_rep(event_attribute):
    """
    Get the feature name associated to a numeric event attribute

    Parameters
    ------------
    event_attribute
        Name of the event attribute

    Returns
    -------------
    feature_name
        Name of the feature, of the form ``event:<attribute>``
    """
    # str() keeps this helper consistent with the string-attribute representations
    return "event:" + str(event_attribute)
def get_numeric_event_attribute_value(event, event_attribute):
    """
    Get the value of a numeric event attribute from a given event

    Parameters
    -------------
    event
        Event
    event_attribute
        Name of the event attribute

    Returns
    -------------
    value
        Value of the numeric event attribute for the given event,
        or None when the event does not carry the attribute
    """
    if event_attribute not in event:
        return None
    return event[event_attribute]
def get_numeric_event_attribute_value_trace(trace, event_attribute):
    """
    Get the value of the last occurrence of a numeric event attribute given a trace

    Parameters
    -------------
    trace
        Trace of the log
    event_attribute
        Name of the event attribute

    Returns
    -------------
    value
        Value of the last occurrence of the numeric event attribute in the given trace

    Raises
    -------------
    Exception
        If no event of the trace provides a (non-None) value for the attribute
    """
    last_value = None
    for event in trace:
        value = get_numeric_event_attribute_value(event, event_attribute)
        # None means "attribute absent"; a value of 0 is a legitimate value and is kept
        if value is not None:
            last_value = value
    if last_value is not None:
        return last_value
    raise Exception("at least a trace without any event with event attribute: " + str(event_attribute))
@deprecation.deprecated('2.2.8', '3.0.0', details="please use pm4py.algo.transformation.log_to_features instead")
def get_default_representation_with_attribute_names(log, parameters=None, feature_names=None):
    """
    Gets the default data representation of an event log (for process tree building)
    returning also the attribute names

    Parameters
    -------------
    log
        Trace log
    parameters
        Possible parameters of the algorithm. The following keys are read:
        ENABLE_ACTIVITY_DEF_REPRESENTATION (default False),
        ENABLE_SUCC_DEF_REPRESENTATION (default False),
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY (default xes.DEFAULT_NAME_KEY),
        "blacklist" (attribute names to exclude, default empty)
    feature_names
        (If provided) Feature to use in the representation of the log

    Returns
    -------------
    data
        Data to provide for decision tree learning
    feature_names
        Names of the features, in order
    str_tr_attr
        String trace attributes used (after applying the blacklist)
    str_ev_attr
        String event attributes used (after applying the blacklist)
    num_tr_attr
        Numeric trace attributes used (after applying the blacklist)
    num_ev_attr
        Numeric event attributes used (after applying the blacklist)
    """
    # kept as a function-level import, as in the original code
    from pm4py.statistics.attributes.log.select import select_attributes_from_log_for_tree

    if parameters is None:
        parameters = {}

    enable_activity_def_representation = parameters.get(ENABLE_ACTIVITY_DEF_REPRESENTATION, False)
    enable_succ_def_representation = parameters.get(ENABLE_SUCC_DEF_REPRESENTATION, False)
    activity_key = parameters.get(constants.PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)
    blacklist = parameters.get("blacklist", [])

    str_tr_attr, str_ev_attr, num_tr_attr, num_ev_attr = select_attributes_from_log_for_tree(log)

    str_evsucc_attr = None
    if enable_succ_def_representation:
        str_evsucc_attr = [activity_key]
    if enable_activity_def_representation and activity_key not in str_ev_attr:
        str_ev_attr.append(activity_key)

    # the blacklist is applied last, so it also wins over the activity key added above
    str_tr_attr = [x for x in str_tr_attr if x not in blacklist]
    str_ev_attr = [x for x in str_ev_attr if x not in blacklist]
    num_tr_attr = [x for x in num_tr_attr if x not in blacklist]
    num_ev_attr = [x for x in num_ev_attr if x not in blacklist]
    if str_evsucc_attr is not None:
        str_evsucc_attr = [x for x in str_evsucc_attr if x not in blacklist]

    data, feature_names = get_representation(log, str_tr_attr, str_ev_attr, num_tr_attr, num_ev_attr,
                                             str_evsucc_attr=str_evsucc_attr,
                                             feature_names=feature_names)
    return data, feature_names, str_tr_attr, str_ev_attr, num_tr_attr, num_ev_attr
@deprecation.deprecated('2.2.8', '3.0.0', details="please use pm4py.algo.transformation.log_to_features instead")
def get_default_representation(log, parameters=None, feature_names=None):
    """
    Gets the default data representation of an event log (for process tree building)

    Parameters
    -------------
    log
        Trace log
    parameters
        Possible parameters of the algorithm. The following keys are read:
        ENABLE_ACTIVITY_DEF_REPRESENTATION (default False),
        ENABLE_SUCC_DEF_REPRESENTATION (default False),
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY (default xes.DEFAULT_NAME_KEY),
        "blacklist" (attribute names to exclude, default empty)
    feature_names
        (If provided) Feature to use in the representation of the log

    Returns
    -------------
    data
        Data to provide for decision tree learning
    feature_names
        Names of the features, in order
    """
    # kept as a function-level import, as in the original code
    from pm4py.statistics.attributes.log.select import select_attributes_from_log_for_tree

    if parameters is None:
        parameters = {}

    enable_activity_def_representation = parameters.get(ENABLE_ACTIVITY_DEF_REPRESENTATION, False)
    enable_succ_def_representation = parameters.get(ENABLE_SUCC_DEF_REPRESENTATION, False)
    activity_key = parameters.get(constants.PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)
    blacklist = parameters.get("blacklist", [])

    str_tr_attr, str_ev_attr, num_tr_attr, num_ev_attr = select_attributes_from_log_for_tree(log)

    str_evsucc_attr = None
    if enable_succ_def_representation:
        str_evsucc_attr = [activity_key]
    if enable_activity_def_representation and activity_key not in str_ev_attr:
        str_ev_attr.append(activity_key)

    # the blacklist is applied last, so it also wins over the activity key added above
    str_tr_attr = [x for x in str_tr_attr if x not in blacklist]
    str_ev_attr = [x for x in str_ev_attr if x not in blacklist]
    num_tr_attr = [x for x in num_tr_attr if x not in blacklist]
    num_ev_attr = [x for x in num_ev_attr if x not in blacklist]
    if str_evsucc_attr is not None:
        str_evsucc_attr = [x for x in str_evsucc_attr if x not in blacklist]

    return get_representation(log, str_tr_attr, str_ev_attr, num_tr_attr, num_ev_attr,
                              str_evsucc_attr=str_evsucc_attr,
                              feature_names=feature_names)
@deprecation.deprecated('2.2.8', '3.0.0', details="please use pm4py.algo.transformation.log_to_features instead")
def get_representation(log, str_tr_attr, str_ev_attr, num_tr_attr, num_ev_attr, str_evsucc_attr=None,
                       feature_names=None):
    """
    Get a representation of the event log that is suited for the data part of the decision tree learning

    String attributes (trace, event, succession) are encoded as 0/1 columns, one per
    feature name. Numeric trace attributes are copied as they are.
    NOTE: for numeric event attributes only the last value seen in the trace is encoded

    Parameters
    -------------
    log
        Trace log
    str_tr_attr
        List of string trace attributes to consider in data vector creation
    str_ev_attr
        List of string event attributes to consider in data vector creation
    num_tr_attr
        List of numeric trace attributes to consider in data vector creation
    num_ev_attr
        List of numeric event attributes to consider in data vector creation
    str_evsucc_attr
        List of attributes succession of values to consider in data vector creation
    feature_names
        (If provided) Feature to use in the representation of the log; features
        computed for a trace that are not in this list are ignored

    Returns
    -------------
    data
        Data to provide for decision tree learning (numpy array, one row per trace)
    feature_names
        Names of the features, in order
    """
    # normalise the optional succession list once, so both passes can simply iterate it
    succ_attrs = str_evsucc_attr if str_evsucc_attr else []

    if feature_names is None:
        # discover the features from the log, in a fixed order:
        # string trace, string event, numeric trace, numeric event, successions
        feature_names = []
        for trace_attribute in str_tr_attr:
            feature_names.extend(get_all_string_trace_attribute_values(log, trace_attribute))
        for event_attribute in str_ev_attr:
            feature_names.extend(get_all_string_event_attribute_values(log, event_attribute))
        for trace_attribute in num_tr_attr:
            feature_names.append(get_numeric_trace_attribute_rep(trace_attribute))
        for event_attribute in num_ev_attr:
            feature_names.append(get_numeric_event_attribute_rep(event_attribute))
        for event_attribute in succ_attrs:
            feature_names.extend(get_all_string_event_succession_attribute_values(log, event_attribute))

    # feature name -> column index (for a repeated name the last position wins)
    dictionary = {name: index for index, name in enumerate(feature_names)}
    count = len(feature_names)

    # the column of a numeric attribute does not depend on the trace: resolve it once
    num_tr_columns = []
    for trace_attribute in num_tr_attr:
        rep = get_numeric_trace_attribute_rep(trace_attribute)
        if rep in dictionary:
            num_tr_columns.append((trace_attribute, dictionary[rep]))
    num_ev_columns = []
    for event_attribute in num_ev_attr:
        rep = get_numeric_event_attribute_rep(event_attribute)
        if rep in dictionary:
            num_ev_columns.append((event_attribute, dictionary[rep]))

    data = []
    for trace in log:
        trace_rep = [0] * count
        for trace_attribute in str_tr_attr:
            rep = get_string_trace_attribute_rep(trace, trace_attribute)
            if rep in dictionary:
                trace_rep[dictionary[rep]] = 1
        for event_attribute in str_ev_attr:
            for value in get_values_event_attribute_for_trace(trace, event_attribute):
                if value in dictionary:
                    trace_rep[dictionary[value]] = 1
        for trace_attribute, column in num_tr_columns:
            # raises when the trace lacks the attribute
            trace_rep[column] = get_numeric_trace_attribute_value(trace, trace_attribute)
        for event_attribute, column in num_ev_columns:
            # raises when no event of the trace carries the attribute
            trace_rep[column] = get_numeric_event_attribute_value_trace(trace, event_attribute)
        for event_attribute in succ_attrs:
            for value in get_values_event_attribute_succession_for_trace(trace, event_attribute):
                if value in dictionary:
                    trace_rep[dictionary[value]] = 1
        data.append(trace_rep)

    return np.asarray(data), feature_names