Segmentation Heuristics
def dialogue_segmentation(logs, selection, seg_verbose=False):
"""
sections the dialogues in the game rounds based on a pre-defined heuristics
:param logs: list. List containing the Log objects created from the log files
:param selection: list. List containing the set of game indexes to be included in the current split
:param seg_verbose: bool. Set to True to print the decision structure
:return: A list of lists containing tuples of dialogue segments and their corresponding targets for the games in the given set
"""
cleaning_total = 0
section_counter = 0
dialogue_sections = []
for game in logs:
game_id = game.game_id
if selection and game_id not in selection:
continue
game_sections = []
for round_data in game.rounds:
selections = []
messages = round_data.messages
if seg_verbose: print("\n")
for message in round_data.messages:
if seg_verbose: print("{}: {}".format(message.speaker, message.text))
if message.type == 'selection':
selections.append((message.message_id, message.speaker, message.text))
if seg_verbose: print("\n")
if len(selections) > 6:
messages, cleaning_counter = clean_clicks(round_data)
cleaning_total += cleaning_counter
sections = []
current_section = []
current_targets = []
previous_selection = None
previous_turn = None
skip = 0
for i, message in enumerate(messages):
if skip > 0:
skip -= 1
continue
if seg_verbose: print("{}: {}".format(message.speaker, message.text))
if seg_verbose: print("--> Current section contains {} utterances".format(len(current_section)))
if message.type == 'text':
if previous_turn and seg_verbose: print("--> Previous turn text: ", previous_turn.text)
if previous_turn and is_selection(previous_turn):
if seg_verbose: print("--> Previous turn was selection")
if is_common_label(previous_selection):
if seg_verbose: print("--> Previous selection was common")
if previous_selection.speaker != message.speaker:
if seg_verbose: print("--> Previous selection was from other speaker")
next_message = messages[i + 1]
if next_message.type == 'selection':
if seg_verbose: print("--> Next turn is a selection")
if next_message.speaker == message.speaker:
if seg_verbose: print("--> Next selection is from same speaker")
if is_common_label(next_message):
if seg_verbose: print("--> Next selection is common")
if previous_selection.text == next_message.text:
if seg_verbose: print("--> Case 1")
# Case: After one speaker selected an image as common, the other speaker makes one utterance and marks the same image as common
# Resolution: The previous section is saved with the common image as referent and a new section is initialised with the trailing utterance of the second speaker
current_targets.append(get_target(previous_selection))
if current_section:
sections.append((current_section, set(current_targets)))
current_section = [message]
current_targets = []
previous_selection = next_message
previous_turn = message
skip = 1
continue
else:
if seg_verbose: print("--> Case 2")
# Case: After one speaker selected an image as common, the other speaker makes one utterance and marks an other image as common
# Resolution: The trailing utterance is added to the current section and the current section is saved with both referents. A new section is initialised empty
current_targets.extend(
[get_target(previous_selection), get_target(next_message)])
current_section.append(message)
sections.append((current_section, set(current_targets)))
current_section = []
current_targets = []
previous_selection = next_message
previous_turn = next_message
skip = 1
continue
else:
if get_target(previous_selection) == get_target(next_message):
if seg_verbose: print("--> Case 3")
# Case: After one speaker selected an image as common, the other speaker makes one utterance and marks the same image as different
# Resolution: The previous section is saved with the common image as referent and a new section is initialised with the trailing utterance of the second speaker
current_targets.append(get_target(previous_selection))
if current_section:
sections.append((current_section, set(current_targets)))
current_section = [message]
current_targets = []
previous_selection = next_message
previous_turn = message
skip = 1
continue
else:
if seg_verbose: print("--> Case 4")
# Case: After one speaker selected an image as common, the other speaker makes one utterance and marks an other image as different
# Resolution: The trailing utterance is added to the current section and the current section is saved with both referents. A new section is initialised empty
current_targets.extend(
[get_target(previous_selection), get_target(next_message)])
current_section.append(message)
sections.append((current_section, set(current_targets)))
current_section = []
current_targets = []
previous_selection = next_message
previous_turn = next_message
skip = 1
continue
else:
if seg_verbose: print("--> Case 5")
# Case: After one speaker selected an image as common, the other speaker makes one utterance and the first speaker marks a second image
# Resolution: The trailing utterance is added to the current section and the current section is saved with both referents
current_targets.extend(
[get_target(previous_selection), get_target(next_message)])
current_section.append(message)
sections.append((current_section, set(current_targets)))
current_section = []
current_targets = []
previous_selection = next_message
previous_turn = next_message
skip = 1
continue
else:
if next_message.speaker == message.speaker:
if i + 2 < len(messages):
second_next_message = messages[i + 2]
if second_next_message.speaker == next_message.speaker and second_next_message.type == 'selection' and get_target(
second_next_message) == get_target(previous_selection):
if seg_verbose: print("--> Case 6")
# Case: After one speaker selected an image as common, the other speaker makes two utterances and marks the same image
# Resolution: The trailing utterances are added to the current section and the current section is saved with the common image as referent
current_targets.append(get_target(previous_selection))
current_section.append(message)
current_section.append(next_message)
sections.append((current_section, set(current_targets)))
current_section = []
current_targets = []
previous_selection = second_next_message
previous_turn = second_next_message
skip = 2
continue
else:
if seg_verbose: print("--> Case 7")
# Case: After one speaker selected an image as common, the other speaker makes multiple utterances without marking any images
# Resolution: Save the current section with the target marked as common and initialise a new section with the current utterance
current_targets.append(get_target(previous_selection))
if current_section:
sections.append((current_section, set(current_targets)))
current_section = [message]
current_targets = []
previous_turn = message
continue
else:
pass
else:
if seg_verbose: print("--> Case 9")
# Case: After one speaker selected an image as common, there is an interaction between the speakers
# Resolution: Save the current section with the target marked as common and initialise a new section with the current utterance
current_targets.append(get_target(previous_selection))
if current_section:
sections.append((current_section, set(current_targets)))
current_section = [message]
current_targets = []
previous_turn = message
continue
else:
next_message = messages[i + 1]
if next_message.type == 'selection':
if next_message.speaker != message.speaker:
if is_common_label(next_message):
if previous_selection.text == next_message.text:
if seg_verbose: print("--> Case 10")
# Case: After one speaker selected an image as common, he or she adds something, leading to the other speaker marking the same image as common as well
# Resolution: The trailing utterance is added to the current section and the current section is saved with the common image as referent. A new section is initialised empty
current_targets.append(get_target(previous_selection))
current_section.append(message)
sections.append((current_section, set(current_targets)))
current_section = []
current_targets = []
previous_selection = next_message
previous_turn = next_message
skip = 1
continue
else:
if seg_verbose: print("--> Case 11")
# Case: After one speaker selected an image as common, he or she adds something, leading to the other speaker marking a different image as common
# Resolution: The trailing utterance is added to the current section and the current section is saved with both disagreed referents. A new section is initialised empty
current_targets.extend(
[get_target(previous_selection), get_target(next_message)])
current_section.append(message)
sections.append((current_section, set(current_targets)))
current_section = []
current_targets = []
previous_selection = next_message
previous_turn = next_message
skip = 1
continue
else:
if get_target(previous_selection) == get_target(next_message):
if seg_verbose: print("--> Case 12")
# Case: After one speaker selected an image as common, he or she adds something, leading to the other speaker marking the same image as different
# Resolution: The current section is saved with the disagreed image as referent. A new section is initialised with the trailing utterance
current_targets.append(get_target(next_message))
sections.append((current_section, set(current_targets)))
current_section = [message]
current_targets = []
previous_selection = previous_selection
previous_turn = message
skip = 1
continue
else:
if seg_verbose: print("--> Case 13")
# Case: After one speaker selected an image as common, he or she adds something, leading to the other speaker marking another image as different
# Resolution: The trailing utterance is added to the current section and the current section is saved with both referents. A new section is initialised empty
current_targets.extend(
[get_target(previous_selection), get_target(next_message)])
current_section.append(message)
sections.append((current_section, set(current_targets)))
current_section = []
current_targets = []
previous_selection = next_message
previous_turn = next_message
skip = 1
continue
else:
if seg_verbose: print("--> Case 14")
# Case: After one speaker selected an image as common, he or she adds something and marks a second image
# Resolution: The trailing utterance is added to the current section and the current section is saved with both referents. A new section is initialised empty
current_targets.extend(
[get_target(previous_selection), get_target(next_message)])
current_section.append(message)
sections.append((current_section, set(current_targets)))
current_section = []
current_targets = []
previous_selection = next_message
previous_turn = next_message
skip = 1
continue
else:
if seg_verbose: print("--> Case 15")
# Case: After one speaker selected an image as common, he or she adds multiple utterances
# Resolution: Save the current section with the target marked as common and initialise a new section with the current utterance
current_targets.append(get_target(previous_selection))
if current_section:
sections.append((current_section, set(current_targets)))
current_section = [message]
current_targets = []
previous_turn = message
continue
else:
if seg_verbose: print("--> Previous selection was different")
if previous_selection.speaker != message.speaker:
if seg_verbose: print("--> Previous speaker was the other participant")
next_message = messages[i + 1]
if next_message.type == 'selection':
if seg_verbose: print("--> Next message is selection")
if next_message.speaker == message.speaker:
if not is_common_label(next_message):
if previous_selection.text == next_message.text:
if seg_verbose: print("--> Case 16")
# Case: After one speaker selected an image as different, the other speaker makes one utterance and marks the same image as different
# Resolution: The previous section is saved with the wrongly labeled image as referent and a new section is initialised with the trailing utterance of the second speaker
current_targets.append(get_target(previous_selection))
if current_section:
sections.append((current_section, set(current_targets)))
current_section = [message]
current_targets = []
previous_selection = next_message
previous_turn = message
skip = 1
continue
else:
if seg_verbose: print("--> Case 17")
# Case: After one speaker selected an image as different, the other speaker makes one utterance and marks another image as different
# Resolution: The trailing utterance is added to the current section and the current section is saved with both referents
current_targets.extend(
[get_target(previous_selection), get_target(next_message)])
current_section.append(message)
sections.append((current_section, set(current_targets)))
current_section = []
current_targets = []
previous_selection = next_message
previous_turn = next_message
skip = 1
continue
else:
if get_target(previous_selection) == get_target(next_message):
if seg_verbose: print("--> Case 18")
# Case: After one speaker selected an image as different, the other speaker makes one utterance and marks the same image as common
# Resolution: The previous section is saved with the disagreed image as referent and a new section is initialised with the trailing utterance of the second speaker
current_targets.append(get_target(previous_selection))
if current_section:
sections.append((current_section, set(current_targets)))
current_section = [message]
current_targets = []
previous_selection = next_message
previous_turn = message
skip = 1
continue
else:
if seg_verbose: print("--> Case 19")
# Case: After one speaker selected an image as different, the other speaker makes one utterance and marks a different image as common
# Resolution: The trailing utterance is added to the current section and the current section is saved with both referents
current_targets.extend(
[get_target(previous_selection), get_target(next_message)])
current_section.append(message)
sections.append((current_section, set(current_targets)))
current_section = []
current_targets = []
previous_selection = next_message
previous_turn = next_message
skip = 1
continue
else:
if seg_verbose: print("--> Case 20")
# Case: After one speaker selected an image as different, the other speaker makes one utterance and the first speaker marks a second image
# Resolution: The trailing utterance is added to the current section and the current section is saved with both referents. A new section is initialised empty
current_targets.extend(
[get_target(previous_selection), get_target(next_message)])
current_section.append(message)
sections.append((current_section, set(current_targets)))
current_section = []
current_targets = []
previous_selection = next_message
previous_turn = next_message
skip = 1
continue
else:
if seg_verbose: print("--> Next message is regular utterance")
if next_message.speaker == message.speaker:
if seg_verbose: print("--> Next speaker is current speaker")
if i + 2 < len(messages):
second_next_message = messages[i + 2]
if second_next_message.speaker == next_message.speaker and second_next_message.type == 'selection' and get_target(
second_next_message) == get_target(previous_selection):
if seg_verbose: print("--> Case 21")
# Case: After one speaker selected an image as different, the other speaker makes two utterances and marks the same image
# Resolution: The trailing utterances are added to the current section and the current section is saved with the marked image as referent
current_targets.append(get_target(previous_selection))
current_section.append(message)
current_section.append(next_message)
sections.append((current_section, set(current_targets)))
current_section = []
current_targets = []
previous_selection = second_next_message
previous_turn = second_next_message
skip = 2
continue
else:
if seg_verbose: print("--> Case 22")
# Case: After one speaker selected an image as different, the other speaker makes multiple utterances without marking any images
# Resolution: Save the current section with the target marked as different and initialise a new section with the current utterance
current_targets.append(get_target(previous_selection))
if current_section:
sections.append((current_section, set(current_targets)))
current_section = [message]
current_targets = []
previous_turn = message
continue
else:
pass
else:
if seg_verbose: print("--> Case 24")
# Case: After one speaker selected an image as different, there is an interaction between the speakers
# Resolution: Save the current section with the target marked as different and initialise a new section with the current utterance
current_targets.append(get_target(previous_selection))
if current_section:
sections.append((current_section, set(current_targets)))
current_section = [message]
current_targets = []
previous_turn = message
continue
else:
if seg_verbose: print("--> Previous speaker was the same participant")
next_message = messages[i + 1]
if next_message.type == 'selection':
if seg_verbose: print("--> Next message is selection")
if next_message.speaker != message.speaker:
if not is_common_label(next_message):
if previous_selection.text == next_message.text:
if seg_verbose: print("--> Case 25")
# Case: After one speaker selected an image as different, he or she adds something, leading to the other speaker marking the same image as different
# Resolution: The trailing utterance is added to the current section and the current section is saved with the wrongly labeled image as referent
current_targets.append(get_target(previous_selection))
current_section.append(message)
sections.append((current_section, set(current_targets)))
current_section = []
current_targets = []
previous_selection = next_message
previous_turn = next_message
skip = 1
continue
else:
if seg_verbose: print("--> Case 26")
# Case: After one speaker selected an image as different, he or she adds something, leading to the other speaker marking a different image as different
# Resolution: The trailing utterance is added to the current section and the current section is saved with both disagreed referents
current_targets.extend(
[get_target(previous_selection), get_target(next_message)])
current_section.append(message)
sections.append((current_section, set(current_targets)))
current_section = []
current_targets = []
previous_selection = next_message
previous_turn = next_message
skip = 1
continue
else:
if get_target(previous_selection) == get_target(next_message):
if seg_verbose: print("--> Case 27")
# Case: After one speaker selected an image as different, he or she adds something, leading to the other speaker marking the same image as common
# Resolution: The trailing utterance is added to the current section and the current section is saved with the disagreed image as referent
current_targets.append(get_target(next_message))
current_section.append(message)
sections.append((current_section, set(current_targets)))
current_section = [message]
current_targets = []
previous_selection = next_message
previous_turn = message
skip = 1
continue
else:
if seg_verbose: print("--> Case 28")
# Case: After one speaker selected an image as different, he or she adds something, leading to the other speaker marking another image as common
# Resolution: The trailing utterance is added to the current section and the current section is saved with both referents
current_targets.extend(
[get_target(previous_selection), get_target(next_message)])
current_section.append(message)
sections.append((current_section, set(current_targets)))
current_section = []
current_targets = []
previous_selection = next_message
previous_turn = next_message
skip = 1
continue
else:
if seg_verbose: print("--> Case 29")
# Case: After one speaker selected an image as different, he or she adds something and marks a second image
# Resolution: The trailing utterance is added to the current section and the current section is saved with both referents
current_targets.extend(
[get_target(previous_selection), get_target(next_message)])
current_section.append(message)
sections.append((current_section, set(current_targets)))
current_section = []
current_targets = []
previous_selection = next_message
previous_turn = next_message
skip = 1
continue
else:
if seg_verbose: print("--> Case 30")
# Case: After one speaker selected an image as different, he or she adds an utterance
# Resolution: Save the current section with the target marked as different and initialise a new section with the current utterance
if current_section and current_targets:
sections.append((current_section, set(current_targets)))
current_section = [message]
current_targets = []
previous_turn = message
continue
else:
if seg_verbose: print("--> Case 31")
# Case: Regular utterance following another regular utterance
# Resolution: Add utterance to current section
current_section.append(message)
current_targets = []
previous_turn = message
continue
elif message.type == 'selection':
if seg_verbose: print("--> Selection")
if current_section:
if seg_verbose: print("--> Case 32")
# A speaker marks an image
# Resolution: Add the label of the selection to the set of current targets
if seg_verbose: print("--> Adding target")
current_targets.append(get_target(message))
previous_selection = message
previous_turn = message
continue
else:
if seg_verbose: print("--> Case 33")
if seg_verbose: print("--> Current section is empty. Skipping selection")
continue
else:
continue
if current_section and current_targets:
sections.append((current_section, set(current_targets)))
sections = (sections, set(round_data.images["A"] + round_data.images["B"]))
game_sections.append(sections)
if seg_verbose: print("{} dialogue sections encountered in round".format(len(sections)))
section_counter += len(sections)
dialogue_sections.append((game_id, game_sections))
if seg_verbose: print("Total of {} duplicate labeling action(s) removed.".format(cleaning_total))
if seg_verbose: print("Processed {} dialogue(s).".format(len(dialogue_sections)))
if seg_verbose: print("Generated a total of {} dialogue section(s).".format(section_counter))
return dialogue_sections