93 lines
2.8 KiB
Python
93 lines
2.8 KiB
Python
|
|
import pandas as pd
|
||
|
|
|
||
|
|
|
||
|
|
def compute_pnr_name_city():
|
||
|
|
data = pd.read_csv("./customer_utterance_processing/customer_provide_answer.csv")
|
||
|
|
|
||
|
|
def unique_pnr_count():
|
||
|
|
pnr_data = data[data["Input.Answer"] == "ZZZZZZ"]
|
||
|
|
unique_pnr_set = {
|
||
|
|
t
|
||
|
|
for n in range(1, 5)
|
||
|
|
for t in pnr_data[f"Answer.utterance-{n}"].tolist()
|
||
|
|
if "ZZZZZZ" in t
|
||
|
|
}
|
||
|
|
return len(unique_pnr_set)
|
||
|
|
|
||
|
|
def unique_name_count():
|
||
|
|
pnr_data = data[data["Input.Answer"] == "John Doe"]
|
||
|
|
unique_pnr_set = {
|
||
|
|
t
|
||
|
|
for n in range(1, 5)
|
||
|
|
for t in pnr_data[f"Answer.utterance-{n}"].tolist()
|
||
|
|
if "John Doe" in t
|
||
|
|
}
|
||
|
|
return len(unique_pnr_set)
|
||
|
|
|
||
|
|
def unique_city_count():
|
||
|
|
pnr_data = data[data["Input.Answer"] == "Heathrow Airport"]
|
||
|
|
unique_pnr_set = {
|
||
|
|
t
|
||
|
|
for n in range(1, 5)
|
||
|
|
for t in pnr_data[f"Answer.utterance-{n}"].tolist()
|
||
|
|
if "Heathrow Airport" in t
|
||
|
|
}
|
||
|
|
return len(unique_pnr_set)
|
||
|
|
|
||
|
|
def unique_entity_count(entity_template_tags):
|
||
|
|
# entity_data = data[data['Input.Prompt'] == entity_template_tag]
|
||
|
|
entity_data = data
|
||
|
|
unique_entity_set = {
|
||
|
|
t
|
||
|
|
for n in range(1, 5)
|
||
|
|
for t in entity_data[f"Answer.utterance-{n}"].tolist()
|
||
|
|
if any(et in t for et in entity_template_tags)
|
||
|
|
}
|
||
|
|
return len(unique_entity_set)
|
||
|
|
|
||
|
|
print('PNR', unique_pnr_count())
|
||
|
|
print('Name', unique_name_count())
|
||
|
|
print('City', unique_city_count())
|
||
|
|
print('Payment', unique_entity_count(['KPay', 'ZPay', 'Credit Card']))
|
||
|
|
|
||
|
|
|
||
|
|
def compute_date():
|
||
|
|
entity_template_tags = ['27 january', 'December 18']
|
||
|
|
data = pd.read_csv("./customer_utterance_processing/customer_provide_departure.csv")
|
||
|
|
# data.sample(10)
|
||
|
|
|
||
|
|
def unique_entity_count(entity_template_tags):
|
||
|
|
# entity_data = data[data['Input.Prompt'] == entity_template_tag]
|
||
|
|
entity_data = data
|
||
|
|
unique_entity_set = {
|
||
|
|
t
|
||
|
|
for n in range(1, 5)
|
||
|
|
for t in entity_data[f"Answer.utterance-{n}"].tolist()
|
||
|
|
if any(et in t for et in entity_template_tags)
|
||
|
|
}
|
||
|
|
return len(unique_entity_set)
|
||
|
|
|
||
|
|
print('Date', unique_entity_count(entity_template_tags))
|
||
|
|
|
||
|
|
|
||
|
|
def compute_option():
|
||
|
|
entity_template_tag = 'third'
|
||
|
|
data = pd.read_csv("./customer_utterance_processing/customer_provide_flight_selection.csv")
|
||
|
|
|
||
|
|
def unique_entity_count():
|
||
|
|
entity_data = data[data['Input.Prompt'] == entity_template_tag]
|
||
|
|
unique_entity_set = {
|
||
|
|
t
|
||
|
|
for n in range(1, 5)
|
||
|
|
for t in entity_data[f"Answer.utterance-{n}"].tolist()
|
||
|
|
if entity_template_tag in t
|
||
|
|
}
|
||
|
|
return len(unique_entity_set)
|
||
|
|
|
||
|
|
print('Option', unique_entity_count())
|
||
|
|
|
||
|
|
|
||
|
|
compute_pnr_name_city()
|
||
|
|
compute_date()
|
||
|
|
compute_option()
|