-
Notifications
You must be signed in to change notification settings - Fork 66
/
Copy pathconversation.py
671 lines (561 loc) · 23.3 KB
/
conversation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
"""DFCX End to End Conversation Functions"""
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import time
import traceback
import uuid
from threading import Thread
from typing import Any, Dict
import pandas as pd
from google.api_core import exceptions as core_exceptions
from google.cloud.dialogflowcx_v3beta1 import services, types
from proto.marshal.collections import maps, repeated
from dfcx_scrapi.core import flows, pages, scrapi_base
# Configure the root logger once, at import time, so every record emitted by
# this module carries the "[dfcx]" prefix.
logging.basicConfig(
    level=logging.INFO,
    format="[dfcx] %(levelname)s:%(message)s",
)

# Upper bound on the number of detect_intent attempts made by
# DialogflowConversation.reply when the API raises a ClientError.
MAX_RETRIES = 3
class DialogflowConversation(scrapi_base.ScrapiBase):
    """Class that wraps the SessionsClient to hold end to end conversations
    and maintain internal session state.
    """

    def __init__(
        self,
        config=None,
        creds_path: str = None,
        creds_dict: Dict = None,
        creds=None,
        agent_id: str = None,
        language_code: str = "en",
    ):
        """Initializes credentials, agent settings and a fresh session.

        Args:
          config: Optional dictionary kept for backwards compatibility. The
            keys read from it are "agent_path" and "language_code"; when
            present they take precedence over the matching keyword args.
          creds_path: Forwarded to ScrapiBase.
          creds_dict: Forwarded to ScrapiBase.
          creds: Forwarded to ScrapiBase.
          agent_id: The agent to converse with.
          language_code: Language code sent with every query input.
        """
        super().__init__(
            creds_path=creds_path,
            creds_dict=creds_dict,
            creds=creds,
            agent_id=agent_id,
        )

        logging.debug(
            "create conversation with creds_path: %s | agent_id: %s",
            creds_path, agent_id)

        self.agent_id = self._set_agent_id(agent_id, config)
        self.language_code = self._set_language_code(language_code, config)
        self.start_time = None
        self.query_result = None
        self.session_id = None
        self.turn_count = None
        self.agent_env = {}  # empty
        # Populated lazily by _page_id_mapper.
        self.agent_pages_map = None
        self.restart()
        self.flows = flows.Flows(
            creds=self.creds, language_code=self.language_code)
        self.pages = pages.Pages(
            creds=self.creds, language_code=self.language_code)

    @staticmethod
    def _set_language_code(language_code: str, config: Dict[str, Any]) -> str:
        """Determines how to set the language_code based on user inputs.

        We implement this for backwards compatibility.

        Args:
          language_code: The value passed as a top level argument.
          config: Optional config dictionary; may be None or empty.

        Returns:
          The "language_code" from config when it is set, otherwise the
          language_code argument.
        """
        # Config will take precedence if provided
        if config:
            config_lang_code = config.get("language_code", None)

            # We'll only return if it exists in the config on the off chance
            # that some users have provided the language_code as a top level
            # arg in addition to providing the config
            if config_lang_code:
                return config_lang_code

        return language_code

    @staticmethod
    def _set_agent_id(input_agent_id: str, config: Dict[str, Any]) -> str:
        """Determines how to set the agent_id based on user inputs.

        We implement this for backwards compatibility.

        Args:
          input_agent_id: The value passed as a top level argument.
          config: Optional config dictionary; may be None or empty.

        Returns:
          The "agent_path" from config when it is set, otherwise
          input_agent_id, or None when neither is provided.
        """
        # Config will take precedence if provided
        if config:
            config_agent_path = config.get("agent_path", None)

            # We'll only return if it exists in the config on the off chance
            # that some users have provided the agent_id as a top level arg
            # in addition to providing the config
            if config_agent_path:
                return config_agent_path

        # Fall through to the explicit argument even when a config without
        # "agent_path" was supplied, mirroring _set_language_code.
        if input_agent_id:
            return input_agent_id

        return None

    @staticmethod
    def _get_match_type_from_map(match_type: int):
        """Translates the match_type enum int value into a more descriptive
        string.

        Args:
          match_type: The enum (or int) value of the match type.

        Returns:
          The mapped name. Values missing from the map fall back to the
          value's own `name` attribute when it has one, else to str(value),
          so an unmapped match type never raises.
        """
        match_type_map = {
            0: "MATCH_TYPE_UNSPECIFIED",
            1: "INTENT",
            2: "DIRECT_INTENT",
            3: "PARAMETER_FILLING",
            4: "NO_MATCH",
            5: "NO_INPUT",
            6: "EVENT",
            8: "KNOWLEDGE_CONNECTOR",
            9: "LLM",
        }

        if match_type in match_type_map:
            return match_type_map[match_type]

        return getattr(match_type, "name", str(match_type))

    @staticmethod
    def _validate_test_set_input(test_set: pd.DataFrame):
        """Validates that all pages referenced in the test set exist in the
        agent.

        Args:
          test_set: DataFrame with `page_id` and `page_display_name` columns;
            rows whose page_id is NaN are considered invalid.

        Raises:
          UserWarning: If at least one row has no page_id.
        """
        mask = test_set.page_id.isna().to_list()
        invalid_pages = set(test_set.page_display_name[mask].to_list())

        if invalid_pages:
            raise UserWarning(
                "The following Pages are invalid and missing Page "
                f"IDs: \n{invalid_pages}\n\nPlease ensure that your Page "
                "Display Names do not contain typos.\nFor Default Start Page "
                "use the special page display name START_PAGE."
            )

    @staticmethod
    def progress_bar(current, total, bar_length=50, type_="Progress"):
        """Display progress bar for processing.

        Args:
          current: Number of items processed so far.
          total: Total number of items. A total of 0 is reported as 100%.
          bar_length: Width of the bar in characters.
          type_: Label printed in front of the counters.
        """
        # Guard against an empty workload instead of dividing by zero.
        percent = float(current) * 100 / total if total else 100.0
        arrow = "-" * int(percent / 100 * bar_length - 1) + ">"
        spaces = " " * (bar_length - len(arrow))

        logging.info(
            "%s(%s/%s)[%s%s] %.2f%%",
            type_, current, total, arrow, spaces, percent
        )

    @staticmethod
    def _build_query_params_object(
        parameters,
        current_page,
        disable_webhook,
        end_user_metadata):
        """Builds the QueryParameters proto for a detect intent request.

        `parameters` and `end_user_metadata` are only set when truthy.
        """
        query_params = types.session.QueryParameters(
            disable_webhook=disable_webhook,
            current_page=current_page,
        )

        if parameters:
            query_params.parameters = parameters

        if end_user_metadata:
            query_params.end_user_metadata = end_user_metadata

        return query_params

    @staticmethod
    def _build_query_input_object(input_obj, language_code):
        """Builds the QueryInput proto from the user supplied dictionary.

        The first key found, in the order "dtmf", "intent", "event", "text",
        decides the input type. When none is present an empty QueryInput
        (without language_code) is returned.
        """
        query_input = types.session.QueryInput()

        if "dtmf" in input_obj:
            digits = str(input_obj["dtmf"])
            finish_digit = None
            if "finish_digit" in input_obj:
                finish_digit = str(input_obj["finish_digit"])

            dtmf_input = types.session.DtmfInput(
                digits=digits, finish_digit=finish_digit
            )
            query_input = types.session.QueryInput(
                dtmf=dtmf_input,
                language_code=language_code,
            )

        elif "intent" in input_obj:
            intent_input = types.session.IntentInput(
                intent=input_obj["intent"])
            query_input = types.session.QueryInput(
                intent=intent_input, language_code=language_code
            )

        elif "event" in input_obj:
            event_input = types.session.EventInput(event=input_obj["event"])
            query_input = types.session.QueryInput(
                event=event_input, language_code=language_code
            )

        elif "text" in input_obj:
            text = input_obj["text"]
            logging.debug("Input text: %s", text)
            text_input = types.session.TextInput(text=text)
            query_input = types.session.QueryInput(
                text=text_input,
                language_code=language_code,
            )

        return query_input

    @staticmethod
    def _gather_text_responses(text_message):
        """Joins all text variants of a text message with newlines."""
        flat_texts = "\n".join(text_message.text)

        return flat_texts

    @staticmethod
    def _build_error_reply(error_name: str, err: Exception) -> Dict[str, Any]:
        """Builds the placeholder reply returned when detect_intent fails.

        Args:
          error_name: Name of the exception family reported in the message.
          err: The exception that was caught.

        Returns:
          A dictionary with the same keys as a successful reply, where
          response_messages carries the error text, match is None and the
          remaining values are empty strings.
        """
        return {
            "response_messages": (
                f"---- ERROR --- {error_name} caught on CX.detect,\n{err}"
            ),
            "confidence": "",
            "page_name": "",
            "intent_name": "",
            "match_type": "",
            "match": None,
            "params": "",
        }

    def _gather_response_messages(self, response_messages):
        """Converts response message protos into a list of plain dicts.

        Each message is converted according to the first populated field, in
        the order payload, play_audio, live_agent_handoff,
        conversation_success, output_audio_text, text. A message with none of
        these set is appended unchanged.
        """
        rm_gathered = []
        for msg in response_messages:
            gathered = msg

            if msg.payload:
                gathered = {
                    "payload": self.recurse_proto_marshal_to_dict(msg.payload)
                }

            elif msg.play_audio:
                gathered = {
                    "play_audio": {"audio_uri": msg.play_audio.audio_uri}
                }

            elif msg.live_agent_handoff:
                gathered = {
                    "live_agent_handoff": self.recurse_proto_marshal_to_dict(
                        msg.live_agent_handoff.metadata
                    )
                }

            elif msg.conversation_success:
                gathered = {
                    "conversation_success": self.recurse_proto_marshal_to_dict(
                        msg.conversation_success.metadata
                    )
                }

            elif msg.output_audio_text:
                gathered = {"output_audio_text": msg.output_audio_text.text}

            elif msg.text:
                gathered = {"text": self._gather_text_responses(msg.text)}

            rm_gathered.append(gathered)

        return rm_gathered

    def _gather_query_result_parameters(self, input_parameters):
        """Converts query result parameters into standard python types."""
        output_parameters = {}
        for param in input_parameters:
            val = input_parameters[param]

            # If we find a RepeatedComposite (i.e. List) we will recurse
            # down and convert to lists/dicts/str as needed.
            if isinstance(val, repeated.RepeatedComposite):
                val = self.recurse_proto_repeated_composite(val)

            elif isinstance(val, maps.MapComposite):
                val = self.recurse_proto_marshal_to_dict(val)

            output_parameters[param] = val

        return output_parameters

    def _page_id_mapper(self):
        """Initializes the agent_pages_map dataframe.

        This dataframe contains the flow_display_name, page_display_name,
        and page_id for each page in the agent.
        """
        columns = ["flow_display_name", "page_display_name", "page_id"]
        flow_map = self.flows.get_flows_map(agent_id=self.agent_id)

        # Collect one frame per flow and concatenate once at the end rather
        # than growing a DataFrame inside the loop.
        frames = []
        for flow_id, flow_display_name in flow_map.items():
            page_map = self.pages.get_pages_map(flow_id=flow_id)

            flow_mapped = pd.DataFrame.from_dict(page_map, orient="index")
            flow_mapped["page_id"] = flow_mapped.index
            flow_mapped = flow_mapped.rename(columns={0: "page_display_name"})
            flow_mapped.insert(0, "flow_display_name", flow_display_name)
            frames.append(flow_mapped)

        if frames:
            self.agent_pages_map = pd.concat(frames, ignore_index=True)
        else:
            # Keep the merge keys available even when no flow was returned.
            self.agent_pages_map = pd.DataFrame(columns=columns)

    def _get_reply_results(self, utterance, page_id, results, i):
        """Get results of single text utterance to CX Agent.

        Args:
          utterance: Text to send to the bot for testing.
          page_id: Specified CX Page to send the utterance request to
          results: Dictionary of pre-sized lists ("target_page" and "match")
            in which the result is stored at position i
          i: Internal tracking for Python Threading
        """
        # NOTE(review): reply(restart=True) rewrites self.session_id, and all
        # worker threads share this instance, so concurrent calls may pick up
        # a session id written by another thread. Confirm this is acceptable.
        response = self.reply(
            send_obj={"text": utterance}, current_page=page_id, restart=True
        )

        results["target_page"][i] = response["page_name"]
        results["match"][i] = response["match"]

    def _get_intent_detection(
        self, test_set: pd.DataFrame, refresh_page_map: bool = True
    ):
        """Gets the results of a subset of Intent Detection tests.

        NOTE - This is an internal method used by run_intent_detection to
        manage parallel intent detection requests and should not be used as a
        standalone function.

        Args:
          test_set: Chunk of the test set; one thread is started per row.
          refresh_page_map: When True (default) the page map is rebuilt from
            the agent before running. run_intent_detection passes False
            after building the map once for all chunks.

        Returns:
          The chunk merged with the page map, without the page_id column and
          with the added columns target_page and match.

        Raises:
          UserWarning: If a row references a page that has no page_id.
        """
        if refresh_page_map or getattr(self, "agent_pages_map", None) is None:
            self._page_id_mapper()

        test_set_mapped = pd.merge(
            test_set,
            self.agent_pages_map,
            on=["flow_display_name", "page_display_name"],
            how="left",
        )
        utterances = list(test_set_mapped["utterance"])
        page_ids = list(test_set_mapped["page_id"])

        self._validate_test_set_input(test_set_mapped)

        row_count = len(test_set_mapped)
        results = {
            "target_page": [None] * row_count,
            "match": [None] * row_count,
        }

        threads = []
        for i, (utterance, page_id) in enumerate(zip(utterances, page_ids)):
            thread = Thread(
                target=self._get_reply_results,
                args=(utterance, page_id, results, i),
            )
            thread.start()
            threads.append(thread)

        for thread in threads:
            thread.join()

        test_set_mapped["target_page"] = results["target_page"]
        test_set_mapped["match"] = results["match"]

        # drop() already returns a new frame, so no extra copy is needed.
        return test_set_mapped.drop(columns=["page_id"])

    def restart(self):
        """Starts a new session/conversation for this agent"""
        self.session_id = uuid.uuid4()
        self.turn_count = 0

    def set_agent_env(self, param, value):
        """Setting changes related to the environment"""
        logging.info("setting agent_env param:[%s] = value:[%s]", param, value)
        self.agent_env[param] = value

    def checkpoint(self, msg=None, start=False):
        """Log a checkpoint to time progress and debug bottleneck.

        Args:
          msg: Text to log. Nothing is logged without a message.
          start: When True the timer is (re)started.

        The message is only logged when more than 2 seconds have elapsed
        since the timer was started. If the timer was never started, this
        call starts it instead of failing.
        """
        now = time.perf_counter()
        if start or self.start_time is None:
            self.start_time = now

        duration = round(now - self.start_time, 2)
        if duration > 2 and msg:
            logging.info("%0.2fs %s", duration, msg)

    @scrapi_base.api_call_counter_decorator
    def reply(
        self,
        send_obj: Dict[str, Any],
        restart: bool = False,
        retries: int = 0,
        current_page: str = None,
        checkpoints: bool = False,
    ):
        """Runs intent detection on one utterance and gets the agent reply.

        Args:
          send_obj: Dictionary with the following structure:
            {'text': str,
            'params': Dict[str,str],
            'dtmf': str,
            'end_user_metadata': Dict[str, str],}
          restart: Boolean flag that determines whether to use the existing
            session ID or start a new conversation with a new session ID.
            Passing True will create a new session ID on subsequent calls.
            Defaults to False.
          retries: used for recurse calling this func if API fails
          current_page: Specify the page id to start the conversation from
          checkpoints: Boolean flag to enable/disable Checkpoint timer
            debugging. Defaults to False.

        Returns:
          A dictionary for the agent reply to the submitted text.
          Includes keys response_messages, confidence, page_name,
          intent_name, match_type, match, and params. When the API call
          fails, the same keys are returned with the error text in
          response_messages, match set to None and empty strings elsewhere.
        """
        text = send_obj.get("text")
        send_params = send_obj.get("params")
        end_user_metadata = send_obj.get("end_user_metadata")

        if text and len(text) > 256:
            logging.warning(
                "Text input is too long. Truncating to 256 characters."
            )
            text = text[0:256]
            logging.warning("TRUNCATED TEXT: %s", text)
            # The query input is built from send_obj, so the truncated text
            # has to be written back. Copy to leave the caller's dict intact.
            send_obj = {**send_obj, "text": text}

        custom_environment = self.agent_env.get("environment")
        disable_webhook = self.agent_env.get("disable_webhook") or False

        if checkpoints:
            self.checkpoint(start=True)

        if restart:
            self.restart()

        client_options = self._set_region(self.agent_id)
        session_client = services.sessions.SessionsClient(
            credentials=self.creds, client_options=client_options
        )
        session_path = f"{self.agent_id}/sessions/{self.session_id}"

        if custom_environment:
            logging.info("req using env: %s", custom_environment)
            session_path = (
                f"{self.agent_id}/environments/"
                f"{custom_environment}/sessions/{self.session_id}"
            )

        # Build Query Params object
        query_params = self._build_query_params_object(
            send_params, current_page, disable_webhook, end_user_metadata
        )

        # Build Query Input object
        query_input = self._build_query_input_object(
            send_obj, self.language_code
        )

        request = types.session.DetectIntentRequest(
            session=session_path,
            query_input=query_input,
            query_params=query_params,
        )

        logging.debug("query_params: %s", query_params)
        logging.debug("request %s", request)

        response = None
        try:
            response = session_client.detect_intent(request=request)

        except core_exceptions.InternalServerError as err:
            logging.error(
                "---- ERROR --- InternalServerError caught on CX.detect %s", err
            )
            logging.error("text: %s", text)
            logging.error("query_params: %s", query_params)
            logging.error("query_input: %s", query_input)
            return self._build_error_reply("InternalServerError", err)

        except core_exceptions.ClientError as err:
            logging.error(
                "---- ERROR ---- ClientError caught on CX.detect %s", err
            )
            template = "An exception of type {0} occurred. \nArguments:\n{1!r}"
            message = template.format(type(err).__name__, err.args)
            logging.error("err name %s", message)
            logging.error("text %s", text)
            logging.error("query_params %s", query_params)
            logging.error("query_input %s", query_input)
            # format_exc returns the traceback text; print_exc returns None.
            logging.error(traceback.format_exc())

            retries += 1
            if retries < MAX_RETRIES:
                logging.error("retrying")
                # Forward every argument so the retry targets the same page
                # and keeps the same checkpoint setting.
                return self.reply(
                    send_obj,
                    restart=restart,
                    retries=retries,
                    current_page=current_page,
                    checkpoints=checkpoints,
                )

            logging.error("MAX_RETRIES exceeded")
            return self._build_error_reply("ClientError", err)

        if checkpoints:
            self.checkpoint("<< got response")

        query_result = response.query_result
        logging.debug("dfcx>qr %s", query_result)
        self.query_result = query_result

        # Gather Response Messages into List of Dicts
        response_messages = None
        if query_result.response_messages:
            response_messages = self._gather_response_messages(
                query_result.response_messages
            )

        # Convert params structures from Proto to standard python data types
        params = None
        if query_result.parameters:
            params = self._gather_query_result_parameters(
                query_result.parameters
            )

        reply = {
            "response_messages": response_messages,
            "confidence": query_result.intent_detection_confidence,
            "page_name": query_result.current_page.display_name,
            "intent_name": query_result.intent.display_name,
            "match_type": self._get_match_type_from_map(
                query_result.match.match_type
            ),
            "match": query_result.match,
            "params": params,
        }

        logging.debug("reply %s", reply)

        return reply

    def getpath(self, obj, xpath, default=None):
        """Get data at a pathed location out of object internals.

        Args:
          obj: Nested structure to walk.
          xpath: Slash separated path, e.g. "a/0/b". Numeric segments are
            used as integer subscripts, all other segments as keys passed to
            the current element's `get` method.
          default: Value returned when the path cannot be resolved.

        Returns:
          The element found at xpath, or default when any segment is missing
          or cannot be applied to the current element.
        """
        missing = object()
        elem = obj
        try:
            for xpitem in xpath.strip("/").split("/"):
                try:
                    index = int(xpitem)
                except ValueError:
                    # Non-numeric segment: key lookup
                    elem = elem.get(xpitem, missing)
                    if elem is missing:
                        raise KeyError(xpitem) from None
                else:
                    # Numeric segment: integer subscript
                    elem = elem[index]
        except (KeyError, IndexError, AttributeError, TypeError):
            logging.warning("failed to getpath: %s ", xpath)
            return default

        logging.info("OK getpath: %s", xpath)

        return elem

    def run_intent_detection(
        self,
        test_set: pd.DataFrame,
        chunk_size: int = 300,
        rate_limit: float = 20,
    ):
        """Tests a set of utterances for intent detection against a CX Agent.

        This function uses Python Threading to run tests in parallel to
        expedite intent detection testing for Dialogflow CX agents. The default
        quota for Text requests/min is 1200. Ref:
          https://cloud.google.com/dialogflow/quotas#table

        Args:
          test_set: A Pandas DataFrame with the following schema.
            flow_display_name: str
            page_display_name: str
              - NOTE, when using the Default Start Page of a Flow you must
                define it as the special display name START_PAGE
            utterance: str
            inject_parameters (optional): str
            end_user_metadata (optional): str
            NOTE - the two optional columns are carried through to the
            output but are not forwarded to the agent by this implementation.
          chunk_size: Determines the number of text requests to send in
            parallel. This should be adjusted based on your test_set size and
            the Quota limits set for your GCP project. Default is 300.
          rate_limit: Number of seconds to wait between running test set chunks

        Returns:
          A Pandas DataFrame consisting of the original DataFrame plus the
          following additional columns.
            target_page: str
            match_type: str
            confidence: float
            parameters_set: dict
            detected_intent: str
          For rows whose request failed, the four columns unpacked from the
          match are empty strings.

        Raises:
          UserWarning: If a row references a page that has no page_id.
        """
        total = test_set.shape[0]

        if total == 0:
            # Nothing to send: return the expected schema without API calls.
            return self._unpack_match(
                test_set.assign(target_page=None, match=None)
            )

        # The page map does not depend on the chunk, so build it only once.
        self._page_id_mapper()

        chunks = []
        for start in range(0, total, chunk_size):
            end = min(start + chunk_size, total)
            test_set_chunk = test_set.iloc[start:end]
            chunks.append(
                self._get_intent_detection(
                    test_set=test_set_chunk, refresh_page_map=False
                )
            )
            self.progress_bar(end, total)

            # Only throttle when another chunk is still to be sent.
            if end < total:
                time.sleep(rate_limit)

        return self._unpack_match(pd.concat(chunks))

    def _unpack_match(self, df: pd.DataFrame):
        """Unpacks a 'match' column into four component columns.

        If a match value is falsy (e.g. None), all four columns hold an
        empty string for that row.

        Args:
          df: dataframe containing a column named match of types.Match

        Returns:
          A copy of df with columns match_type, confidence, parameters_set,
          and detected_intent instead of match.
        """
        matches = df["match"]

        def unpack(getter):
            """Applies getter to every truthy match, "" otherwise."""
            return matches.apply(
                lambda match_value: getter(match_value) if match_value else ""
            )

        parameters_set = unpack(lambda match: match.parameters).apply(
            lambda parameter: self.recurse_proto_marshal_to_dict(parameter)
            if parameter else ""
        )

        return df.assign(
            match_type=unpack(lambda match: match.match_type.name),
            confidence=unpack(lambda match: match.confidence),
            parameters_set=parameters_set,
            detected_intent=unpack(lambda match: match.intent.display_name),
        ).drop(columns="match")