Skip to content

Commit

Permalink
Merge pull request #91 from alan-turing-institute/default-run-args
Browse files Browse the repository at this point in the history
Fix #87: Set default run args
  • Loading branch information
rwood-97 authored Sep 18, 2023
2 parents 7039775 + b951b02 commit 727bb20
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 73 deletions.
123 changes: 63 additions & 60 deletions slack_bot/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,11 @@ async def main():
# Parse command line arguments
parser = argparse.ArgumentParser()
parser.add_argument(
"--model", "-m", help="Select which model to use", default=None, choices=MODELS
"--model",
"-m",
help="Select which model to use",
default=os.environ.get("REGINALD_MODEL") or "hello",
choices=MODELS,
)
parser.add_argument(
"--model-name",
Expand All @@ -45,17 +49,18 @@ async def main():
"(ignored if not using llama-index-llama-cpp or llama-index-hf). "
"Default is 'chat'."
),
default=None,
default=os.environ.get("LLAMA_INDEX_MODE") or "chat",
choices=["chat", "query"],
)
parser.add_argument(
"--path",
"--is-path",
"-p",
help=(
"Whether or not the model_name passed is a path to the model "
"(ignored if not using llama-index-llama-cpp)"
),
action="store_true",
action=argparse.BooleanOptionalAction,
default=None,
)
parser.add_argument(
"--max-input-size",
Expand All @@ -65,7 +70,7 @@ async def main():
"Select maximum input size for LlamaCPP or HuggingFace model "
"(ignored if not using llama-index-llama-cpp or llama-index-hf)"
),
default=4096,
default=os.environ.get("LLAMA_INDEX_MAX_INPUT_SIZE") or 4096,
)
parser.add_argument(
"--n-gpu-layers",
Expand All @@ -75,7 +80,7 @@ async def main():
"Select number of GPU layers for LlamaCPP model "
"(ignored if not using llama-index-llama-cpp)"
),
default=0,
default=os.environ.get("LLAMA_INDEX_N_GPU_LAYERS") or 0,
)
parser.add_argument(
"--device",
Expand All @@ -85,20 +90,22 @@ async def main():
"Select device for HuggingFace model "
"(ignored if not using llama-index-hf model)"
),
default="auto",
default=os.environ.get("LLAMA_INDEX_DEVICE") or "auto",
)
parser.add_argument(
"--force-new-index",
"-f",
help="Recreate the index vector store or not",
action="store_true",
action=argparse.BooleanOptionalAction,
default=None,
)
parser.add_argument(
"--data-dir",
"-d",
type=pathlib.Path,
help="Location for data",
default=None,
default=os.environ.get("LLAMA_INDEX_DATA_DIR")
or (pathlib.Path(__file__).parent.parent / "data").resolve(),
)
parser.add_argument(
"--which-index",
Expand All @@ -111,7 +118,7 @@ async def main():
"files in the data directory, 'handbook' will "
"only use 'handbook.csv' file."
),
default=None,
default=os.environ.get("LLAMA_INDEX_WHICH_INDEX") or "all_data",
choices=["all_data", "public", "handbook"],
)

Expand All @@ -124,83 +131,79 @@ async def main():
level=logging.INFO,
)

# Set model name
model_name = os.environ.get("REGINALD_MODEL")
if args.model:
model_name = args.model
if not model_name:
model_name = "hello"

# Set force new index
# Set force new index (by default, don't)
force_new_index = False
if os.environ.get("LLAMA_FORCE_NEW_INDEX"):
force_new_index = os.environ.get("LLAMA_FORCE_NEW_INDEX").lower() == "true"
if args.force_new_index:
force_new_index = True

# Set data directory
data_dir = os.environ.get("LLAMA_DATA_DIR")
if args.data_dir:
data_dir = args.data_dir
if not data_dir:
data_dir = pathlib.Path(__file__).parent.parent / "data"
data_dir = pathlib.Path(data_dir).resolve()

# Set which index
which_index = os.environ.get("LLAMA_WHICH_INDEX")
if args.which_index:
which_index = args.which_index
if not which_index:
which_index = "all_data"

# Set mode
mode = os.environ.get("LLAMA_MODE")
if args.mode:
mode = args.mode
if not mode:
mode = "chat"
# try to obtain force_new_index from env var
if os.environ.get("LLAMA_INDEX_FORCE_NEW_INDEX"):
force_new_index = (
os.environ.get("LLAMA_INDEX_FORCE_NEW_INDEX").lower() == "true"
)
# if force_new_index is provided via command line, override env var
if args.force_new_index is not None:
force_new_index = args.force_new_index

# Set is_path bool (by default, False)
is_path = False
# try to obtain is_path from env var
if os.environ.get("LLAMA_INDEX_IS_PATH"):
is_path = os.environ.get("LLAMA_INDEX_IS_PATH").lower() == "true"
# if is_path bool is provided via command line, override env var
if args.is_path is not None:
is_path = args.is_path

# Initialise a new Slack bot with the requested model
try:
model = MODELS[model_name.lower()]
model = MODELS[args.model.lower()]
except KeyError:
logging.error(f"Model {model_name} was not recognised")
logging.error(f"Model {args.model} was not recognised")
sys.exit(1)

    # Initialise LLM response model
logging.info(f"Initialising bot with model: {model_name}")
logging.info(f"Initialising bot with model: {args.model}")

# Set up any model args that are required
if model_name == "llama-index-llama-cpp":
if args.model_name is None:
args.model_name = DEFAULT_LLAMA_CPP_GGUF_MODEL
if args.model == "llama-index-llama-cpp":
# try to obtain model name from env var
# if model name is provided via command line, override env var
model_name = args.model_name or os.environ.get("LLAMA_INDEX_MODEL_NAME")

# if no model name is provided by command line or env var,
# default to DEFAULT_LLAMA_CPP_GGUF_MODEL
if model_name is None:
model_name = DEFAULT_LLAMA_CPP_GGUF_MODEL

model_args = {
"model_name": args.model_name,
"path": args.path,
"model_name": model_name,
"is_path": is_path,
"n_gpu_layers": args.n_gpu_layers,
"max_input_size": args.max_input_size,
}
elif model_name == "llama-index-hf":
if args.model_name is None:
args.model_name = DEFAULT_HF_MODEL
elif args.model == "llama-index-hf":
# try to obtain model name from env var
# if model name is provided via command line, override env var
model_name = args.model_name or os.environ.get("LLAMA_INDEX_MODEL_NAME")

# if no model name is provided by command line or env var,
# default to DEFAULT_HF_MODEL
if model_name is None:
model_name = DEFAULT_HF_MODEL

model_args = {
"model_name": args.model_name,
"model_name": model_name,
"device": args.device,
"max_input_size": args.max_input_size,
}
else:
model_args = {}

if model_name == "hello":
if model == "hello":
response_model = model()
else:
response_model = model(
force_new_index=force_new_index,
data_dir=data_dir,
which_index=which_index,
mode=mode,
data_dir=args.data_dir,
which_index=args.which_index,
mode=args.mode,
**model_args,
)

Expand Down
31 changes: 18 additions & 13 deletions slack_bot/slack_bot/models/llama_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def __init__(
The type of engine to use when interacting with the data, options of "chat" or "query".
Default is "chat".
k : int, optional
`similarity_top_k` to use in query engine, by default 3
        `similarity_top_k` to use in chat or query engine, by default 3
chunk_overlap_ratio : float, optional
Chunk overlap as a ratio of chunk size, by default 0.1
force_new_index : bool, optional
Expand All @@ -79,6 +79,14 @@ def __init__(
"""
super().__init__(emoji="llama")
logging.info("Setting up Huggingface backend.")
if mode == "chat":
logging.info("Setting up chat engine.")
elif mode == "query":
logging.info("Setting up query engine.")
else:
logging.error("Mode must either be 'query' or 'chat'.")
sys.exit(1)

self.max_input_size = max_input_size
self.model_name = model_name
self.num_output = num_output
Expand Down Expand Up @@ -138,17 +146,14 @@ def __init__(
storage_context=storage_context, service_context=service_context
)

if self.mode == "query":
self.query_engine = self.index.as_query_engine(similarity_top_k=k)
logging.info("Done setting up Huggingface backend for query engine.")
elif self.mode == "chat":
if self.mode == "chat":
self.chat_engine = self.index.as_chat_engine(
chat_mode="context", similarity_top_k=k
)
logging.info("Done setting up Huggingface backend for chat engine.")
else:
logging.error("Mode must either be 'query' or 'chat'.")
sys.exit(1)
elif self.mode == "query":
self.query_engine = self.index.as_query_engine(similarity_top_k=k)
logging.info("Done setting up Huggingface backend for query engine.")

self.error_response_template = (
"Oh no! When I tried to get a response to your prompt, "
Expand Down Expand Up @@ -356,7 +361,7 @@ class LlamaIndexLlamaCPP(LlamaIndex):
def __init__(
self,
model_name: str,
path: bool,
is_path: bool,
n_gpu_layers: int = 0,
*args: Any,
**kwargs: Any,
Expand All @@ -369,14 +374,14 @@ def __init__(
----------
model_name : str
Either the path to the model or the URL to download the model from
path : bool, optional
is_path : bool, optional
If True, model_name is used as a path to the model file,
otherwise it should be the URL to download the model
n_gpu_layers : int, optional
Number of layers to offload to GPU.
If -1, all layers are offloaded, by default 0
"""
self.path = path
self.is_path = is_path
self.n_gpu_layers = n_gpu_layers
super().__init__(*args, model_name=model_name, **kwargs)

Expand All @@ -389,8 +394,8 @@ def _prep_llm(self) -> LLM:
)

return LlamaCPP(
model_url=self.model_name if not self.path else None,
model_path=self.model_name if self.path else None,
model_url=self.model_name if not self.is_path else None,
model_path=self.model_name if self.is_path else None,
temperature=0.1,
max_new_tokens=self.num_output,
context_window=self.max_input_size,
Expand Down

0 comments on commit 727bb20

Please sign in to comment.