Managing prompt size
Agents dynamically call tools. The results of those tool calls are added back to the prompt so that the agent can plan its next action. Depending on which tools are being used and how they're being called, the agent prompt can easily grow larger than the model's context window.
With LCEL, it's easy to add custom functionality for managing the size of prompts within your chain or agent. Let's look at a simple agent example that can search Wikipedia for information.
%pip install --upgrade --quiet langchain langchain-openai wikipedia
from operator import itemgetter
from langchain.agents import AgentExecutor
from langchain.agents.format_scratchpad import format_to_openai_function_messages
from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser
from langchain.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_core.prompt_values import ChatPromptValue
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_openai import ChatOpenAI
wiki = WikipediaQueryRun(
api_wrapper=WikipediaAPIWrapper(top_k_results=5, doc_content_chars_max=10_000)
)
tools = [wiki]
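To see why prompt size becomes a problem here, note that with top_k_results=5 and doc_content_chars_max=10_000, a single tool call can return up to roughly 50,000 characters, all of which gets appended back into the agent's prompt. As a quick optional check (the exact length depends on what Wikipedia returns):
# A single tool call can produce a large chunk of text, all of which
# is fed back into the prompt on the next agent step.
result = wiki.run("United States")
print(len(result))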
prompt = ChatPromptTemplate.from_messages(
[
("system", "You are a helpful assistant"),
("user", "{input}"),
MessagesPlaceholder(variable_name="agent_scratchpad"),
]
)
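The agent_scratchpad placeholder is where the formatted history of tool calls and tool outputs will be injected on each turn. As a quick illustration, with an empty scratchpad the prompt renders just the system and user messages:
# Render the prompt with an empty scratchpad to see its base structure.
print(prompt.invoke({"input": "hi", "agent_scratchpad": []}).to_messages())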
llm = ChatOpenAI(model="gpt-3.5-turbo")
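ChatOpenAI also exposes get_num_tokens_from_messages, which the prompt-condensing logic below will rely on to measure prompt size. A minimal sketch of how it's used:
from langchain_core.messages import HumanMessage, SystemMessage
# Count the tokens a list of messages will consume in the model's context window.
print(
    llm.get_num_tokens_from_messages(
        [
            SystemMessage(content="You are a helpful assistant"),
            HumanMessage(content="hi"),
        ]
    )
)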
Let's first try a multi-step question without any prompt-size handling:
agent = (
{
"input": itemgetter("input"),
"agent_scratchpad": lambda x: format_to_openai_function_messages(
x["intermediate_steps"]
),
}
| prompt
| llm.bind_functions(tools)
| OpenAIFunctionsAgentOutputParser()
)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
agent_executor.invoke(
{
"input": "Who is the current US president? What's their home state? What's their home state's bird? What's that bird's scientific name?"
}
)
Unfortunately, we run out of space in our model's context window before the agent can reach the final answer. Now let's add some prompt-handling logic. To keep things simple: if our messages exceed a token budget (here, 4,000 tokens), we'll drop the earliest AI/Function message pairs from the chat history, where each pair is the model's tool-invocation message plus the subsequent tool output message.
def condense_prompt(prompt: ChatPromptValue) -> ChatPromptValue:
    messages = prompt.to_messages()
    num_tokens = llm.get_num_tokens_from_messages(messages)
    # The first two messages are the system and user messages; the rest is the
    # scratchpad of alternating AI/Function message pairs.
    ai_function_messages = messages[2:]
    # Drop the oldest AI/Function pair until the prompt fits the token budget.
    while num_tokens > 4_000:
        ai_function_messages = ai_function_messages[2:]
        num_tokens = llm.get_num_tokens_from_messages(
            messages[:2] + ai_function_messages
        )
    messages = messages[:2] + ai_function_messages
    return ChatPromptValue(messages=messages)
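Before wiring the condenser into the agent, we can sanity-check it on a synthetic oversized history. This is a rough sketch; the number of surviving pairs depends on the tokenizer:
from langchain_core.messages import (
    AIMessage,
    FunctionMessage,
    HumanMessage,
    SystemMessage,
)
# Build a fake history: system + user messages, then ten AI/Function pairs
# with long tool outputs, enough to exceed the 4,000-token budget.
fake_messages = [
    SystemMessage(content="You are a helpful assistant"),
    HumanMessage(content="hi"),
]
for i in range(10):
    fake_messages.append(AIMessage(content=f"Calling the wikipedia tool ({i})"))
    fake_messages.append(FunctionMessage(name="wikipedia", content="word " * 1_000))
condensed = condense_prompt(ChatPromptValue(messages=fake_messages))
print(len(fake_messages), "->", len(condensed.to_messages()))  # oldest pairs dropped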
agent = (
{
"input": itemgetter("input"),
"agent_scratchpad": lambda x: format_to_openai_function_messages(
x["intermediate_steps"]
),
}
| prompt
    | condense_prompt  # trim old tool messages before the prompt reaches the model
| llm.bind_functions(tools)
| OpenAIFunctionsAgentOutputParser()
)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
agent_executor.invoke(
{
"input": "Who is the current US president? What's their home state? What's their home state's bird? What's that bird's scientific name?"
}
)
{'input': "Who is the current US president? What's their home state? What's their home state's bird? What's that bird's scientific name?",
'output': 'The current US president is Joe Biden. His home state is Delaware. The home state bird of Delaware is the Delaware Blue Hen. The scientific name of the Delaware Blue Hen is Gallus gallus domesticus.'}