In the previous post, we explored how to configure LangChain with Hugging Face Transformers. Building on that, I created my own custom class that can be used much like the ChatOpenAI() class. I hope it proves useful wherever needed. If you spot any issues or have corrections, please feel free to reach out.
Directory
└── ai
    ├── local
    │   └── langchain_brainai.py
    │       ├── ChatBrainAI(AIModelBase)
    │       ├── stream_response()
    │       └── invoke_response()
    └── aibase.py
        └── AIModelBase(metaclass=ABCMeta)
My Own Custom Class, ChatBrainAI

First comes the abstract base class, AIModelBase (aibase.py), which defines the interface that every model wrapper must implement:
from abc import ABCMeta
from abc import abstractmethod


class AIModelBase(metaclass=ABCMeta):
    @abstractmethod
    def __init__(self):
        pass

    @abstractmethod
    def generate(self, prompt: str):
        pass
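As a quick sanity check (a minimal sketch of my own, not part of the original code), ABCMeta prevents the base class from being instantiated until a subclass implements both abstract methods:

# Hypothetical snippet, only to illustrate how ABCMeta enforces the interface.
try:
    AIModelBase()
except TypeError as e:
    print(f"Cannot instantiate the abstract base: {e}")

class EchoModel(AIModelBase):
    """Throwaway concrete subclass used purely for illustration."""
    def __init__(self):
        pass
    def generate(self, prompt: str):
        return prompt

print(EchoModel().generate("hello"))  # -> hello

Next comes the concrete class. ChatBrainAI (langchain_brainai.py) wraps a locally downloaded Hugging Face model behind this interface: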
import os

from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain_huggingface import HuggingFacePipeline

from ai.aibase import AIModelBase


class ChatBrainAI(AIModelBase):
    def __init__(
        self,
        model_name: str = "gemma-2-2b-it",
        temperature: float = 0.1,
    ):
        try:
            # Local models are expected under <project root>/Pretrained_byGit/<model_name>
            path = os.path.join(os.getcwd(), "Pretrained_byGit")
            self.__model_name = os.path.join(path, model_name)
            self.__temperature = temperature
            # 1. Load the model and tokenizer using transformers
            self.__tokenizer = AutoTokenizer.from_pretrained(self.__model_name)
            self.__model = AutoModelForCausalLM.from_pretrained(
                self.__model_name, device_map="auto"
            )
            # Some parameter settings are required depending on the local model.
            if self.__tokenizer.pad_token_id is None:
                self.__tokenizer.pad_token_id = 0
                print(f"Pad token ID is set to: {self.__tokenizer.pad_token_id}")
            else:
                print(f"Pad token ID already set: {self.__tokenizer.pad_token_id}")
            # 2. Configure the pipeline
            self.__pipe = pipeline(
                "text-generation",
                model=self.__model,
                tokenizer=self.__tokenizer,
                max_new_tokens=512,
                do_sample=True,
                temperature=self.__temperature,
                pad_token_id=self.__tokenizer.pad_token_id,
            )
            # 3. Create a HuggingFacePipeline object for LangChain
            self.__pipeline = HuggingFacePipeline(pipeline=self.__pipe)
        except Exception as e:
            print(f"Error occurred while creating LangChain:\n{e}")
            self.__pipeline = None

    def generate(self, prompt: str):
        if self.__pipeline is None:
            return None
        try:
            # Return the HuggingFacePipeline runnable; when ChatBrainAI is used
            # inside an LCEL chain, LangChain invokes or streams this runnable
            # with the incoming prompt value.
            response = self.__pipeline
            return response
        except Exception as e:
            print(f"Error occurred: {e}")
            return None

    # Use the object like a function, similar to ChatOpenAI()
    def __call__(self, prompt: str):
        return self.generate(prompt)
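A note on how this plugs into LCEL (my reading of LangChain's behavior, not something stated in the original post): the | operator coerces a plain callable into a RunnableLambda, and because generate() returns the HuggingFacePipeline runnable, LangChain then invokes or streams that pipeline with the formatted prompt. Written out explicitly, the implicit coercion looks roughly like this:

from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda

llm = ChatBrainAI("gemma-2-2b-it")  # any model folder under Pretrained_byGit
prompt = PromptTemplate.from_template("Question: {question}\nAnswer:")

# `prompt | llm` performs this wrapping implicitly because llm is callable.
chain = prompt | RunnableLambda(llm)
print(chain.invoke({"question": "What is the capital of France?"}))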
Helper functions for printing the output when the chain is called with invoke() and stream()
- invoke_response
from langchain_core.messages import AIMessageChunk, AIMessage


def invoke_response(response, suffix="", return_output=False):
    answer = ""
    if suffix != "":
        # Keep only the text after the last occurrence of `suffix`
        # (useful when the pipeline returns the prompt plus the completion).
        answer = response.split(suffix)[-1].strip()
        print(answer)
    else:
        if isinstance(response, AIMessage):
            # langchain_core.messages.ai.AIMessage
            answer = response.content
            print(answer)
        elif isinstance(response, str):
            answer = response
            print(answer)
        else:
            print(f"The type of response is ... {type(response)}...\n Please check!")
    if return_output:
        return answer
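To show what the suffix argument does, here is a hypothetical raw generation (text-generation pipelines return the prompt plus the completion by default); splitting on the last special token keeps only the assistant's answer:

raw = (
    "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n"
    "You are a friendly AI assistant.\n"
    "<|eot_id|><|start_header_id|>user<|end_header_id|>Hi!\n"
    "<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n"
    "Hello! How can I help you today?"
)
invoke_response(raw, "<|end_header_id|>")  # prints: Hello! How can I help you today?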
- stream_response
from langchain_core.messages import AIMessageChunk, AIMessage


def stream_response(response, return_output=False):
    answer = ""
    for chunk in response:
        if isinstance(chunk, AIMessageChunk):
            # langchain_core.messages.ai.AIMessageChunk
            answer += chunk.content
            print(chunk.content, end="", flush=True)
        elif isinstance(chunk, str):
            answer += chunk
            print(chunk, end="", flush=True)
        else:
            print(f"The type of chunk is ... {type(chunk)}...\n Please check!")
    if return_output:
        return answer
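The loop accepts anything iterable that yields strings or AIMessageChunk objects, so it can be tried without loading a model, for example with a toy generator standing in for chain.stream() (illustrative only):

def fake_stream():
    # Stands in for the chunks yielded by chain.stream().
    for token in ["Washington", ", ", "D.C."]:
        yield token

text = stream_response(fake_stream(), return_output=True)
# Prints "Washington, D.C." incrementally and returns the accumulated string.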
Example using ChatBrainAI()
from ai.local.langchain_brainai import ChatBrainAI, stream_response, invoke_response
from langchain_core.prompts import PromptTemplate

llm = ChatBrainAI("Llama-3.2-3B-Instruct")

template = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a friendly AI assistant. Your name is DS2Man. Please answer questions briefly.
<|eot_id|><|start_header_id|>user<|end_header_id|>{question}
<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""
prompt = PromptTemplate.from_template(template)

chain = prompt | llm
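The special tokens in the template follow the Llama 3.x chat format used by Llama-3.2-3B-Instruct. If you would rather not hard-code them, the tokenizer's own chat template can build an equivalent scaffold; this is only a sketch assuming the same Pretrained_byGit layout, and the exact string may differ slightly between tokenizer versions (newer Llama templates add a date header, for example):

from transformers import AutoTokenizer
from langchain_core.prompts import PromptTemplate

tokenizer = AutoTokenizer.from_pretrained("Pretrained_byGit/Llama-3.2-3B-Instruct")
messages = [
    {"role": "system", "content": "You are a friendly AI assistant. Your name is DS2Man. Please answer questions briefly."},
    {"role": "user", "content": "{question}"},
]
template = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
prompt = PromptTemplate.from_template(template)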
question = "What is the capital of the United States?"
response = chain.stream({"question": question})
stream_response(response)
The capital of the United States is Washington, D.C.
question = "What is the capital of the United States?"
response = chain.invoke({"question": question})
invoke_response(response, "<|end_header_id|>")
The capital of the United States is Washington, D.C.