
In my opinion, the societal impact of efficient fine-tuning techniques matters even more than their technical importance. They help democratize AI by making it accessible to independent researchers and startups: with these methods, anyone can fine-tune powerful models without massive costs or supercomputers, spreading AI innovation across society and industry beyond the big tech companies.
In this video, we work through an advanced technique for fine-tuning the FLAN-T5 language model. Using LoRA, its accuracy on financial text classification improves dramatically, from just 25% up to an impressive 83%. I present the process step by step, showing how a parameter-efficient fine-tuning approach can transform a small model's performance without requiring massive computing resources.
Whether you are an AI practitioner working in finance, a research engineer exploring new transfer-learning strategies, or simply curious about how LoRA bridges the gap between large-scale models and real-world domain tasks, this walkthrough gives you concrete code snippets, insights on hyperparameter tuning, and lessons from our experiments. Get ready to shift your accuracy baseline and push your model to the next level.
The code is available as a Python notebook on my GitHub: https://github.com/saidplayer/AI-and-Machine-Learning/blob/main/FLAN_T5_LoRa_Finetuning.ipynb
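Before running the notebook you need the Hugging Face transformers, peft, and datasets libraries, plus scikit-learn for the confusion matrices later on. The exact package list below is my assumption rather than something pinned in the notebook; a minimal setup cell could look like this:

!pip install torch transformers peft datasets accelerate scikit-learn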
from transformers import (
    T5ForConditionalGeneration,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
)
from peft import (
    PeftModel,
    TaskType,
    LoraConfig,
    get_peft_model,
)
import datasets

model_name = "./saved_models/Base"

def print_trainable_parameters(model):
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param:.2f}"
    )

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)
print_trainable_parameters(model)

trainable params: 247577856 || all params: 247577856 || trainable%: 100.00

lora_config = LoraConfig(
    task_type=TaskType.SEQ_2_SEQ_LM,
    inference_mode=False,
    r=10,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["q", "v"],
    bias="none",
)
model = get_peft_model(model, lora_config)
print_trainable_parameters(model)

trainable params: 1105920 || all params: 248683776 || trainable%: 0.44
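That 1,105,920 figure is easy to verify by hand. With r=10 and target_modules=["q", "v"], every adapted projection in flan-t5-base (hidden size 768) gets two low-rank matrices, A of shape r x 768 and B of shape 768 x r, and the q/v projections appear in the encoder self-attention (12 layers x 2) plus the decoder self- and cross-attention (12 layers x 2 x 2). A quick back-of-the-envelope check, assuming the standard flan-t5-base architecture:

d_model, r = 768, 10                      # flan-t5-base hidden size, LoRA rank
params_per_module = 2 * d_model * r       # A (r x d_model) + B (d_model x r) = 15,360
n_modules = 12 * 2 + 12 * 2 * 2           # encoder q,v + decoder self/cross-attention q,v = 72
print(params_per_module * n_modules)      # 1,105,920 -- matches the printout above

At inference time the learned update is scaled by lora_alpha / r = 3.2 before being added to the frozen weights, which is why the rank and alpha are usually tuned together.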
dataset = datasets.load_dataset("json", data_files=["./dataset.json"], split="train")

def clean(example):
    # keep only the text before any URL in the context
    example["context"] = example["context"].split("http")[0]
    return example

dataset = dataset.map(clean)
dataset = dataset.train_test_split(0.1, shuffle=True, seed=42)
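The clean step simply truncates each context at the first occurrence of "http", dropping any trailing URL. A quick illustration on a made-up record (not taken from the actual dataset):

example = {"context": "Shares of the company rose 5% after strong quarterly results. http://example.com/news", "target": "positive"}
print(clean(example)["context"])   # -> "Shares of the company rose 5% after strong quarterly results. "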
dataset

DatasetDict({
    train: Dataset({
        features: ['context', 'target'],
        num_rows: 8588
    })
    test: Dataset({
        features: ['context', 'target'],
        num_rows: 955
    })
})

def data_collator(batch):
    model_inputs = tokenizer(
        [ex["context"] for ex in batch],
        padding="longest",
        truncation=True,
        return_tensors="pt"
    )
    labels = tokenizer(
        [ex["target"] for ex in batch],
        padding="longest",
        truncation=True,
        return_tensors="pt"
    )["input_ids"]
    # replace padding token ids with -100 so they are ignored by the cross-entropy loss
    labels[labels == tokenizer.pad_token_id] = -100
    model_inputs["labels"] = labels
    return model_inputs
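As a quick sanity check, the collator can be called directly on a couple of toy records; the sentences and labels below are hypothetical and only meant to show the padded shapes and the -100 masking:

toy_batch = data_collator([
    {"context": "Quarterly profit rose 12% year on year.", "target": "positive"},
    {"context": "The company issued a profit warning.", "target": "negative"},
])
print(toy_batch["input_ids"].shape, toy_batch["labels"].shape)   # padded input ids and label ids
print((toy_batch["labels"] == -100).any())                       # True only if some label position was padding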
training_args = TrainingArguments(
    output_dir='./saved_models/training_log',
    logging_steps=10,
    num_train_epochs=3,
    per_device_train_batch_size=8,
    gradient_accumulation_steps=2,
    learning_rate=1e-4,
    warmup_steps=200,
    weight_decay=0.01,
    # fp16=True,
    torch_compile=False,
    remove_unused_columns=False,
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    data_collator=data_collator,
)
trainer.train()
model.save_pretrained("./saved_models/finetuned_model")
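It is worth noting the effective batch size and schedule these settings imply. A small back-of-the-envelope check, using the train split size shown above:

train_rows = 8588                                     # size of the train split
effective_batch = 8 * 2                               # per_device_train_batch_size * gradient_accumulation_steps
steps_per_epoch = -(-train_rows // effective_batch)   # ceiling division -> 537 optimizer steps
print(steps_per_epoch, steps_per_epoch * 3)           # 537 steps/epoch, about 1611 steps over 3 epochs

The 200 warmup steps therefore cover roughly the first 12% of training.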
Assessing the performance before and after fine tuning

model_size = "base"
base_model = T5ForConditionalGeneration.from_pretrained("./saved_models/" + model_size)
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-" + model_size)
X = list(dataset["test"]['context'])
Y = list(dataset["test"]['target'])

tokenizer.decode(base_model.generate(input_ids=tokenizer(X[10], return_tensors="pt")["input_ids"], attention_mask=tokenizer(X[10], return_tensors="pt")["attention_mask"])[0], skip_special_tokens=True)

'negative'

import copy
peft_model = PeftModel.from_pretrained(copy.deepcopy(base_model), "./saved_models/finetuned_model")
tokenizer.decode(peft_model.generate(input_ids=tokenizer(X[10], return_tensors="pt")["input_ids"], attention_mask=tokenizer(X[10], return_tensors="pt")["attention_mask"])[0], skip_special_tokens=True)

'neutral'

import copy, time
x0 = 0
n = len(X)
def pred_base(example):
    res = tokenizer(example, return_tensors="pt")
    res = base_model.generate(input_ids=res["input_ids"], attention_mask=res["attention_mask"])
    res = tokenizer.decode(res[0], skip_special_tokens=True)
    return res
t0 = time.time()
Ypred_base = [pred_base(x) for x in X[x0:x0+n]]
base_accuracy = sum([Ypred_base[i] == Y[x0+i] for i in range(n)]) / n * 100
t1 = time.time()
print(f"Base Inference: {(t1-t0)/n:.3f} sec/sample")
print(f"Base Accuracy: {base_accuracy:.0f}%")
def pred_peft(example):
    res = tokenizer(example, return_tensors="pt")
    res = peft_model.generate(input_ids=res["input_ids"], attention_mask=res["attention_mask"], max_new_tokens=5)
    res = tokenizer.decode(res[0], skip_special_tokens=True)
    return res
t0 = time.time()
Ypred_peft = [pred_peft(x) for x in X[x0:x0+n]]
peft_accuracy = sum([Ypred_peft[i] == Y[x0+i] for i in range(n)]) / n * 100
t1 = time.time()
print(f"Peft Inference: {(t1-t0)/n:.3f} sec/sample")
print(f"Peft Accuracy: {peft_accuracy:.0f}%")

Base Inference: 0.161 sec/sample
Base Accuracy: 25%
Peft Inference: 0.171 sec/sample
Peft Accuracy: 83%

from sklearn.metrics import confusion_matrix
print(f"{(confusion_matrix(Y, Ypred_base, normalize='true').round(3) *100)}\n")
print(f"{(confusion_matrix(Y, Ypred_peft, normalize='true').round(3) *100)}")

[[99.3  0.   0.7]
 [70.3  0.  29.7]
 [45.7  0.  54.3]]

[[69.9 23.5  6.6]
 [ 4.8 89.1  6.1]
 [ 1.5 25.9 72.6]]
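Rows are true classes and columns are predicted classes, both in alphabetical order, since scikit-learn sorts string labels; normalize='true' makes each row sum to 100%. Notice that the base model's middle column is all zeros, i.e. it never predicts the middle class, while the LoRA-tuned model spreads its predictions across all three. A small helper, my own addition rather than part of the notebook, prints the matrix with its labels attached:

def labelled_cm(y_true, y_pred):
    labels = sorted(set(y_true))   # alphabetical, matching sklearn's default ordering
    cm = confusion_matrix(y_true, y_pred, labels=labels, normalize="true") * 100
    print(f"{'true/pred':>10}", *[f"{l:>10}" for l in labels])
    for label, row in zip(labels, cm.round(1)):
        print(f"{label:>10}", *[f"{v:>10}" for v in row])

labelled_cm(Y, Ypred_peft)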
Merging into a single model

from transformers import pipeline
import copy
peft_model = PeftModel.from_pretrained(copy.deepcopy(base_model), "./saved_models/finetuned_model")
peft_model = peft_model.merge_and_unload()
peft_model.save_pretrained("./saved_models/merged_model")
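merge_and_unload folds the low-rank update back into the base weights, so the saved checkpoint is a plain T5 model with no peft dependency. One quick sanity check, again my own addition, is that its parameter count matches the original base model:

merged_check = T5ForConditionalGeneration.from_pretrained("./saved_models/merged_model")
print_trainable_parameters(merged_check)   # expect the same ~247.6M parameters as the unmodified base model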
# load using pipeline
peft_model_ppln = pipeline("text2text-generation", model="./saved_models/merged_model", tokenizer="./saved_models/" + model_size)
peft_model_ppln("who is the current president of the US?")

[{'generated_text': 'Mitt Romney'}]

t0 = time.time()
Ypred_peft_ppln = peft_model_ppln(X[x0:x0+n], max_new_tokens=5)
Ypred_peft_ppln = [y["generated_text"] for y in Ypred_peft_ppln]
peft_accuracy_ppln = sum([Ypred_peft_ppln[i] == Y[x0+i] for i in range(n)]) / n * 100
t1 = time.time()
print(f"Pipeline Inference: {(t1-t0)/n:.3f} sec/sample")
print(f"Peft Accuracy: {peft_accuracy_ppln:.0f}%")

Pipeline Inference: 0.215 sec/sample
Peft Accuracy: 83%

peft_model = T5ForConditionalGeneration.from_pretrained("./saved_models/merged_model")
def pred_peft(example):
    res = tokenizer(example, return_tensors="pt")
    res = peft_model.generate(input_ids=res["input_ids"], attention_mask=res["attention_mask"], max_new_tokens=5)
    res = tokenizer.decode(res[0], skip_special_tokens=True)
    return res
t0 = time.time()
Ypred_peft = [pred_peft(x) for x in X[x0:x0+n]]
peft_accuracy = sum([Ypred_peft[i] == Y[x0+i] for i in range(n)]) / n * 100
t1 = time.time()
print(f"Peft Inference: {(t1-t0)/n:.3f} sec/sample")
print(f"Peft Accuracy: {peft_accuracy:.0f}%")

Peft Inference: 0.148 sec/sample
Peft Accuracy: 83%