Run llama.cpp
# run a local GGUF model interactively with llama.cpp's main binary
./bin/main --color -b 24 -n -1 --temp 0 -ngl 1 -ins --model '/Users/yang/Downloads/openhermes-2.5-mistral-7b-16k.Q5_K_M.gguf'
Install packages
!pip install -q transformers[torch]
!pip install -q xformers
!pip install -q datasets
!pip install -q trl
!pip install -q git+https://github.com/huggingface/peft.git # PEFT from source
!pip install -q bitsandbytes==0.37.2 # pinned, used for 8-bit loading
!pip install -q -U accelerate
!pip install -q evaluate
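Optional sanity check that the core libraries import (a minimal sketch; trim to the packages you actually use):
import transformers, datasets, peft, accelerate, evaluate
print(transformers.__version__, peft.__version__)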
Hugging Face Hub
# option 1: interactive login (prompts for a token)
from huggingface_hub import login
login()
# option 2: notebook login (widget)
from huggingface_hub import notebook_login
notebook_login() # ensure token gives write access
# option 3: key login
# from huggingface_hub import login
# write_key = 'hf_' # paste token here
# login(write_key)
hf_name = 'shawhin' # your hf username or org name
model_id = hf_name + "/" + model_checkpoint + "-lora-text-classification" # you can name the model whatever you want
model.push_to_hub(model_id) # upload the model to the Hub
trainer.push_to_hub() # upload trainer state; the target repo comes from TrainingArguments (hub_model_id)
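For context, a minimal sketch of TrainingArguments that tie the trainer to that repo (the hyperparameter values here are placeholders, not from the source):
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir=model_checkpoint + "-lora-text-classification", # local checkpoint dir
    learning_rate=1e-3, # placeholder hyperparameters
    per_device_train_batch_size=4,
    num_train_epochs=10,
    push_to_hub=True, # upload checkpoints to the Hub
    hub_model_id=model_id, # target repo for trainer.push_to_hub()
)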
# how to load a peft model from the hub for inference
from peft import PeftConfig, PeftModel
from transformers import AutoModelForSequenceClassification, AutoTokenizer

config = PeftConfig.from_pretrained(model_id)
inference_model = AutoModelForSequenceClassification.from_pretrained(
    config.base_model_name_or_path, num_labels=2, id2label=id2label, label2id=label2id
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
model = PeftModel.from_pretrained(inference_model, model_id)
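Quick smoke test with the loaded adapter (a sketch; the example sentence is made up):
import torch

text = "This movie was surprisingly good."
inputs = tokenizer(text, return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits
print(id2label[logits.argmax(dim=-1).item()])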
Load models
from transformers import AutoModelForCausalLM

repo_id = "meta-llama/Llama-2-7b-chat-hf" # modify to whatever model you want to use
base_model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    device_map='auto',
    load_in_8bit=True, # 8-bit quantization via bitsandbytes
    trust_remote_code=True,
)
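To fine-tune this 8-bit base model, LoRA adapters can be attached. A sketch; the rank, alpha, and target modules below are assumptions (the target names fit Llama-style attention), not values from the source:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

base_model = prepare_model_for_kbit_training(base_model) # stabilizes quantized training
lora_config = LoraConfig(
    r=8, # assumed rank
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"], # typical for Llama-style attention
    lora_dropout=0.05,
    task_type="CAUSAL_LM",
)
model = get_peft_model(base_model, lora_config)
model.print_trainable_parameters() # only the adapter weights should be trainable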
# binary classification head
model = AutoModelForSequenceClassification.from_pretrained(
    'bert-base-uncased', num_labels=2, id2label=id2label, label2id=label2id)
# 5-class head (e.g. star ratings)
model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased", num_labels=5)
Load datasets
from datasets import load_dataset

# Hub
dataset = load_dataset('yelp_review_full')
dataset = load_dataset("shawhin/imdb-truncated")
# Local
dataset = load_dataset("csv", data_files="your_data_here.csv")
Process datasets
# small, reproducible eval subset: seeded shuffle, then take 1000 examples
small_eval_dataset = dataset["test"].shuffle(seed=42).select(range(1000))
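The same pattern gives a matching small training subset:
small_train_dataset = dataset["train"].shuffle(seed=42).select(range(1000))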
Tokenizer
tokenizer = AutoTokenizer.from_pretrained(repo_id)
# if the tokenizer has no pad token, use one of:
tokenizer.add_special_tokens({'pad_token': '[PAD]'})
model.resize_token_embeddings(len(tokenizer)) # new token means a larger embedding table
# or reuse the end-of-sequence token as padding
tokenizer.pad_token = tokenizer.eos_token
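In practice it's worth guarding so a pad token is only added when actually missing (a small sketch):
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
    model.resize_token_embeddings(len(tokenizer))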
# create tokenize function
def tokenize_function(examples):
    # extract text
    text = examples["text"]

    # tokenize and truncate text (truncation_side="left" keeps the end of long inputs)
    tokenizer.truncation_side = "left"
    tokenized_inputs = tokenizer(
        text,
        return_tensors="np",
        truncation=True,
        max_length=512
    )

    return tokenized_inputs
# tokenize training and validation datasets
tokenized_dataset = dataset.map(tokenize_function, batched=True)
tokenized_dataset
# dynamic padding: the collator pads each batch to its longest sequence,
# more efficient than padding everything to max_length
from transformers import DataCollatorWithPadding

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
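Quick check of what the collator produces (a sketch; assumes the tokenized dataset still has 'text' and 'label' columns):
features = tokenized_dataset["train"].remove_columns(["text"]).select(range(4))
batch = data_collator([features[i] for i in range(4)])
print(batch["input_ids"].shape) # (4, longest sequence in this batch)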
Classification
import torch

# tokenize text
inputs = tokenizer.encode(text, return_tensors="pt")
# compute logits (no gradient tracking needed at inference time)
with torch.no_grad():
    logits = model(inputs).logits
# convert logits to a label
predictions = torch.argmax(logits)
id2label[predictions.tolist()]
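Batched variant (a sketch; the example texts are made up):
texts = ["Great movie!", "Terrible acting."]
inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)
with torch.no_grad():
    logits = model(**inputs).logits
predictions = torch.argmax(logits, dim=-1) # argmax over the class dimension
print([id2label[p] for p in predictions.tolist()])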
Eval
import numpy as np
import evaluate
# accuracy is a good default metric for classification
accuracy = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return accuracy.compute(predictions=predictions, references=labels)
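To wire this into training (a sketch; reuses the training_args and data_collator defined above, and assumes the tokenized dataset has a 'validation' split):
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)
trainer.train()
trainer.evaluate() # reports accuracy via compute_metrics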