FiveTech Software tech support forums

by **Antonio Linares** » Mon Jan 08, 2024 10:27 pm

go.bat

Code: Select all Expand view: pip install -q -U bitsandbytes pip install -q -U git+https://github.com/huggingface/transformers.git pip install -q -U git+https://github.com/huggingface/peft.git pip install -q -U git+https://github.com/huggingface/accelerate.git pip install -q -U datasets scipy ipywidgets matplotlib einops pip install -q -U torch pip install -q -U trl pip install -q -U huggingface_hub

phi2_finetune.py

"""Fine-tune microsoft/phi-2 on the fivetech/forums2 dataset with 4-bit LoRA (SFTTrainer)."""

import peft
import torch
from datasets import load_dataset
from huggingface_hub import notebook_login
from peft import (
    LoraConfig,
    PeftConfig,
    get_peft_model,
    prepare_model_for_kbit_training,
)
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
)
from trl import SFTTrainer

base_model = "microsoft/phi-2"
new_model = "phi-2-fivetech_forums"

# Only the "train" split is used below, for both training and evaluation.
dataset = load_dataset("fivetech/forums2")
forum_split = dataset["train"]

# The EOS token doubles as the padding token; padding goes on the right.
tokenizer = AutoTokenizer.from_pretrained(base_model, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# 4-bit NF4 quantization, float16 compute, no double quantization.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False,
)

model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=quant_config,
    # use_flash_attention_2=True, # Phi does not support yet.
    trust_remote_code=True,
    flash_attn=True,
    flash_rotary=True,
    fused_dense=True,
    low_cpu_mem_usage=True,
    device_map={"": 0},
    revision="refs/pr/23",
)
model.config.use_cache = False
model.config.pretraining_tp = 1

model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)

train_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=80,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=32,
    evaluation_strategy="steps",
    eval_steps=2000,
    logging_steps=15,
    optim="paged_adamw_8bit",
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    save_steps=2000,
    warmup_ratio=0.05,
    weight_decay=0.01,
    report_to="tensorboard",
    max_steps=-1,  # a positive value (e.g. 2) would stop training after that many steps
)

lora_config = LoraConfig(
    r=32,
    lora_alpha=64,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    # Alternative tried by the author: ["Wqkv", "out_proj", "fc1", "fc2"] - 41M params
    target_modules=["Wqkv", "fc1", "fc2"],
    # modules_to_save=["embed_tokens","lm_head"]
)

trainer = SFTTrainer(
    model=model,
    train_dataset=forum_split,
    eval_dataset=forum_split,  # no separate evaluation dataset; the training split is reused
    peft_config=lora_config,
    dataset_text_field="topic",  # "text"
    max_seq_length=690,
    tokenizer=tokenizer,
    args=train_args,
)

trainer.train()
trainer.save_model(new_model)

# notebook_login()
# trainer.push_to_hub()

by **Antonio Linares** » Mon Jan 08, 2024 10:52 pm

For those of you who have a computer with a GPU, we would appreciate it if you could test it and report back on how it works

many thanks!

by **Antonio Linares** » Mon Jan 15, 2024 6:05 am

go.bat

Code: Select all Expand view: pip install -q -U bitsandbytes pip install -q -U git+https://github.com/huggingface/transformers.git pip install -q -U git+https://github.com/huggingface/peft.git pip install -q -U git+https://github.com/huggingface/accelerate.git pip install -q -U datasets scipy ipywidgets matplotlib einops

phi2-FT.py

"""LoRA fine-tuning of microsoft/phi-2 (loaded in 8-bit) on fivetech/forums with transformers.Trainer."""

from datetime import datetime

import matplotlib.pyplot as plt
import torch
import transformers
from accelerate import Accelerator, FullyShardedDataParallelPlugin
from datasets import load_dataset
from peft import LoraConfig, get_peft_model
from torch.distributed.fsdp.fully_sharded_data_parallel import (
    FullOptimStateDictConfig,
    FullStateDictConfig,
)
from transformers import AutoModelForCausalLM, AutoTokenizer

fsdp_plugin = FullyShardedDataParallelPlugin(
    state_dict_config=FullStateDictConfig(offload_to_cpu=True, rank0_only=False),
    optim_state_dict_config=FullOptimStateDictConfig(offload_to_cpu=True, rank0_only=False),
)
accelerator = Accelerator(fsdp_plugin=fsdp_plugin)

# The same dataset is loaded for training and for evaluation.
train_dataset = load_dataset("fivetech/forums")
eval_dataset = load_dataset("fivetech/forums")

base_model_id = "microsoft/phi-2"
model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    trust_remote_code=True,
    torch_dtype=torch.float16,
    load_in_8bit=True,
)


def formatting_func(example):
    """Build the prompt text from a record's 'input' and 'output' fields."""
    return f"### Question: {example['input']}\n ### Answer: {example['output']}"


tokenizer = AutoTokenizer.from_pretrained(
    base_model_id,
    padding_side="left",
    add_eos_token=True,
    add_bos_token=True,
    use_fast=False,  # needed for now, should be fixed soon
)
tokenizer.pad_token = tokenizer.eos_token


def generate_and_tokenize_prompt(prompt):
    """Tokenize the formatted prompt without truncation or padding."""
    formatted = formatting_func(prompt)
    return tokenizer(formatted)


tokenized_train_dataset = train_dataset.map(generate_and_tokenize_prompt)
tokenized_val_dataset = eval_dataset.map(generate_and_tokenize_prompt)


def plot_data_lengths(tokenized_train_dataset, tokenized_val_dataset):
    """Print the sample count and show a histogram of input_ids lengths for both 'train' splits."""
    lengths = []
    for split in (tokenized_train_dataset["train"], tokenized_val_dataset["train"]):
        lengths.extend(len(row["input_ids"]) for row in split)
    print(len(lengths))

    # Histogram of the token counts
    plt.figure(figsize=(10, 6))
    plt.hist(lengths, bins=20, alpha=0.7, color="blue")
    plt.xlabel("Length of input_ids")
    plt.ylabel("Frequency")
    plt.title("Distribution of Lengths of input_ids")
    plt.show()


plot_data_lengths(tokenized_train_dataset, tokenized_val_dataset)

max_length = 512  # This was an appropriate max length for my dataset


def generate_and_tokenize_prompt2(prompt):
    """Tokenize the formatted prompt to exactly max_length tokens and copy input_ids into labels."""
    encoded = tokenizer(
        formatting_func(prompt),
        truncation=True,
        max_length=max_length,
        padding="max_length",
    )
    encoded["labels"] = encoded["input_ids"].copy()
    return encoded


tokenized_train_dataset = train_dataset.map(generate_and_tokenize_prompt2)
tokenized_val_dataset = eval_dataset.map(generate_and_tokenize_prompt2)

eval_prompt = "Como crear indices"

# Separate tokenizer for the sanity-check generation so it doesn't add padding or an EOS token
eval_tokenizer = AutoTokenizer.from_pretrained(
    base_model_id,
    add_bos_token=True,
    use_fast=False,  # needed for now, should be fixed soon
)

model_input = eval_tokenizer(eval_prompt, return_tensors="pt").to("cuda")

# Generate once with the model before any adapter is attached.
model.eval()
with torch.no_grad():
    generated = model.generate(**model_input, max_new_tokens=256, repetition_penalty=1.15)
    print(eval_tokenizer.decode(generated[0], skip_special_tokens=True))


def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    all_param = sum(param.numel() for _, param in model.named_parameters())
    trainable_params = sum(
        param.numel() for _, param in model.named_parameters() if param.requires_grad
    )
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )


config = LoraConfig(
    r=32,
    lora_alpha=64,
    target_modules=[
        "Wqkv",
        "fc1",
        "fc2",
    ],
    bias="none",
    lora_dropout=0.05,  # Conventional
    task_type="CAUSAL_LM",
)

model = get_peft_model(model, config)
print_trainable_parameters(model)

model = accelerator.prepare_model(model)

if torch.cuda.device_count() > 1:  # If more than 1 GPU
    model.is_parallelizable = True
    model.model_parallel = True

project = "journal-finetune"
base_model_name = "phi2"
run_name = f"{base_model_name}-{project}"
output_dir = f"./{run_name}"

training_args = transformers.TrainingArguments(
    output_dir=output_dir,
    warmup_steps=1,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=1,
    max_steps=1000,  # was 500
    learning_rate=2.5e-5,  # Want a small lr for finetuning
    optim="paged_adamw_8bit",
    logging_steps=25,  # log every 25 steps
    logging_dir="./logs",  # directory for storing logs
    save_strategy="steps",  # checkpoint on a step interval
    save_steps=25,  # save a checkpoint every 25 steps
    evaluation_strategy="steps",  # evaluate on a step interval
    eval_steps=25,  # evaluate every 25 steps
    do_eval=True,
    # report_to="wandb",  # left disabled; enable to report to Weights & Biases
    run_name=f"{run_name}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}",  # name of the W&B run (optional)
)

trainer = transformers.Trainer(
    model=model,
    train_dataset=tokenized_train_dataset["train"],
    eval_dataset=tokenized_val_dataset["train"],
    args=training_args,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
torch.cuda.empty_cache()
trainer.train()

by **Antonio Linares** » Mon Jan 15, 2024 6:09 am

https://medium.com/@mohamedahmedkrichen/a-comprehensive-guide-to-fine-tuning-the-microsoft-phi-2-model-free-notebook-52a4b5e486aa

go.bat

Code: Select all Expand view: !pip install einops !pip install peft !pip install trl !pip install bitsandbytes !pip install datasets==2.16

run.py

"""Fine-tune microsoft/phi-2 with LoRA on the MedQuAD question/answer CSV (Kaggle paths)."""

import json
import os
import warnings
from dataclasses import dataclass, field
from typing import Optional

import pandas as pd
import torch
from datasets import load_dataset
from datasets import load_from_disk
from huggingface_hub import interpreter_login
from peft import LoraConfig
from tqdm.notebook import tqdm
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
)
from trl import SFTTrainer

os.environ["WANDB_DISABLED"] = "true"
warnings.filterwarnings("ignore")

# One path for both the writer and the reader of the intermediate JSON file.
# The original wrote to the relative 'data.json' and read back from this
# absolute path, which only matched when the working directory was /kaggle/working.
DATA_JSON_PATH = "/kaggle/working/data.json"

# Keep the first two CSV columns and name them question ("text") / answer ("label").
df = pd.read_csv("/kaggle/input/layoutlm/medquad.csv")
df = df.iloc[:, :2]
df.columns = ["text", "label"]
print(df.head())

# Wrap the records as {"json": [...]} so load_dataset(..., field='json') can read them.
# Round-tripping through DataFrame.to_json gives the same records the original built
# by splicing characters into the JSON string (that code also contained the
# unterminated literal `result.strip('"'')`, a syntax error).
records = json.loads(df.to_json(orient="records"))
with open(DATA_JSON_PATH, "w") as json_file:
    json.dump({"json": records}, json_file)


def formatting_func(example):
    """Return the training prompt built from a record's 'text' and 'label' fields."""
    return f"### Question: {example['text']}\n ### Answer: {example['label']}"


def add_prompt_column(example):
    """Return a 'prompt' column holding the formatted question/answer text."""
    return {"prompt": formatting_func(example)}


#interpreter_login()

# NOTE: this config is not used; quantization_config is commented out in the
# from_pretrained call below, so the model is loaded unquantized.
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype='float16',
    bnb_4bit_use_double_quant=False,
)

model = AutoModelForCausalLM.from_pretrained(
    "microsoft/phi-2",
    #quantization_config=bnb_config,
    device_map='auto',
    trust_remote_code=True,
    use_auth_token=False,
)
model.config.pretraining_tp = 1

peft_config = LoraConfig(
    r=32,
    lora_alpha=16,
    bias="none",
    lora_dropout=0.05,  # Conventional
    task_type="CAUSAL_LM",
)

tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2", trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

training_arguments = TrainingArguments(
    output_dir="./results",
    num_train_epochs=4,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    save_strategy="epoch",
    logging_steps=100,
    logging_strategy="steps",
    learning_rate=2e-4,
    fp16=False,
    bf16=False,
    group_by_length=True,
    disable_tqdm=False,
    report_to=None,
)

model.config.use_cache = False

dataset = load_dataset("json", data_files=DATA_JSON_PATH, field='json', split="train")
# The original pre-tokenized the formatted prompt with dataset.map() but then
# pointed SFTTrainer at the raw "text" column (the question alone), so the
# formatted question/answer text was not what dataset_text_field selected.
# Store the formatted prompt in its own column and train on that column instead.
dataset = dataset.map(add_prompt_column)

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    dataset_text_field="prompt",
    max_seq_length=2048,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=False,
)
trainer.train()

# Show one reference question/answer pair (a bare expression in the notebook,
# which displays nothing when run as a script).
sample_row = df.iloc[5004]
print(sample_row['text'], sample_row['label'], sep="\n")

inputs = tokenizer(
    '''Instruct:What are the treatments for Acanthoma \n Output:''',
    return_tensors="pt",
    return_attention_mask=False,
).to(model.device)  # keep the inputs on the same device as the model
outputs = model.generate(**inputs, max_length=100)
# Decode the whole sequence at once; decoding token by token and joining can
# split multi-byte characters.
text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(text)

# Disabled in the original (kept there as a string literal): the same kind of
# query against the untouched base model, for comparison.
# torch.set_default_device("cuda")
# model_test = AutoModelForCausalLM.from_pretrained("microsoft/phi-2", torch_dtype="auto", trust_remote_code=True)
# tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2", trust_remote_code=True)
# inputs = tokenizer('''Question: What is (are) Trigeminal Neuralgia ?\n Output:''',
#                    return_tensors="pt", return_attention_mask=False)
# outputs = model_test.generate(**inputs, max_length=100)
# text = tokenizer.batch_decode(outputs)[0]
# print(text)

by **Antonio Linares** » Mon Jan 15, 2024 7:55 am

https://medium.com/@nimritakoul01/finetuning-microsoft-phi-2-small-language-model-on-veggo-dataset-using-qlora-8bcf70ab625e

Tested on Google Colab T4

go.bat

Code: Select all Expand view: pip install accelerate==0.25.0 pip install bitsandbytes==0.41.1 pip install datasets==2.14.6 pip install peft==0.6.2 pip install transformers==4.36.2 pip install torch==2.1.0 pip install einops==0.4.1 pip install huggingface_hub

run.py

"""Run a single prompt through the base microsoft/phi-2 model on the GPU."""

import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "microsoft/phi-2"

# Every tensor created from here on defaults to the CUDA device.
torch.set_default_device("cuda")

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

prompt_ids = tokenizer(
    "What is vedanta ?",
    return_tensors="pt",
    return_attention_mask=False,
)
generated = model.generate(**prompt_ids, max_length=2000)

# batch_decode returns one string per sequence; only one sequence was generated.
print(tokenizer.batch_decode(generated)[0])

What is vedanta?
Vedanta is a Sanskrit word that means "the end of the Vedas". The Vedas are the oldest scriptures of Hinduism, which contain hymns, rituals, and philosophical teachings. Vedanta is a branch of Hindu philosophy that aims to understand the nature of reality, the self, and the supreme being.

What are the main schools of Vedanta?
There are many schools of Vedanta, but the most influential ones are:

- Advaita Vedanta: This school was founded by Adi Shankara, a Hindu philosopher and theologian, in the 8th century CE. Advaita Vedanta teaches that the ultimate reality is Brahman, the supreme being, who is the same as the individual soul, the jiva. Brahman is beyond any description, perception, or distinction, and is the source of all existence. The jiva is the individual soul that is born, lives, and dies in the material world, which is also called the purusha. The jiva is not separate from Brahman, but is a part of Brahman, and can attain liberation, or moksha, by realizing this oneness.

- Vishishtadvaita Vedanta: This school was founded by Ramanuja, a Hindu philosopher and theologian, in the 11th century CE. Vishishtadvaita Vedanta teaches that the ultimate reality is Vishnu, the preserver, who is the same as the individual soul, the jiva. Vishnu is the supreme being, who is the source of all existence, but also the creator and sustainer of the material world, which is also called the prakriti. The jiva is not separate from Vishnu, but is a part of Vishnu, and can attain liberation, or moksha, by realizing this oneness.

- Dvaita Vedanta: This school was founded by Madhvacharya, a Hindu philosopher and theologian, in the 13th century CE. Dvaita Vedanta teaches that the ultimate reality is Vishnu, the preserver, who is the same as the individual soul, the jiva. Vishnu is the supreme being, who is the source of all existence, but also the creator and sustainer of the material world, which is also called the prakriti. The jiva is not separate from Vishnu, but is a part of Vishnu, and can attain liberation, or moksha, by realizing this oneness. However, Dvaita Vedanta also teaches that there are other supreme beings, such as Shiva, the destroyer, and Brahma, the creator, who are also different from Vishnu, and who have their own realms and attributes.

- Nirguna Vedanta: This school was founded by Ramanuja, a Hindu philosopher and theologian, in the 11th century CE. Nirguna Vedanta teaches that the ultimate reality is Brahman, the supreme being, who is beyond any description, perception, or distinction. Brahman is the source of all existence, but also the creator and sustainer of the material world, which is also called the prakriti. The jiva is not separate from Brahman, but is a part of Brahman, and can attain liberation, or moksha, by realizing this oneness.

What are the main concepts of Vedanta?
Some of the main concepts of Vedanta are:

- Brahman: This is the ultimate reality, the supreme being, who is the source of all existence, and who is beyond any description, perception, or distinction. Brahman is the essence of everything, and is the cause and the effect of everything. Brahman is also the creator, the sustainer, and the destroyer of everything. Brahman is also the jiva, the individual soul, who is the same as Brahman, and who can attain liberation, or moksha, by realizing this oneness.

- Jiva: This is the individual soul, who is the agent of action, the experiencer of sensation, and the bearer of consciousness. The jiva is the essence of everything, and is the cause and the effect of everything. The jiva is also the creator, the sustainer, and the destroyer of everything. The jiva is also Brahman, the supreme being, who is the same as the jiva, and who can attain liberation, or moksha, by realizing this oneness.

- Purusha: This is the material world, which is also called the prakriti. The purusha is the source of all existence, and is the cause and the effect of everything. The purusha is also the creator, the sustainer, and the destroyer of everything. The purusha is also the jiva, the individual soul, who is the same as the purusha, and who can attain liberation, or moksha, by realizing this oneness.

- Maya: This is the illusion, the ignorance, and the bondage that prevent the jiva from realizing its true nature, which is Brahman. Maya is the cause of all suffering, and the obstacle to all liberation. Maya is also the difference between the jiva and Brahman, and the distinction between the purusha and the prakriti. Maya can be overcome by knowledge, wisdom, and devotion.

- Karma: This is the action, the deed, and the consequence that bind the jiva to the cycle of birth and death, which is also called samsara. Karma is the cause of all bondage, and the obstacle to all liberation. Karma is also the difference between the jiva and Brahman, and the distinction between the purusha and the prakriti. Karma can be overcome by knowledge, wisdom, and devotion.

- Moksha: This is the liberation, the release, and the union that the jiva can attain by realizing its true nature, which is Brahman. Moksha is the cause of all happiness, and the obstacle to all suffering. Moksha is also the difference between the jiva and Brahman, and the distinction between the purusha and the prakriti. Moksha can be achieved by knowledge, wisdom, and devotion.

What are the main sources of Vedanta?
Some of the main sources of Vedanta are:

- The Vedas: These are the oldest scriptures of Hinduism, which contain hymns, rituals, and philosophical teachings. The Vedas are divided into four parts: the Rigveda, the Samaveda, the Yajurveda, and the Atharvaveda. The Vedas are considered to be the word of God, and the source of all knowledge.

- The Upanishads: These are the later scriptures of Hinduism, which contain the philosophical teachings of the Vedas. The Upanishads are divided into two parts: the minor Upanishads, which are shorter and more practical, and the major Upanishads, which are longer and more abstract. The Upanishads are considered to be the word of Brahman, and the source of all knowledge.

- The Bhagavad Gita: This is a part of the Mahabharata, one of the two great epics of Hinduism, which narrates the story of the Kurukshetra war, and the moral and spiritual dilemmas of the characters. The Bhagavad Gita is a dialogue between Lord Krishna, the supreme being, and Arjuna, a warrior prince, who is about to fight in the war. The Bhagavad Gita is considered to be the word of Krishna, and the source of all knowledge.

- The commentaries: These are the writings of the Hindu scholars and philosophers, who have interpreted and explained the Vedas, the Upanishads, and the Bhagavad Gita. The commentaries are divided into two parts: the orthodox commentaries, which follow the traditional and orthodox views of the Vedanta schools, and the heterodox commentaries, which challenge and criticize the orthodox views of the Vedanta schools. The commentaries are considered to be the word of the scholars and philosophers, and the source of all knowledge.

What are the main branches of Vedanta?
Some of the main branches of Vedanta are:

- Advaita Vedanta: This is the school of Vedanta that was founded by Adi Shankara, a Hindu philosopher and theologian, in the 8th century CE. Advaita Vedanta teaches that the ultimate reality is Brahman, the supreme being, who is the same as the individual soul, the jiva. Brahman is beyond any description, perception, or distinction, and is the source of all existence. The jiva is not separate from Brahman, but is a part of Brahman, and can attain liberation, or moksha, by realizing this oneness.

- Vishishtadvaita Vedanta: This is the school of Vedanta that was founded by Ramanuja, a Hindu philosopher and theologian, in the 11th century CE. Vishishtadvaita Vedanta teaches that the ultimate reality is Vishnu, the preserver, who is the same as the individual soul, the jiva. Vishnu is the supreme being, who is the source of all existence, but also the creator and sustainer of the material world, which is also called the prakriti. The jiva is not separate from Vishnu, but is a part of Vishnu, and can attain liberation, or moksha, by realizing

by **Antonio Linares** » Wed Jan 17, 2024 10:48 am

Loading an extra trained layer (a LoRA adapter) on top of the base Phi-2 model, based on:
https://medium.com/@nimritakoul01/finetuning-microsoft-phi-2-small-language-model-on-veggo-dataset-using-qlora-8bcf70ab625e

Code: Select all Expand view: !pip install accelerate==0.25.0 !pip install bitsandbytes==0.41.1 !pip install datasets==2.14.6 !pip install peft==0.6.2 !pip install transformers==4.36.2 !pip install torch==2.1.0 !pip install einops==0.4.1 # Phi needs this one

"""Attach a published PEFT adapter to 8-bit microsoft/phi-2 and generate from one prompt."""

import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

BASE_MODEL_ID = "microsoft/phi-2"
ADAPTER_ID = "nimrita/phi2-finetunedonviggodataset"

base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_ID,
    load_in_8bit=True,
    device_map="auto",
    trust_remote_code=True,
    torch_dtype=torch.float16,
)

eval_tokenizer = AutoTokenizer.from_pretrained(
    BASE_MODEL_ID,
    add_bos_token=True,
    trust_remote_code=True,
    use_fast=False,
)

# Wrap the base model with the adapter weights, always re-downloading them.
ft_model = PeftModel.from_pretrained(base_model, ADAPTER_ID, force_download=True)

model_input = eval_tokenizer("what is zen ?", return_tensors="pt").to("cuda")

ft_model = ft_model.to("cuda")
ft_model.eval()
with torch.no_grad():
    generated = ft_model.generate(**model_input, max_new_tokens=200)
    print(eval_tokenizer.decode(generated[0], skip_special_tokens=True))

what is zen?
by: admin on: September 11, 2017
The meaning of zen is a form of Buddhism that is practiced by millions of people around the world. It is a way of life that emphasizes the importance of mindfulness, meditation, and self-reflection.

The practice of zen is often associated with the teachings of the Buddha, who is believed to have lived in India in the 6th century BCE. The Buddha's teachings are centered around the idea of finding inner peace and happiness through the practice of mindfulness and meditation.

One of the key principles of zen is the concept of "zenith," which refers to the peak of one's spiritual development. This is the point at which one has achieved a deep understanding of themselves and the world around them, and is able to live in harmony with others.

Another important aspect of zen is the idea of "shinzen," which refers to the process of achieving the zenith. This involves a series

by **Antonio Linares** » Wed Jan 17, 2024 6:52 pm

Locally saving a quantized fine-tuned model:

Code: Select all Expand view: !pip install accelerate==0.25.0 !pip install bitsandbytes==0.41.1 !pip install datasets==2.14.6 !pip install peft==0.6.2 !pip install transformers==4.36.2 !pip install torch==2.1.0 !pip install einops==0.4.1 # Phi needs this one

"""Merge a PEFT adapter into 8-bit microsoft/phi-2 and save model and tokenizer to ./Phi2-FT."""

import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

BASE_MODEL_ID = "microsoft/phi-2"
ADAPTER_ID = "nimrita/phi2-finetunedonviggodataset"
OUTPUT_DIR = "./Phi2-FT"

base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_ID,
    load_in_8bit=True,
    device_map="auto",
    trust_remote_code=True,
    torch_dtype=torch.float16,
)

eval_tokenizer = AutoTokenizer.from_pretrained(
    BASE_MODEL_ID,
    add_bos_token=True,
    trust_remote_code=True,
    use_fast=False,
)

peft_model = PeftModel.from_pretrained(base_model, ADAPTER_ID, force_download=True)

# Fold the adapter weights into the base model, then write both artifacts
# into the same directory.
merged_model = peft_model.merge_and_unload()
merged_model.save_pretrained(OUTPUT_DIR)
eval_tokenizer.save_pretrained(OUTPUT_DIR)

by **Antonio Linares** » Wed Jan 17, 2024 7:13 pm

Locally saving a fine-tuned model without applying quantization:

Code: Select all Expand view: !pip install accelerate==0.25.0 !pip install bitsandbytes==0.41.1 !pip install datasets==2.14.6 !pip install peft==0.6.2 !pip install transformers==4.36.2 !pip install torch==2.1.0 !pip install einops==0.4.1 # Phi needs this one

"""Merge a PEFT adapter into microsoft/phi-2 (no 8-bit loading) and save to ./Phi2-FT."""

import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

BASE_MODEL_ID = "microsoft/phi-2"
ADAPTER_ID = "nimrita/phi2-finetunedonviggodataset"
OUTPUT_DIR = "./Phi2-FT"

base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_ID,
    #load_in_8bit=True,
    device_map="auto",
    trust_remote_code=True,
    #torch_dtype=torch.float16,
)

eval_tokenizer = AutoTokenizer.from_pretrained(
    BASE_MODEL_ID,
    add_bos_token=True,
    trust_remote_code=True,
    use_fast=False,
)

peft_model = PeftModel.from_pretrained(base_model, ADAPTER_ID, force_download=True)

# Fold the adapter weights into the base model, then write both artifacts
# into the same directory.
merged_model = peft_model.merge_and_unload()
merged_model.save_pretrained(OUTPUT_DIR)
eval_tokenizer.save_pretrained(OUTPUT_DIR)

by **Antonio Linares** » Wed Jan 17, 2024 7:46 pm

Loading a fine-tuned model from disk: (don't use this, it consumes a huge amount of GPU memory! Use the next post instead)

Code: Select all Expand view: !pip install accelerate==0.25.0 !pip install bitsandbytes==0.41.1 !pip install datasets==2.14.6 !pip install peft==0.6.2 !pip install transformers==4.36.2 !pip install torch==2.1.0 !pip install einops==0.4.1 # Phi needs this one

"""Load the fine-tuned model saved in ./Phi2-FT (8-bit) and generate from one prompt."""

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_DIR = "./Phi2-FT"

model = AutoModelForCausalLM.from_pretrained(
    MODEL_DIR,
    load_in_8bit=True,
    device_map="auto",
    trust_remote_code=True,
    torch_dtype=torch.float16,
)

tokenizer = AutoTokenizer.from_pretrained(
    MODEL_DIR,
    add_bos_token=True,
    trust_remote_code=True,
    use_fast=False,
)

# The original called `eval_tokenizer`, a name this script never defines
# (NameError); the tokenizer loaded above is the one to use. The encoded
# prompt is also no longer stored in a variable named `input`, which shadowed
# the builtin.
model_input = tokenizer("what is zen ?", return_tensors="pt").to('cuda')

#model = model.to('cuda')
model.eval()
with torch.no_grad():
    generated = model.generate(**model_input, max_new_tokens=200)
    print(tokenizer.decode(generated[0], skip_special_tokens=True))

by **Antonio Linares** » Wed Jan 17, 2024 8:00 pm

Locally using a fine tuned model with quantization:

Code: Select all Expand view: !pip install accelerate==0.25.0 !pip install bitsandbytes==0.41.1 !pip install datasets==2.14.6 !pip install peft==0.6.2 !pip install transformers==4.36.2 !pip install torch==2.1.0 !pip install einops==0.4.1 # Phi needs this one

"""Load the fine-tuned model saved in ./Phi2-FT with 8-bit quantization and generate from one prompt."""

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_DIR = "./Phi2-FT"

model = AutoModelForCausalLM.from_pretrained(
    MODEL_DIR,
    load_in_8bit=True,
    device_map="auto",
    trust_remote_code=True,
    torch_dtype=torch.float16,
)

tokenizer = AutoTokenizer.from_pretrained(
    MODEL_DIR,
    add_bos_token=True,
    trust_remote_code=True,
    use_fast=False,
)

# The original called `eval_tokenizer`, a name this script never defines
# (NameError); the tokenizer loaded above is the one to use. The encoded
# prompt is also no longer stored in a variable named `input`, which shadowed
# the builtin.
model_input = tokenizer("what is zen ?", return_tensors="pt").to('cuda')

model.eval()
with torch.no_grad():
    generated = model.generate(**model_input, max_new_tokens=200)
    print(tokenizer.decode(generated[0], skip_special_tokens=True))

by **Antonio Linares** » Thu Jan 18, 2024 6:47 am

https://www.kaggle.com/code/lucamassaron/fine-tune-phi-2-for-sentiment-analysis

https://medium.com/@yernenip/optimizing-phi-2-a-deep-dive-into-fine-tuning-small-language-models-9d545ac90a99

https://medium.aiplanet.com/fine-tune-small-model-micphi-2-to-convert-natural-language-to-sql-32fc4f6ed40c

by **Antonio Linares** » Thu Feb 01, 2024 9:03 am

https://huggingface.co/blog/g-ronimo/phinetuning

FiveTech Software tech support forums

Fine tune Phi2 from Microsoft with your own data

Fine tune Phi2 from Microsoft with your own data

Re: Fine tune Phi2 from Microsoft with your own data

Re: Fine tune Phi2 from Microsoft with your own data

Re: Fine tune Phi2 from Microsoft with your own data

Re: Fine tune Phi2 from Microsoft with your own data

Re: Fine tune Phi2 from Microsoft with your own data

Re: Fine tune Phi2 from Microsoft with your own data

Re: Fine tune Phi2 from Microsoft with your own data

Re: Fine tune Phi2 from Microsoft with your own data

Re: Fine tune Phi2 from Microsoft with your own data

Re: Fine tune Phi2 from Microsoft with your own data

Re: Fine tune Phi2 from Microsoft with your own data

Who is online