Compare commits
3 Commits
Author | SHA1 | Date |
---|---|---|
Georgi Gerganov | 1d38a69d7c | 1 year ago |
Georgi Gerganov | a0f92eff2d | 1 year ago |
Georgi Gerganov | ed683187cb | 1 year ago |
@ -0,0 +1,6 @@
|
||||
#
# t5

set(TEST_TARGET t5)

add_executable(${TEST_TARGET} main.cpp)
target_link_libraries(${TEST_TARGET} PRIVATE ggml ggml_utils)
|
@ -0,0 +1,3 @@
|
||||
# t5

Work-in-progress port of the T5 model to ggml.

ref: https://github.com/huggingface/transformers/blob/main/src/transformers/models/t5/modeling_t5.py
|
@ -0,0 +1,119 @@
|
||||
"""Convert a FLAN-T5 PyTorch checkpoint to the ggml binary format.

Usage: convert-flan-t5-pt-to-ggml.py path-to-pt-model dir-output [use-f32]

Reads ``pytorch_model.bin`` + ``config.json`` from the input directory and
writes ``ggml-t5-model.bin`` (or ``ggml-t5-model-f32.bin`` when any third
argument is given) into the output directory.

ref: https://github.com/huggingface/transformers/blob/main/src/transformers/models/t5/modeling_t5.py
"""
import io
import json
import struct
import sys

import numpy
import torch

import code  # tmp: kept for interactive debugging sessions

#from transformers import AutoModelForSeq2SeqLM
from transformers import AutoTokenizer

if len(sys.argv) < 3:
    print("Usage: convert-flan-t5-pt-to-ggml.py path-to-pt-model dir-output [use-f32]\n")
    sys.exit(1)

dir_inp = sys.argv[1]
dir_out = sys.argv[2]

fname_inp = dir_inp + "/pytorch_model.bin"
fname_out = dir_out + "/ggml-t5-model.bin"

fname_config = dir_inp + "/config.json"

# use 16-bit or 32-bit floats (any extra CLI argument selects f32)
use_f16 = True
if len(sys.argv) > 3:
    use_f16 = False
    fname_out = dir_out + "/ggml-t5-model-f32.bin"

# load torch model
try:
    with open(fname_inp, "rb") as f:
        model_bytes = f.read()
    with io.BytesIO(model_bytes) as fp:
        checkpoint = torch.load(fp, map_location="cpu")
except Exception:
    print("Error: failed to load PyTorch model file: %s" % fname_inp)
    sys.exit(1)

# load config (json)
with open(fname_config, "r") as f:
    config = json.load(f)

# list all checkpoint keys (debug output)
for k in checkpoint.keys():
    print(k)

# TODO(review): tokenizer is hard-coded to flan-t5-small; consider loading
# it from dir_inp so other model sizes convert correctly
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")

# list methods of tokenizer (debug output)
for m in dir(tokenizer):
    print(m)

print(config)

with open(fname_out, "wb") as fout:
    # header: magic + hyperparameters (must match the C++ loader)
    fout.write(struct.pack("i", 0x67676d6c))  # magic: ggml in hex
    fout.write(struct.pack("i", config["vocab_size"]))
    fout.write(struct.pack("i", config["d_ff"]))
    fout.write(struct.pack("i", config["d_kv"]))
    fout.write(struct.pack("i", config["d_model"]))
    fout.write(struct.pack("i", config["n_positions"]))
    fout.write(struct.pack("i", config["num_heads"]))
    fout.write(struct.pack("i", config["num_layers"]))
    fout.write(struct.pack("i", use_f16))

    # vocab: sort tokenizer.vocab by token id so ids are implicit in file order
    tokens = sorted(tokenizer.vocab.items(), key=lambda x: x[1])
    fout.write(struct.pack("i", len(tokens)))

    print("tokens: %d" % len(tokens))

    for key in tokens:
        # TODO: this probably is wrong, but it should work for english at least
        # "▁" is the SentencePiece word-boundary marker
        token = key[0].replace("▁", " ")
        text = bytearray(token, "utf-8")
        fout.write(struct.pack("i", len(text)))
        fout.write(text)

    # dump weights
    for k in checkpoint.keys():
        data = checkpoint[k].squeeze().numpy()

        name = k
        n_dims = len(data.shape)
        print(name, n_dims, data.shape)

        # ftype: 0 = float32, 1 = float16
        # 1-D tensors (layer norms) are kept in f32 for accuracy
        ftype = 1
        if use_f16:
            if n_dims < 2:
                print("  Converting to float32")
                ftype = 0
            else:
                print("  Converting to float16")
                data = data.astype(numpy.float16)
                ftype = 1
        else:
            ftype = 0

        # tensor header: n_dims, name length, ftype, then dims in reverse order
        name_bytes = name.encode("utf-8")
        fout.write(struct.pack("iii", n_dims, len(name_bytes), ftype))
        for i in range(n_dims):
            fout.write(struct.pack("i", data.shape[n_dims - 1 - i]))
        fout.write(name_bytes)

        # raw tensor data
        data.tofile(fout)

print("Done. Output file: " + fname_out)
print("")
|
@ -0,0 +1,740 @@
|
||||
#include "ggml.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
// available t5 model sizes
enum e_model {
    MODEL_UNKNOWN,
    MODEL_SMALL,
    MODEL_BASE,
    MODEL_LARGE,
    MODEL_XL,
    MODEL_XXL,
};

static const size_t MB = 4*1024*1024;

// rough per-model memory budgets (bytes), keyed by model size

static const std::map<e_model, size_t> MEM_REQ_MODEL = {
    { MODEL_SMALL,   74ull*MB },
    { MODEL_BASE,   142ull*MB },
    { MODEL_LARGE,  466ull*MB },
    { MODEL_XL,    1464ull*MB },
    { MODEL_XXL,   2952ull*MB },
};

static const std::map<e_model, size_t> MEM_REQ_MEMORY = {
    { MODEL_SMALL,  12ull*MB },
    { MODEL_BASE,   24ull*MB },
    { MODEL_LARGE,  70ull*MB },
    { MODEL_XL,    184ull*MB },
    { MODEL_XXL,   306ull*MB },
};

static const std::map<e_model, size_t> MEM_REQ_ENCODE = {
    { MODEL_SMALL,   80ull*MB },
    { MODEL_BASE,   128ull*MB },
    { MODEL_LARGE,  300ull*MB },
    { MODEL_XL,     680ull*MB },
    { MODEL_XXL,   1100ull*MB },
};

static const std::map<e_model, size_t> MEM_REQ_ENCODE_LAYER = {
    { MODEL_SMALL, 104ull*MB },
    { MODEL_BASE,  138ull*MB },
    { MODEL_LARGE, 208ull*MB },
    { MODEL_XL,    280ull*MB },
    { MODEL_XXL,   354ull*MB },
};

static const std::map<e_model, size_t> MEM_REQ_DECODE = {
    { MODEL_SMALL, 200ull*MB },
    { MODEL_BASE,  202ull*MB },
    { MODEL_LARGE, 204ull*MB },
    { MODEL_XL,    206ull*MB },
    { MODEL_XXL,   208ull*MB },
};

static const std::map<e_model, size_t> MEM_REQ_DECODE_LAYER = {
    { MODEL_SMALL,  32ull*MB },
    { MODEL_BASE,   44ull*MB },
    { MODEL_LARGE,  64ull*MB },
    { MODEL_XL,     84ull*MB },
    { MODEL_XXL,   110ull*MB },
};
||||
|
||||
// token vocabulary with bidirectional token <-> id lookup
struct t5_vocab {
    using id    = int32_t;
    using token = std::string;

    int n_vocab = 32128; // default T5 vocabulary size

    std::map<token, id> token_to_id;
    std::map<id, token> id_to_token;
};
|
||||
|
||||
// model hyperparameters; defaults correspond to FLAN-T5 Small
struct t5_hparams {
    int32_t n_vocab     = 32128; // vocabulary size
    int32_t d_ff        = 1024;  // feed-forward hidden size
    int32_t d_kv        = 64;    // per-head key/value dimension
    int32_t d_model     = 512;   // embedding / hidden size
    int32_t n_positions = 512;   // maximum sequence length
    int32_t n_head      = 6;     // attention heads
    int32_t n_layer     = 8;     // encoder/decoder layers
    int32_t f16         = 1;     // 1 if weights are stored as float16
};
|
||||
|
||||
// weights of a single encoder block
struct t5_layer_encoder {
    // encoder.block.*.layer.0.SelfAttention
    struct ggml_tensor * attn_q;
    struct ggml_tensor * attn_k;
    struct ggml_tensor * attn_v;
    struct ggml_tensor * attn_o;

    // encoder.block.*.layer.0.layer_norm
    struct ggml_tensor * ln_0;

    // encoder.block.*.layer.1.DenseReluDense (gated feed-forward)
    struct ggml_tensor * wi_0;
    struct ggml_tensor * wi_1;
    struct ggml_tensor * wo;

    // encoder.block.*.layer.1.layer_norm
    struct ggml_tensor * ln_1;
};
|
||||
|
||||
// weights of a single decoder block
struct t5_layer_decoder {
    // decoder.block.*.layer.0.SelfAttention
    struct ggml_tensor * attn_q;
    struct ggml_tensor * attn_k;
    struct ggml_tensor * attn_v;
    struct ggml_tensor * attn_o;

    // decoder.block.*.layer.0.layer_norm
    struct ggml_tensor * ln_0;

    // decoder.block.*.layer.1.EncDecAttention (cross-attention over encoder output)
    struct ggml_tensor * cross_attn_q;
    struct ggml_tensor * cross_attn_k;
    struct ggml_tensor * cross_attn_v;
    struct ggml_tensor * cross_attn_o;

    // decoder.block.*.layer.1.layer_norm
    struct ggml_tensor * ln_1;

    // decoder.block.*.layer.2.DenseReluDense (gated feed-forward)
    struct ggml_tensor * wi_0;
    struct ggml_tensor * wi_1;
    struct ggml_tensor * wo;

    // decoder.block.*.layer.2.layer_norm
    struct ggml_tensor * ln_2;
};
|
||||
|
||||
struct t5_model {
|
||||
e_model type = MODEL_UNKNOWN;
|
||||
|
||||
t5_hparams hparams;
|
||||
|
||||
// shared
|
||||
struct ggml_tensor * shared;
|
||||
|
||||
// encoder.embed_tokens
|
||||
struct ggml_tensor * e_et;
|
||||
|
||||
// encoder.final_layer_norm
|
||||
struct ggml_tensor * e_ln;
|
||||
|
||||
// encoder.block.0.layer.0.SelfAttention.relative_attention_bias
|
||||
struct ggml_tensor * e_rab;
|
||||
|
||||
// decoder.embed_tokens
|
||||
struct ggml_tensor * d_et;
|
||||
|
||||
// decoder.final_layer_norm
|
||||
struct ggml_tensor * d_ln;
|
||||
|
||||
// decoder.block.0.layer.0.SelfAttention.relative_attention_bias
|
||||
struct ggml_tensor * d_rab;
|
||||
|
||||
// lm_head
|
||||
struct ggml_tensor * lm_head;
|
||||
|
||||
std::vector<t5_layer_encoder> layers_encoder;
|
||||
std::vector<t5_layer_decoder> layers_decoder;
|
||||
|
||||
// context
|
||||
struct ggml_context * ctx;
|
||||
struct ggml_context * ctx_mem;
|
||||
|
||||
// tensors
|
||||
int n_loaded;
|
||||
std::map<std::string, struct ggml_tensor *> tensors;
|
||||
};
|
||||
|
||||
struct t5_context {
|
||||
int64_t t_load_us = 0;
|
||||
int64_t t_sample_us = 0;
|
||||
int64_t t_encode_us = 0;
|
||||
int64_t t_decode_us = 0;
|
||||
int64_t t_start_us = 0;
|
||||
|
||||
std::vector<uint8_t> buf_model;
|
||||
std::vector<uint8_t> buf_memory;
|
||||
std::vector<uint8_t> buf_compute;
|
||||
std::vector<uint8_t> buf_compute_layer;
|
||||
|
||||
t5_model model;
|
||||
t5_vocab vocab;
|
||||
|
||||
std::vector<float> probs;
|
||||
std::vector<float> logits;
|
||||
};
|
||||
|
||||
// read one POD value of type T from the stream into dest
template<typename T>
static void read_safe(std::ifstream& fin, T& dest) {
    fin.read(reinterpret_cast<char *>(&dest), sizeof(T));
}
|
||||
|
||||
static bool t5_model_load(const std::string & fname, t5_context & wctx) {
|
||||
fprintf(stderr, "%s: loading model from '%s'\n", __func__, fname.c_str());
|
||||
|
||||
auto & model = wctx.model;
|
||||
auto & vocab = wctx.vocab;
|
||||
|
||||
auto fin = std::ifstream(fname, std::ios::binary);
|
||||
if (!fin) {
|
||||
fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
// verify magic
|
||||
{
|
||||
uint32_t magic;
|
||||
read_safe(fin, magic);
|
||||
if (magic != 0x67676d6c) {
|
||||
fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname.c_str());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
//load hparams
|
||||
{
|
||||
auto & hparams = model.hparams;
|
||||
|
||||
read_safe(fin, hparams.n_vocab);
|
||||
read_safe(fin, hparams.d_ff);
|
||||
read_safe(fin, hparams.d_kv);
|
||||
read_safe(fin, hparams.d_model);
|
||||
read_safe(fin, hparams.n_positions);
|
||||
read_safe(fin, hparams.n_head);
|
||||
read_safe(fin, hparams.n_layer);
|
||||
read_safe(fin, hparams.f16);
|
||||
|
||||
assert(hparams.n_text_state == hparams.n_audio_state);
|
||||
|
||||
if (hparams.n_layer == 8) {
|
||||
model.type = e_model::MODEL_SMALL;
|
||||
}
|
||||
|
||||
if (hparams.n_layer == 12) {
|
||||
model.type = e_model::MODEL_BASE;
|
||||
}
|
||||
|
||||
if (hparams.n_layer == 24 && hparams.n_head == 16) {
|
||||
model.type = e_model::MODEL_LARGE;
|
||||
}
|
||||
|
||||
if (hparams.n_layer == 24 && hparams.n_head == 32) {
|
||||
model.type = e_model::MODEL_XL;
|
||||
}
|
||||
|
||||
if (hparams.n_layer == 24 && hparams.n_head == 64) {
|
||||
model.type = e_model::MODEL_XXL;
|
||||
}
|
||||
|
||||
fprintf(stderr, "%s: n_vocab = %d\n", __func__, hparams.n_vocab);
|
||||
fprintf(stderr, "%s: d_ff = %d\n", __func__, hparams.d_ff);
|
||||
fprintf(stderr, "%s: d_kv = %d\n", __func__, hparams.d_kv);
|
||||
fprintf(stderr, "%s: d_model = %d\n", __func__, hparams.d_model);
|
||||
fprintf(stderr, "%s: n_positions = %d\n", __func__, hparams.n_positions);
|
||||
fprintf(stderr, "%s: n_head = %d\n", __func__, hparams.n_head);
|
||||
fprintf(stderr, "%s: n_layer = %d\n", __func__, hparams.n_layer);
|
||||
fprintf(stderr, "%s: f16 = %d\n", __func__, hparams.f16);
|
||||
fprintf(stderr, "%s: type = %d\n", __func__, model.type);
|
||||
|
||||
wctx.buf_model.resize(MEM_REQ_MODEL.at(model.type));
|
||||
wctx.buf_memory.resize(MEM_REQ_MEMORY.at(model.type));
|
||||
wctx.buf_compute.resize(std::max(MEM_REQ_ENCODE.at(model.type), MEM_REQ_DECODE.at(model.type)));
|
||||
wctx.buf_compute_layer.resize(std::max(MEM_REQ_ENCODE_LAYER.at(model.type), MEM_REQ_DECODE_LAYER.at(model.type)));
|
||||
}
|
||||
|
||||
// load vocab
|
||||
{
|
||||
int32_t n_vocab = 0;
|
||||
read_safe(fin, n_vocab);
|
||||
|
||||
//if (n_vocab != model.hparams.n_vocab) {
|
||||
// fprintf(stderr, "%s: invalid model file '%s' (bad vocab size %d != %d)\n",
|
||||
// __func__, fname.c_str(), n_vocab, model.hparams.n_vocab);
|
||||
// return false;
|
||||
//}
|
||||
|
||||
std::string word;
|
||||
std::vector<char> tmp;
|
||||
|
||||
tmp.reserve(128);
|
||||
|
||||
for (int i = 0; i < n_vocab; i++) {
|
||||
uint32_t len;
|
||||
read_safe(fin, len);
|
||||
|
||||
if (len > 0) {
|
||||
tmp.resize(len);
|
||||
fin.read(&tmp[0], tmp.size()); // read to buffer
|
||||
word.assign(&tmp[0], tmp.size());
|
||||
} else {
|
||||
// seems like we have an empty-string token in multi-language models (i = 50256)
|
||||
//fprintf(stderr, "%s: warning: empty-string token in vocab, i = %d\n", __func__, i);
|
||||
word = "";
|
||||
}
|
||||
|
||||
vocab.token_to_id[word] = i;
|
||||
vocab.id_to_token[i] = word;
|
||||
|
||||
//printf("%s: vocab[%d] = '%s'\n", __func__, i, word.c_str());
|
||||
}
|
||||
|
||||
vocab.n_vocab = model.hparams.n_vocab;
|
||||
|
||||
if (n_vocab < model.hparams.n_vocab) {
|
||||
fprintf(stderr, "%s: adding %d extra tokens\n", __func__, model.hparams.n_vocab - n_vocab);
|
||||
for (int i = n_vocab; i < model.hparams.n_vocab; i++) {
|
||||
word = "[_extra_token_" + std::to_string(i) + "]";
|
||||
vocab.token_to_id[word] = i;
|
||||
vocab.id_to_token[i] = word;
|
||||
}
|
||||
}
|
||||
|
||||
wctx.logits.reserve(vocab.n_vocab*model.hparams.d_model);
|
||||
wctx.probs.reserve(vocab.n_vocab*model.hparams.d_model);
|
||||
}
|
||||
|
||||
{
|
||||
// this is the total memory required to run the inference
|
||||
const size_t mem_required =
|
||||
wctx.buf_model.size() +
|
||||
wctx.buf_memory.size() +
|
||||
wctx.buf_compute.size() +
|
||||
wctx.buf_compute_layer.size();
|
||||
|
||||
fprintf(stderr, "%s: mem_required = %7.2f MB\n", __func__, mem_required / 1024.0 / 1024.0);
|
||||
}
|
||||
|
||||
// for the big tensors, we have the option to store the data in 16-bit floats
|
||||
// in order to save memory and also to speed up the computation
|
||||
const ggml_type wtype = model.hparams.f16 ? GGML_TYPE_F16 : GGML_TYPE_F32;
|
||||
|
||||
size_t ctx_size = 0;
|
||||
|
||||
{
|
||||
const auto & hparams = model.hparams;
|
||||
|
||||
const int n_vocab = hparams.n_vocab;
|
||||
const int d_ff = hparams.d_ff;
|
||||
const int d_kv = hparams.d_kv;
|
||||
const int d_model = hparams.d_model;
|
||||
const int n_head = hparams.n_head;
|
||||
const int n_layer = hparams.n_layer;
|
||||
|
||||
ctx_size += n_vocab*d_model*ggml_type_size(wtype); // shared;
|
||||
ctx_size += n_vocab*d_model*ggml_type_size(wtype); // lm_head;
|
||||
|
||||
// encoder
|
||||
{
|
||||
ctx_size += n_vocab*d_model*ggml_type_size(wtype); // e_et;
|
||||
ctx_size += d_model*ggml_type_size(GGML_TYPE_F32); // e_ln
|
||||
ctx_size += 32*n_head*ggml_type_size(wtype); // e_rab
|
||||
}
|
||||
|
||||
// decoder
|
||||
{
|
||||
ctx_size += n_vocab*d_model*ggml_type_size(wtype); // d_et;
|
||||
ctx_size += d_model*ggml_type_size(GGML_TYPE_F32); // d_ln
|
||||
ctx_size += 32*n_head*ggml_type_size(wtype); // d_rab
|
||||
}
|
||||
|
||||
// encoder layers
|
||||
{
|
||||
ctx_size += n_layer*(d_kv*n_head*d_model*ggml_type_size(wtype)); // attn_q
|
||||
ctx_size += n_layer*(d_kv*n_head*d_model*ggml_type_size(wtype)); // attn_k
|
||||
ctx_size += n_layer*(d_kv*n_head*d_model*ggml_type_size(wtype)); // attn_v
|
||||
ctx_size += n_layer*(d_kv*n_head*d_model*ggml_type_size(wtype)); // attn_o
|
||||
|
||||
ctx_size += n_layer*(d_model*ggml_type_size(GGML_TYPE_F32)); // ln_0
|
||||
|
||||
ctx_size += n_layer*(d_ff*d_model*ggml_type_size(wtype)); // wi_0
|
||||
ctx_size += n_layer*(d_ff*d_model*ggml_type_size(wtype)); // wi_1
|
||||
ctx_size += n_layer*(d_ff*d_model*ggml_type_size(wtype)); // wo
|
||||
|
||||
ctx_size += n_layer*(d_model*ggml_type_size(GGML_TYPE_F32)); // ln_1
|
||||
}
|
||||
|
||||
// decoder layers
|
||||
{
|
||||
ctx_size += n_layer*(d_kv*n_head*d_model*ggml_type_size(wtype)); // attn_q
|
||||
ctx_size += n_layer*(d_kv*n_head*d_model*ggml_type_size(wtype)); // attn_k
|
||||
ctx_size += n_layer*(d_kv*n_head*d_model*ggml_type_size(wtype)); // attn_v
|
||||
ctx_size += n_layer*(d_kv*n_head*d_model*ggml_type_size(wtype)); // attn_o
|
||||
|
||||
ctx_size += n_layer*(d_model*ggml_type_size(GGML_TYPE_F32)); // ln_0
|
||||
|
||||
ctx_size += n_layer*(d_kv*n_head*d_model*ggml_type_size(wtype)); // cross_attn_q
|
||||
ctx_size += n_layer*(d_kv*n_head*d_model*ggml_type_size(wtype)); // cross_attn_k
|
||||
ctx_size += n_layer*(d_kv*n_head*d_model*ggml_type_size(wtype)); // cross_attn_v
|
||||
ctx_size += n_layer*(d_kv*n_head*d_model*ggml_type_size(wtype)); // cross_attn_o
|
||||
|
||||
ctx_size += n_layer*(d_model*ggml_type_size(GGML_TYPE_F32)); // ln_1
|
||||
|
||||
ctx_size += n_layer*(d_ff*d_model*ggml_type_size(wtype)); // wi_0
|
||||
ctx_size += n_layer*(d_ff*d_model*ggml_type_size(wtype)); // wi_1
|
||||
ctx_size += n_layer*(d_ff*d_model*ggml_type_size(wtype)); // wo
|
||||
|
||||
ctx_size += n_layer*(d_model*ggml_type_size(GGML_TYPE_F32)); // ln_2
|
||||
}
|
||||
|
||||
ctx_size += (15 + 9*n_layer + 14*n_layer)*256; // object overhead
|
||||
|
||||
fprintf(stderr, "%s: ggml ctx size = %7.2f MB\n", __func__, ctx_size/(1024.0*1024.0));
|
||||
}
|
||||
|
||||
// create the ggml context
|
||||
{
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = wctx.buf_model.size();
|
||||
params.mem_buffer = wctx.buf_model.data();
|
||||
|
||||
model.ctx = ggml_init(params);
|
||||
if (!model.ctx) {
|
||||
fprintf(stderr, "%s: ggml_init() failed\n", __func__);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// prepare memory for the weights
|
||||
{
|
||||
auto & ctx = model.ctx;
|
||||
|
||||
const auto & hparams = model.hparams;
|
||||
|
||||
const int n_vocab = hparams.n_vocab;
|
||||
const int d_ff = hparams.d_ff;
|
||||
const int d_kv = hparams.d_kv;
|
||||
const int d_model = hparams.d_model;
|
||||
const int n_head = hparams.n_head;
|
||||
const int n_layer = hparams.n_layer;
|
||||
|
||||
model.layers_encoder.resize(n_layer);
|
||||
model.layers_decoder.resize(n_layer);
|
||||
|
||||
// global
|
||||
{
|
||||
model.shared = ggml_new_tensor_2d(ctx, wtype, d_model, n_vocab);
|
||||
model.lm_head = ggml_new_tensor_2d(ctx, wtype, d_model, n_vocab);
|
||||
|
||||
model.tensors["shared.weight"] = model.shared;
|
||||
model.tensors["lm_head.weight"] = model.lm_head;
|
||||
}
|
||||
|
||||
// encoder
|
||||
{
|
||||
model.e_et = ggml_new_tensor_2d(ctx, wtype, d_model, n_vocab);
|
||||
model.e_ln = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, d_model);
|
||||
|
||||
model.e_rab = ggml_new_tensor_2d(ctx, wtype, n_head, 32);
|
||||
|
||||
// map by name
|
||||
model.tensors["encoder.embed_tokens.weight"] = model.e_et;
|
||||
model.tensors["encoder.final_layer_norm.weight"] = model.e_ln;
|
||||
|
||||
model.tensors["encoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight"] = model.e_rab;
|
||||
|
||||
for (int i = 0; i < n_layer; ++i) {
|
||||
auto & layer = model.layers_encoder[i];
|
||||
|
||||
layer.attn_q = ggml_new_tensor_2d(ctx, wtype, d_model, d_kv*n_head);
|
||||
layer.attn_k = ggml_new_tensor_2d(ctx, wtype, d_model, d_kv*n_head);
|
||||
layer.attn_v = ggml_new_tensor_2d(ctx, wtype, d_model, d_kv*n_head);
|
||||
layer.attn_o = ggml_new_tensor_2d(ctx, wtype, d_kv*n_head, d_model);
|
||||
|
||||
layer.ln_0 = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, d_model);
|
||||
|
||||
layer.wi_0 = ggml_new_tensor_2d(ctx, wtype, d_model, d_ff);
|
||||
layer.wi_1 = ggml_new_tensor_2d(ctx, wtype, d_model, d_ff);
|
||||
layer.wo = ggml_new_tensor_2d(ctx, wtype, d_ff, d_model);
|
||||
|
||||
layer.ln_1 = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, d_model);
|
||||
|
||||
// map by name
|
||||
model.tensors["encoder.block." + std::to_string(i) + ".layer.0.SelfAttention.q.weight"] = layer.attn_q;
|
||||
model.tensors["encoder.block." + std::to_string(i) + ".layer.0.SelfAttention.k.weight"] = layer.attn_k;
|
||||
model.tensors["encoder.block." + std::to_string(i) + ".layer.0.SelfAttention.v.weight"] = layer.attn_v;
|
||||
model.tensors["encoder.block." + std::to_string(i) + ".layer.0.SelfAttention.o.weight"] = layer.attn_o;
|
||||
|
||||
model.tensors["encoder.block." + std::to_string(i) + ".layer.0.layer_norm.weight"] = layer.ln_0;
|
||||
|
||||
model.tensors["encoder.block." + std::to_string(i) + ".layer.1.DenseReluDense.wi_0.weight"] = layer.wi_0;
|
||||
model.tensors["encoder.block." + std::to_string(i) + ".layer.1.DenseReluDense.wi_1.weight"] = layer.wi_1;
|
||||
model.tensors["encoder.block." + std::to_string(i) + ".layer.1.DenseReluDense.wo.weight"] = layer.wo;
|
||||
|
||||
model.tensors["encoder.block." + std::to_string(i) + ".layer.1.layer_norm.weight"] = layer.ln_1;
|
||||
}
|
||||
}
|
||||
|
||||
// decoder
|
||||
{
|
||||
model.d_et = ggml_new_tensor_2d(ctx, wtype, d_model, n_vocab);
|
||||
model.d_ln = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, d_model);
|
||||
|
||||
model.d_rab = ggml_new_tensor_2d(ctx, wtype, n_head, 32);
|
||||
|
||||
// map by name
|
||||
model.tensors["decoder.embed_tokens.weight"] = model.d_et;
|
||||
model.tensors["decoder.final_layer_norm.weight"] = model.d_ln;
|
||||
|
||||
model.tensors["decoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight"] = model.d_rab;
|
||||
|
||||
for (int i = 0; i < n_layer; ++i) {
|
||||
auto & layer = model.layers_decoder[i];
|
||||
|
||||
layer.attn_q = ggml_new_tensor_2d(ctx, wtype, d_model, d_kv*n_head);
|
||||
layer.attn_k = ggml_new_tensor_2d(ctx, wtype, d_model, d_kv*n_head);
|
||||
layer.attn_v = ggml_new_tensor_2d(ctx, wtype, d_model, d_kv*n_head);
|
||||
layer.attn_o = ggml_new_tensor_2d(ctx, wtype, d_kv*n_head, d_model);
|
||||
|
||||
layer.ln_0 = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, d_model);
|
||||
|
||||
layer.cross_attn_q = ggml_new_tensor_2d(ctx, wtype, d_model, d_kv*n_head);
|
||||
layer.cross_attn_k = ggml_new_tensor_2d(ctx, wtype, d_model, d_kv*n_head);
|
||||
layer.cross_attn_v = ggml_new_tensor_2d(ctx, wtype, d_model, d_kv*n_head);
|
||||
layer.cross_attn_o = ggml_new_tensor_2d(ctx, wtype, d_kv*n_head, d_model);
|
||||
|
||||
layer.ln_1 = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, d_model);
|
||||
|
||||
layer.wi_0 = ggml_new_tensor_2d(ctx, wtype, d_model, d_ff);
|
||||
layer.wi_1 = ggml_new_tensor_2d(ctx, wtype, d_model, d_ff);
|
||||
layer.wo = ggml_new_tensor_2d(ctx, wtype, d_ff, d_model);
|
||||
|
||||
layer.ln_2 = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, d_model);
|
||||
|
||||
// map by name
|
||||
model.tensors["decoder.block." + std::to_string(i) + ".layer.0.SelfAttention.q.weight"] = layer.attn_q;
|
||||
model.tensors["decoder.block." + std::to_string(i) + ".layer.0.SelfAttention.k.weight"] = layer.attn_k;
|
||||
model.tensors["decoder.block." + std::to_string(i) + ".layer.0.SelfAttention.v.weight"] = layer.attn_v;
|
||||
model.tensors["decoder.block." + std::to_string(i) + ".layer.0.SelfAttention.o.weight"] = layer.attn_o;
|
||||
|
||||
model.tensors["decoder.block." + std::to_string(i) + ".layer.0.layer_norm.weight"] = layer.ln_0;
|
||||
|
||||
model.tensors["decoder.block." + std::to_string(i) + ".layer.1.EncDecAttention.q.weight"] = layer.cross_attn_q;
|
||||
model.tensors["decoder.block." + std::to_string(i) + ".layer.1.EncDecAttention.k.weight"] = layer.cross_attn_k;
|
||||
model.tensors["decoder.block." + std::to_string(i) + ".layer.1.EncDecAttention.v.weight"] = layer.cross_attn_v;
|
||||
model.tensors["decoder.block." + std::to_string(i) + ".layer.1.EncDecAttention.o.weight"] = layer.cross_attn_o;
|
||||
|
||||
model.tensors["decoder.block." + std::to_string(i) + ".layer.1.layer_norm.weight"] = layer.ln_1;
|
||||
|
||||
model.tensors["decoder.block." + std::to_string(i) + ".layer.2.DenseReluDense.wi_0.weight"] = layer.wi_0;
|
||||
model.tensors["decoder.block." + std::to_string(i) + ".layer.2.DenseReluDense.wi_1.weight"] = layer.wi_1;
|
||||
model.tensors["decoder.block." + std::to_string(i) + ".layer.2.DenseReluDense.wo.weight"] = layer.wo;
|
||||
|
||||
model.tensors["decoder.block." + std::to_string(i) + ".layer.2.layer_norm.weight"] = layer.ln_2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// create the ggml memory context
|
||||
{
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = wctx.buf_memory.size();
|
||||
params.mem_buffer = wctx.buf_memory.data();
|
||||
|
||||
model.ctx_mem = ggml_init(params);
|
||||
if (!model.ctx_mem) {
|
||||
fprintf(stderr, "%s: ggml_init() failed\n", __func__);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// key + value memory
|
||||
//{
|
||||
// auto & ctx = model.ctx_mem;
|
||||
|
||||
// const auto & hparams = model.hparams;
|
||||
|
||||
// const int n_text_state = hparams.n_text_state;
|
||||
// const int n_text_layer = hparams.n_text_layer;
|
||||
// const int n_text_ctx = hparams.n_text_ctx;
|
||||
|
||||
// // key/value memory for the self-attention layer
|
||||
// {
|
||||
// const int n_mem = n_text_layer*n_text_ctx;
|
||||
// const int n_elements = n_text_state*n_mem;
|
||||
|
||||
// model.memory_k = ggml_new_tensor_1d(ctx, GGML_TYPE_F16, n_elements);
|
||||
// model.memory_v = ggml_new_tensor_1d(ctx, GGML_TYPE_F16, n_elements);
|
||||
// }
|
||||
|
||||
// // key/value memory for the cross-attention layer
|
||||
// {
|
||||
// const int n_audio_ctx = hparams.n_audio_ctx;
|
||||
|
||||
// const int n_mem = n_text_layer*n_audio_ctx;
|
||||
// const int n_elements = n_text_state*n_mem;
|
||||
|
||||
// model.memory_cross_k = ggml_new_tensor_1d(ctx, GGML_TYPE_F16, n_elements);
|
||||
// model.memory_cross_v = ggml_new_tensor_1d(ctx, GGML_TYPE_F16, n_elements);
|
||||
// }
|
||||
|
||||
// const size_t memory_size =
|
||||
// ggml_nbytes(model.memory_k) + ggml_nbytes(model.memory_v) +
|
||||
// ggml_nbytes(model.memory_cross_k) + ggml_nbytes(model.memory_cross_v);
|
||||
|
||||
// fprintf(stderr, "%s: memory size = %7.2f MB\n", __func__, memory_size/1024.0/1024.0);
|
||||
//}
|
||||
|
||||
// load weights
|
||||
{
|
||||
size_t total_size = 0;
|
||||
|
||||
model.n_loaded = 0;
|
||||
|
||||
while (true) {
|
||||
int32_t n_dims;
|
||||
int32_t length;
|
||||
int32_t ftype;
|
||||
|
||||
read_safe(fin, n_dims);
|
||||
read_safe(fin, length);
|
||||
read_safe(fin, ftype);
|
||||
|
||||
if (fin.eof()) {
|
||||
break;
|
||||
}
|
||||
|
||||
int32_t nelements = 1;
|
||||
int32_t ne[3] = { 1, 1, 1 };
|
||||
for (int i = 0; i < n_dims; ++i) {
|
||||
read_safe(fin, ne[i]);
|
||||
nelements *= ne[i];
|
||||
}
|
||||
|
||||
std::string name;
|
||||
std::vector<char> tmp(length); // create a buffer
|
||||
fin.read(&tmp[0], tmp.size()); // read to buffer
|
||||
name.assign(&tmp[0], tmp.size());
|
||||
|
||||
if (model.tensors.find(name) == model.tensors.end()) {
|
||||
fprintf(stderr, "%s: unknown tensor '%s' in model file\n", __func__, name.data());
|
||||
return false;
|
||||
}
|
||||
|
||||
auto tensor = model.tensors[name.data()];
|
||||
if (ggml_nelements(tensor) != nelements) {
|
||||
fprintf(stderr, "%s: tensor '%s' has wrong size in model file\n", __func__, name.data());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (tensor->ne[0] != ne[0] || tensor->ne[1] != ne[1] || tensor->ne[2] != ne[2]) {
|
||||
fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%d, %d, %d], expected [%d, %d, %d]\n",
|
||||
__func__, name.data(), tensor->ne[0], tensor->ne[1], tensor->ne[2], ne[0], ne[1], ne[2]);
|
||||
return false;
|
||||
}
|
||||
|
||||
const size_t bpe = (ftype == 0) ? sizeof(float) : sizeof(ggml_fp16_t);
|
||||
|
||||
if (nelements*bpe != ggml_nbytes(tensor)) {
|
||||
fprintf(stderr, "%s: tensor '%s' has wrong size in model file: got %zu, expected %zu\n",
|
||||
__func__, name.data(), ggml_nbytes(tensor), nelements*bpe);
|
||||
return false;
|
||||
}
|
||||
|
||||
fin.read(reinterpret_cast<char *>(tensor->data), ggml_nbytes(tensor));
|
||||
|
||||
printf("%48s - [%5d, %5d, %5d], type = %6s, %6.2f MB\n", name.data(), ne[0], ne[1], ne[2], ftype == 0 ? "float" : "f16", ggml_nbytes(tensor)/1024.0/1024.0);
|
||||
total_size += ggml_nbytes(tensor);
|
||||
model.n_loaded++;
|
||||
}
|
||||
|
||||
fprintf(stderr, "%s: model size = %7.2f MB\n", __func__, total_size/1024.0/1024.0);
|
||||
|
||||
if (model.n_loaded == 0) {
|
||||
fprintf(stderr, "%s: WARN no tensors loaded from model file - assuming empty model for testing\n", __func__);
|
||||
} else if (model.n_loaded != (int) model.tensors.size()) {
|
||||
fprintf(stderr, "%s: ERROR not all tensors loaded from model file - expected %zu, got %d\n", __func__, model.tensors.size(), model.n_loaded);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
fin.close();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
struct t5_context * t5_init(const char * path_model) {
|
||||
ggml_time_init();
|
||||
|
||||
t5_context * ctx = new t5_context;
|
||||
|
||||
const int64_t t_start_us = ggml_time_us();
|
||||
|
||||
ctx->t_start_us = t_start_us;
|
||||
|
||||
if (!t5_model_load(path_model, *ctx)) {
|
||||
fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, path_model);
|
||||
delete ctx;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
ctx->t_load_us = ggml_time_us() - t_start_us;
|
||||
|
||||
return ctx;
|
||||
}
|
||||
|
||||
void t5_free(struct t5_context * ctx) {
|
||||
if (ctx) {
|
||||
if (ctx->model.ctx) {
|
||||
ggml_free(ctx->model.ctx);
|
||||
}
|
||||
if (ctx->model.ctx_mem) {
|
||||
ggml_free(ctx->model.ctx_mem);
|
||||
}
|
||||
delete ctx;
|
||||
}
|
||||
}
|
||||
|
||||
// entry point: load a T5 model from the path given on the command line,
// report the load time, and clean up
int main(int argc, char ** argv) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s <model>\n", argv[0]);
        return -1;
    }

    const char * path_model = argv[1];

    t5_context * ctx = t5_init(path_model);
    if (ctx == nullptr) {
        fprintf(stderr, "%s: failed to initialize T5 context\n", __func__);
        return -1;
    }

    fprintf(stderr, "%s: model loaded in %7.2f ms\n", __func__, ctx->t_load_us/1000.0);

    t5_free(ctx);

    return 0;
}
|
@ -1,162 +0,0 @@
|
||||
#include "common.h"
|
||||
|
||||
// third-party utilities
|
||||
// use your favorite implementations
|
||||
#define DR_WAV_IMPLEMENTATION
|
||||
#include "dr_wav.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <regex>
|
||||
|
||||
#ifndef M_PI
|
||||
#define M_PI 3.14159265358979323846
|
||||
#endif
|
||||
|
||||
// strip leading and trailing whitespace from s
std::string trim(const std::string & s) {
    static const std::regex ws_edges(R"(^\s+|\s+$)");
    return std::regex_replace(s, ws_edges, "");
}
|
||||
|
||||
// return a copy of s with every non-overlapping occurrence of 'from'
// replaced by 'to', scanning left to right
std::string replace(const std::string & s, const std::string & from, const std::string & to) {
    std::string out(s);
    size_t at = out.find(from);
    while (at != std::string::npos) {
        out.replace(at, from.length(), to);
        at += to.length();            // skip past the inserted text
        at  = out.find(from, at);
    }
    return out;
}
|
||||
|
||||
// Load PCM audio from a WAV file (or from stdin when fname == "-") into pcmf32.
// The input must be mono or stereo, 16-bit, and sampled at COMMON_SAMPLE_RATE.
// pcmf32 always receives the mono (downmixed) samples in [-1, 1); when `stereo`
// is true the two channels are additionally stored separately in pcmf32s.
// Returns false (with a message on stderr) on any I/O or format error.
bool read_wav(const std::string & fname, std::vector<float>& pcmf32, std::vector<std::vector<float>>& pcmf32s, bool stereo) {
    drwav wav;
    std::vector<uint8_t> wav_data; // used for pipe input from stdin

    if (fname == "-") {
        // slurp the entire stream from stdin into memory before parsing
        {
            uint8_t buf[1024];
            while (true)
            {
                const size_t n = fread(buf, 1, sizeof(buf), stdin);
                if (n == 0) {
                    break;
                }
                wav_data.insert(wav_data.end(), buf, buf + n);
            }
        }

        if (drwav_init_memory(&wav, wav_data.data(), wav_data.size(), nullptr) == false) {
            fprintf(stderr, "error: failed to open WAV file from stdin\n");
            return false;
        }

        fprintf(stderr, "%s: read %zu bytes from stdin\n", __func__, wav_data.size());
    }
    else if (drwav_init_file(&wav, fname.c_str(), nullptr) == false) {
        fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname.c_str());
        return false;
    }

    if (wav.channels != 1 && wav.channels != 2) {
        fprintf(stderr, "%s: WAV file '%s' must be mono or stereo\n", __func__, fname.c_str());
        return false;
    }

    if (stereo && wav.channels != 2) {
        fprintf(stderr, "%s: WAV file '%s' must be stereo for diarization\n", __func__, fname.c_str());
        return false;
    }

    if (wav.sampleRate != COMMON_SAMPLE_RATE) {
        fprintf(stderr, "%s: WAV file '%s' must be %i kHz\n", __func__, fname.c_str(), COMMON_SAMPLE_RATE/1000);
        return false;
    }

    if (wav.bitsPerSample != 16) {
        fprintf(stderr, "%s: WAV file '%s' must be 16-bit\n", __func__, fname.c_str());
        return false;
    }

    // frame count: dr_wav reports it for file input; for stdin input it is
    // estimated from the raw byte size.
    // NOTE(review): the stdin estimate divides the WHOLE buffer (header bytes
    // included) by the frame size, so it slightly over-counts frames — confirm
    // this is acceptable for the callers.
    const uint64_t n = wav_data.empty() ? wav.totalPCMFrameCount : wav_data.size()/(wav.channels*wav.bitsPerSample/8);

    std::vector<int16_t> pcm16;
    pcm16.resize(n*wav.channels);
    drwav_read_pcm_frames_s16(&wav, n, pcm16.data());
    drwav_uninit(&wav); // wav's metadata fields remain readable below

    // convert to mono, float
    pcmf32.resize(n);
    if (wav.channels == 1) {
        for (uint64_t i = 0; i < n; i++) {
            pcmf32[i] = float(pcm16[i])/32768.0f;
        }
    } else {
        // average the two channels: (l + r)/65536 == ((l + r)/2)/32768
        for (uint64_t i = 0; i < n; i++) {
            pcmf32[i] = float(pcm16[2*i] + pcm16[2*i + 1])/65536.0f;
        }
    }

    if (stereo) {
        // convert to stereo, float (de-interleave the two channels)
        pcmf32s.resize(2);

        pcmf32s[0].resize(n);
        pcmf32s[1].resize(n);
        for (uint64_t i = 0; i < n; i++) {
            pcmf32s[0][i] = float(pcm16[2*i])/32768.0f;
            pcmf32s[1][i] = float(pcm16[2*i + 1])/32768.0f;
        }
    }

    return true;
}
|
||||
|
||||
void high_pass_filter(std::vector<float> & data, float cutoff, float sample_rate) {
|
||||
const float rc = 1.0f / (2.0f * M_PI * cutoff);
|
||||
const float dt = 1.0f / sample_rate;
|
||||
const float alpha = dt / (rc + dt);
|
||||
|
||||
float y = data[0];
|
||||
|
||||
for (size_t i = 1; i < data.size(); i++) {
|
||||
y = alpha * (y + data[i] - data[i - 1]);
|
||||
data[i] = y;
|
||||
}
|
||||
}
|
||||
|
||||
bool vad_simple(std::vector<float> & pcmf32, int sample_rate, int last_ms, float vad_thold, float freq_thold, bool verbose) {
|
||||
const int n_samples = pcmf32.size();
|
||||
const int n_samples_last = (sample_rate * last_ms) / 1000;
|
||||
|
||||
if (n_samples_last >= n_samples) {
|
||||
// not enough samples - assume no speech
|
||||
return false;
|
||||
}
|
||||
|
||||
if (freq_thold > 0.0f) {
|
||||
high_pass_filter(pcmf32, freq_thold, sample_rate);
|
||||
}
|
||||
|
||||
float energy_all = 0.0f;
|
||||
float energy_last = 0.0f;
|
||||
|
||||
for (int i = 0; i < n_samples; i++) {
|
||||
energy_all += fabsf(pcmf32[i]);
|
||||
|
||||
if (i >= n_samples - n_samples_last) {
|
||||
energy_last += fabsf(pcmf32[i]);
|
||||
}
|
||||
}
|
||||
|
||||
energy_all /= n_samples;
|
||||
energy_last /= n_samples_last;
|
||||
|
||||
if (verbose) {
|
||||
fprintf(stderr, "%s: energy_all: %f, energy_last: %f, vad_thold: %f, freq_thold: %f\n", __func__, energy_all, energy_last, vad_thold, freq_thold);
|
||||
}
|
||||
|
||||
if (energy_last > vad_thold*energy_all) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
@ -1,40 +0,0 @@
|
||||
#pragma once

// Shared audio helpers for the examples: WAV loading, string utilities,
// a simple high-pass filter, and energy-based voice activity detection.

// needs to match WHISPER_SAMPLE_RATE
#define COMMON_SAMPLE_RATE 16000

#include <vector>
#include <string>

// Return a copy of s with leading and trailing whitespace removed
std::string trim(const std::string & s);

// Return a copy of s with every occurrence of `from` replaced by `to`
std::string replace(
    const std::string & s,
    const std::string & from,
    const std::string & to);

// Read WAV audio file and store the PCM data into pcmf32
// The sample rate of the audio must be equal to COMMON_SAMPLE_RATE
// If stereo flag is set and the audio has 2 channels, the pcmf32s will contain 2 channel PCM
bool read_wav(
    const std::string & fname,
    std::vector<float> & pcmf32,
    std::vector<std::vector<float>> & pcmf32s,
    bool stereo);

// Apply a high-pass frequency filter to PCM audio
// Suppresses frequencies below cutoff Hz
// Filters `data` in place
void high_pass_filter(
    std::vector<float> & data,
    float cutoff,
    float sample_rate);

// Basic voice activity detection (VAD) using audio energy adaptive threshold
// Returns true when the trailing `last_ms` window is quiet relative to the
// whole buffer; may high-pass filter pcmf32 in place when freq_thold > 0
bool vad_simple(
    std::vector<float> & pcmf32,
    int sample_rate,
    int last_ms,
    float vad_thold,
    float freq_thold,
    bool verbose);
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,475 +0,0 @@
|
||||
// quantized matrix multiplication
|
||||
|
||||
#include "ggml.h"
|
||||
|
||||
#include <float.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include <math.h>
|
||||
|
||||
#include <sys/time.h>
|
||||
|
||||
#ifdef __ARM_NEON
|
||||
#include "arm_neon.h"
|
||||
#endif
|
||||
|
||||
#ifndef MIN
|
||||
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
||||
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
#endif
|
||||
|
||||
const int M = 1280;
|
||||
const int N = 1536;
|
||||
const int K = 1280;
|
||||
|
||||
const int QK = 64;
|
||||
#define QB 7
|
||||
|
||||
//#define GGML_GQ_USE_FP16_SCALE
|
||||
|
||||
#if defined(GGML_GQ_USE_FP16_SCALE)
|
||||
#define gq_scale_t ggml_fp16_t
|
||||
#define GGML_FP32_TO_GQ(x) ggml_fp32_to_fp16(x)
|
||||
#define GGML_GQ_TO_FP32(x) ggml_fp16_to_fp32(x)
|
||||
#else
|
||||
#define gq_scale_t float
|
||||
#define GGML_FP32_TO_GQ(x) (x)
|
||||
#define GGML_GQ_TO_FP32(x) (x)
|
||||
#endif
|
||||
|
||||
#define gq_quant_t uint64_t
|
||||
#define gq_t_bits 64
|
||||
|
||||
// Wall-clock timestamp in microseconds since the Unix epoch.
// Fix: cast tv_sec to uint64_t BEFORE multiplying so the arithmetic is done
// in 64 bits — on platforms with a 32-bit time_t the original
// `tv.tv_sec * 1000000` overflows.
uint64_t get_time_us(void) {
    struct timeval tv;
    gettimeofday(&tv, NULL);
    return (uint64_t) tv.tv_sec * 1000000 + tv.tv_usec;
}
|
||||
|
||||
//
|
||||
// naive implementation
|
||||
//
|
||||
|
||||
// Reference matrix multiplication: dst = src0 * src1^T.
// src0 is m x k row-major; src1 is n x k row-major (i.e. the transpose of
// the logical k x n right operand); dst is m x n row-major.
void mul_mat_f32_naive(
    const float * restrict src0, // M x K
    const float * restrict src1, // N x K (transposed)
    float * dst,
    int m, int n, int k) {
    for (int row = 0; row < m; row++) {
        const float * a = src0 + row*k;

        for (int col = 0; col < n; col++) {
            const float * b = src1 + col*k;

            // dot product of row `row` of src0 with row `col` of src1
            float acc = 0;
            for (int t = 0; t < k; t++) {
                acc += a[t] * b[t];
            }

            dst[row*n + col] = acc;
        }
    }
}
|
||||
|
||||
//
|
||||
// method 1
|
||||
//
|
||||
|
||||
// Quantize an n x k row-major f32 matrix into the method-1 packed layout.
// Each QK-wide block stores, interleaved in the output stream:
//   float min, float d (step), then QB bit-planes of packed quants
// (bit b of each QB-bit quant goes into plane b, one bit per element).
// NOTE(review): assumes k is a multiple of QK — there is no tail handling.
void quantize_1(const float * src, void * dst, int n, int k) {
    char * p0 = dst; // write cursor into the packed output

    gq_quant_t pp[QB]; // staging area for one group of bit-planes

    for (int j = 0; j < n; j++) {
        for (int i = 0; i < k/QK; i++) {
            float min = FLT_MAX;
            float max = -FLT_MAX;

            // find min/max
#ifdef __ARM_NEON
            {
                float32x4_t minv = vdupq_n_f32(FLT_MAX);
                float32x4_t maxv = vdupq_n_f32(-FLT_MAX);

                for (int l = 0; l < QK; l += 4) {
                    float32x4_t v = vld1q_f32(src + j*k + i*QK + l);
                    minv = vminq_f32(minv, v);
                    maxv = vmaxq_f32(maxv, v);
                }

                // horizontal reduction of the four lanes down to a scalar
                float32x2_t minv32 = vpmin_f32(vget_low_f32(minv), vget_high_f32(minv));
                float32x2_t maxv32 = vpmax_f32(vget_low_f32(maxv), vget_high_f32(maxv));

                min = MIN(vget_lane_f32(minv32, 0), vget_lane_f32(minv32, 1));
                max = MAX(vget_lane_f32(maxv32, 0), vget_lane_f32(maxv32, 1));

                //printf("SIMD min/max: %f %f\n", min, max);
            }
#else
            {
                for (int l = 0; l < QK; l++) {
                    const float v = src[j*k + i*QK + l];
                    if (v < min) min = v;
                    if (v > max) max = v;
                }

                //printf("NORM min/max: %f %f\n", min, max);
            }
#endif

            // step size mapping [min, max] onto the QB-bit range [0, 2^QB - 1];
            // id is the (guarded) inverse used for quantizing
            const float d = (max - min) / ((1 << QB) - 1);
            const float id = d ? 1.0/d : 0.0;

            // block header: offset then scale
            memcpy(p0, &min, sizeof(float)); p0 += sizeof(float);
            memcpy(p0, &d,   sizeof(float)); p0 += sizeof(float);

            //printf("min/max/d/id: %f %f %f %f\n", min, max, d, id);

            // pack the block's quants, gq_t_bits elements at a time
            for (int s = 0; s < QK/gq_t_bits; ++s) {
                memset(pp, 0, sizeof(pp));

                for (int l = 0; l < gq_t_bits; l++) {
                    const float v = src[j*k + i*QK + s*gq_t_bits + l];
                    const uint8_t q = (v - min)*id;

                    // scatter bit b of q into bit-plane b at position l
                    for (int b = 0; b < QB; b++) {
                        pp[b] |= q & (1 << b) ? (1ULL << l) : 0;
                    }
                }

                for (int b = 0; b < QB; b++) {
                    memcpy(p0, &pp[b], sizeof(gq_quant_t)); p0 += sizeof(gq_quant_t);
                }
            }
        }
    }
}
|
||||
|
||||
// dst = src0 * src1^T where both operands are in the method-1 packed layout
// produced by quantize_1 (src0: m rows, src1: n rows, k columns each).
// The dot product is evaluated blockwise via popcount over bit-plane pairs:
// a row element is min + d * sum_b 2^b * bit_b, so each (plane, plane) pair
// contributes s0[q0]*s1[q1]*popcount(mask0 & mask1), with index 0 standing
// for the constant `min` term (mask of all ones).
void mul_mat_gq_1(
    const void * src0,
    const void * src1,
    float * dst,
    int m, int n, int k) {
    // round k down to a multiple of gq_t_bits
    // NOTE(review): elements beyond kp are silently ignored — confirm callers
    // always pass k that is a multiple of QK
    const int kp = k & ~(gq_t_bits - 1);

    const char * restrict p0 = src0;
    const char * restrict p1 = src1;

    // per-term scale factors: [0] = min term, [b+1] = weight of bit-plane b
    float s0[QB + 1];
    float s1[QB + 1];

    // per-term bit masks, same indexing as above
    gq_quant_t m0[QB + 1];
    gq_quant_t m1[QB + 1];

    for (int ir0 = 0; ir0 < m; ir0++) {
        for (int ir1 = 0; ir1 < n; ir1++) {
            float sumf = 0.0;

            // byte offset of each row: (header + planes) per block, k/QK blocks
            const char * restrict pp0 = p0 + ir0*((2*sizeof(float) + (QK/gq_t_bits)*QB*sizeof(gq_quant_t))*(k/QK));
            const char * restrict pp1 = p1 + ir1*((2*sizeof(float) + (QK/gq_t_bits)*QB*sizeof(gq_quant_t))*(k/QK));

            for (int i = 0; i < kp/QK; i++) {
                // read both block headers: offset (min) and scale (d)
                float min0, d0;
                memcpy(&min0, pp0, sizeof(float)); pp0 += sizeof(float);
                memcpy(&d0,   pp0, sizeof(float)); pp0 += sizeof(float);

                float min1, d1;
                memcpy(&min1, pp1, sizeof(float)); pp1 += sizeof(float);
                memcpy(&d1,   pp1, sizeof(float)); pp1 += sizeof(float);

                //printf("min0/d0 = %f %f | min1/d1 = %f %f\n", min0, d0, min1, d1);

#if 1
                // >>> General case for any QB

                s0[0] = min0;
                s1[0] = min1;

                for (int b = 0; b < QB; b++) {
                    s0[b + 1] = d0*(1 << b);
                    s1[b + 1] = d1*(1 << b);
                }

                // the min term multiplies every element: mask of all ones
                m0[0] = -1ULL;
                m1[0] = -1ULL;

                for (int s = 0; s < QK/gq_t_bits; ++s) {
                    for (int b = 0; b < QB; b++) {
                        memcpy(&m0[b + 1], pp0, sizeof(gq_quant_t)); pp0 += sizeof(gq_quant_t);
                        memcpy(&m1[b + 1], pp1, sizeof(gq_quant_t)); pp1 += sizeof(gq_quant_t);
                    }

                    // accumulate all (QB+1) x (QB+1) term combinations
                    for (int q0 = 0; q0 < QB + 1; q0++) {
                        for (int q1 = 0; q1 < QB + 1; q1++) {
                            sumf += s0[q0]*s1[q1]*__builtin_popcountll(m0[q0] & m1[q1]);
                        }
                    }
                }
#else
#endif
            }

            dst[ir0*n + ir1] = sumf;
        }
    }
}
|
||||
|
||||
//
|
||||
// method 2
|
||||
//
|
||||
|
||||
// number of QK-wide quantization blocks in a row of k elements
// (k must be a multiple of QK)
static inline int quantize_2_blocks_per_row(int k) {
    return k/QK;
}

// number of gq_quant_t words per bit-plane in one block
static inline int quantize_2_quants_per_block() {
    return QK/gq_t_bits;
}

// bytes needed to store one quantized row of k elements:
// per block, a min and a scale (one gq_scale_t each) plus nq*QB packed
// quant words
static inline int quantize_2_row_size(int k) {
    const int nb = quantize_2_blocks_per_row(k);
    const int nq = quantize_2_quants_per_block();

    return nb*(2*sizeof(gq_scale_t) + nq*QB*sizeof(gq_quant_t));
}
|
||||
|
||||
// Quantize one row of k f32 values into the method-2 layout at dst:
// all block mins first, then all block scales, then all packed quant words —
// a structure-of-arrays layout, unlike method 1's interleaved stream.
void quantize_2_row(const float * restrict src, void * restrict dst, int k) {
    assert(k % QK == 0);

    const int nb = quantize_2_blocks_per_row(k);
    const int nq = quantize_2_quants_per_block();

    // section pointers into the row buffer: mins | scales | quants
    gq_scale_t * restrict pm = (gq_scale_t *) (dst);
    gq_scale_t * restrict pd = (gq_scale_t *) (pm + nb);
    gq_quant_t * restrict pb = (gq_quant_t *) (pd + nb);

    gq_quant_t pp[QB]; // staging area for one group of bit-planes

    for (int i = 0; i < nb; i++) {
        float min = FLT_MAX;
        float max = -FLT_MAX;

        // find the block's value range
        for (int l = 0; l < QK; l++) {
            const float v = src[i*QK + l];
            if (v < min) min = v;
            if (v > max) max = v;
        }

        // step size mapping [min, max] onto [0, 2^QB - 1]; id is the
        // (guarded) inverse used for quantizing
        const float d = (max - min) / ((1 << QB) - 1);
        const float id = d ? 1.0/d : 0.0;

        pm[i] = GGML_FP32_TO_GQ(min);
        pd[i] = GGML_FP32_TO_GQ(d);

        for (int s = 0; s < nq; ++s) {
            memset(pp, 0, sizeof(pp));

            for (int l = 0; l < gq_t_bits; l++) {
                const float v = src[i*QK + s*gq_t_bits + l];
                const uint8_t q = (v - min)*id;

                // scatter bit b of the quant into bit-plane b, position l
                for (int b = 0; b < QB; b++) {
                    pp[b] |= q & (1 << b) ? (1ULL << l) : 0;
                }
            }

            for (int b = 0; b < QB; b++) {
                pb[i*nq*QB + s*QB + b] = pp[b];
            }
        }
    }
}
|
||||
|
||||
// reimplementation of quantize_2 using quantize_2_row
|
||||
void quantize_2(const float * restrict src, char * restrict dst, int n, int k) {
|
||||
assert(k % QK == 0);
|
||||
|
||||
for (int j = 0; j < n; j++) {
|
||||
quantize_2_row(src + j*k, dst, k);
|
||||
dst = (char *) dst + quantize_2_row_size(k);
|
||||
}
|
||||
}
|
||||
|
||||
// Dot product of two method-2 quantized rows of n elements each; the result
// is written to *s. Partial sums are accumulated per (term, term) pair in
// sumf and reduced at the end; index 0 in either dimension is the constant
// (min) term, represented by an all-ones mask.
void vec_dot_gq_2(const int n, float * restrict s, const void * restrict x, const void * restrict y) {
    float sumf[(QB + 1)*(QB + 1)];
    memset(sumf, 0, sizeof(sumf));

    const int nb = quantize_2_blocks_per_row(n);
    const int nq = quantize_2_quants_per_block();

    // section pointers (see quantize_2_row layout): mins | scales | quants
    const gq_scale_t * restrict pm0 = (const gq_scale_t *) x;
    const gq_scale_t * restrict pm1 = (const gq_scale_t *) y;

    const gq_scale_t * restrict pd0 = pm0 + nb;
    const gq_scale_t * restrict pd1 = pm1 + nb;

    const gq_quant_t * restrict pb0 = (const gq_quant_t *) (pd0 + nb);
    const gq_quant_t * restrict pb1 = (const gq_quant_t *) (pd1 + nb);

#if 1
    // per-term scale factors: [0] = min term, [b+1] = weight of bit-plane b
    float s0[QB + 1];
    float s1[QB + 1];

    for (int i = 0; i < nb; i++) {
        const float m0 = GGML_GQ_TO_FP32(pm0[i]);
        const float d0 = GGML_GQ_TO_FP32(pd0[i]);

        const float m1 = GGML_GQ_TO_FP32(pm1[i]);
        const float d1 = GGML_GQ_TO_FP32(pd1[i]);

        s0[0] = m0;
        s1[0] = m1;

        for (int b = 0; b < QB; b++) {
            s0[b + 1] = d0*(1 << b);
            s1[b + 1] = d1*(1 << b);
        }

        // NOTE(review): the loop index `s` below shadows the output
        // parameter `s` — harmless here (the output is only written after
        // the loop) but worth renaming
        for (int s = 0; s < nq; ++s) {
            for (int q0 = 0; q0 < QB + 1; q0++) {
                // q0 == 0 is the constant term: mask of all ones
                const gq_quant_t mm0 = q0 ? pb0[i*nq*QB + s*QB + q0 - 1] : -1ULL;
                for (int q1 = 0; q1 < QB + 1; q1++) {
                    const gq_quant_t mm1 = q1 ? pb1[i*nq*QB + s*QB + q1 - 1] : -1ULL;
                    sumf[q0*(QB + 1) + q1] += s0[q0]*s1[q1]*__builtin_popcountll(mm0 & mm1);
                }
            }
        }
    }
#else
    // SIMD-ify with the assumptions:
    // - nb is a multiple of 4
    // - gq_scale_t is float
    // - gq_quant_t is uint64_t
    // - QB == 7
    assert(nb % 4 == 0);

#ifdef __ARM_NEON
#else
    // TODO
#endif

#endif

    // reduce the (QB+1) x (QB+1) partial sums: fold each row's columns into
    // column 0, then fold the rows into the final scalar
    for (int q0 = 0; q0 < QB + 1; q0++) {
        for (int q1 = 1; q1 < QB + 1; q1++) {
            sumf[q0*(QB + 1)] += sumf[q0*(QB + 1) + q1];
        }
    }

    *s = sumf[0];
    for (int q0 = 1; q0 < QB + 1; q0++) {
        *s += sumf[q0*(QB + 1)];
    }
}
|
||||
|
||||
// use vec_dot_gq_2 to compute the dot product of two rows
|
||||
void mul_mat_gq_2(
|
||||
const void * src0,
|
||||
const void * src1, // transposed
|
||||
float * dst,
|
||||
int m, int n, int k) {
|
||||
assert(k % QK == 0);
|
||||
|
||||
const int nb = quantize_2_blocks_per_row(k);
|
||||
const int nq = quantize_2_quants_per_block();
|
||||
|
||||
for (int ir0 = 0; ir0 < m; ir0++) {
|
||||
for (int ir1 = 0; ir1 < n; ir1++) {
|
||||
vec_dot_gq_2(k, dst + ir1, src0, src1);
|
||||
src1 = (const char *) src1 + quantize_2_row_size(k);
|
||||
}
|
||||
src0 = (const char *) src0 + quantize_2_row_size(k);
|
||||
src1 = (const char *) src1 - n*quantize_2_row_size(k);
|
||||
|
||||
dst = (float *) dst + n;
|
||||
}
|
||||
}
|
||||
|
||||
// Benchmark driver: fills random M x K and N x K matrices, optionally
// quantizes them (method 1 or 2 from argv[1]; 0 = plain f32), runs the
// corresponding matmul once, and prints timings plus a checksum of the
// first output row.
int main(int argc, const char ** argv) {
    // the packed-word type must match the configured bit width
    assert(sizeof(gq_quant_t)*8 == gq_t_bits);

    float * src0 = (float *)malloc(sizeof(float)*M*K);
    float * src1 = (float *)malloc(sizeof(float)*N*K);
    float * dst  = (float *)malloc(sizeof(float)*M*N);

    // random inputs in [0, 1]
    for (int i = 0; i < M*K; i++) {
        src0[i] = rand() / (float)RAND_MAX;
    }

    for (int i = 0; i < N*K; i++) {
        src1[i] = rand() / (float)RAND_MAX;
    }

    // quantized copies
    // NOTE(review): sized with the method-2 row size; method 1's layout has
    // the same footprint only while gq_scale_t is float (fp16 scales off)
    void * src0_gq = calloc(1, quantize_2_row_size(K)*M);
    void * src1_gq = calloc(1, quantize_2_row_size(K)*N);

    // report size vs an fp16 baseline
    const size_t sizef16 = sizeof(ggml_fp16_t)*M*K + sizeof(ggml_fp16_t)*N*K;
    const size_t sizegq  = quantize_2_row_size(K)*M + quantize_2_row_size(K)*N;

    printf("compression: %f\n", (float)sizegq/sizef16);

    int method = 0;
    if (argc > 1) {
        method = atoi(argv[1]);
    }

    // convert fp32 -> gq
    {
        const uint64_t t_start = get_time_us();

        if (method == 1) {
            quantize_1(src0, src0_gq, M, K);
            quantize_1(src1, src1_gq, N, K);
        }

        if (method == 2) {
            quantize_2(src0, src0_gq, M, K);
            quantize_2(src1, src1_gq, N, K);
        }

        const uint64_t t_end = get_time_us();
        printf("convert time: %f ms / method = %d\n", (t_end - t_start) / 1000.0, method);
    }

    const int nIter = 1;

    const clock_t start = clock();
    const uint64_t start_us = get_time_us();

    double iM = 1.0/M;
    double sum = 0.0f;
    for (int i = 0; i < nIter; i++) {
        if (method == 0) {
            mul_mat_f32_naive(src0, src1, dst, M, N, K);
        }

        if (method == 1) {
            mul_mat_gq_1(src0_gq, src1_gq, dst, M, N, K);
        }

        if (method == 2) {
            mul_mat_gq_2(src0_gq, src1_gq, dst, M, N, K);
        }
    }

    // checksum over the first output row only (keeps the comparison cheap)
    for (int i = 0; i < N; i++) {
        sum += dst[i]*iM;
    }

    {
        const clock_t end = clock();
        const uint64_t end_us = get_time_us();
        printf("%s: elapsed ticks: %ld\n", __func__, end - start);
        printf("%s: elapsed us: %d / %f ms\n", __func__, (int)(end_us - start_us), (end_us - start_us) / 1000.0 / nIter);
    }

    printf("%f\n", sum);

    free(src0);
    free(src1);
    free(dst);

    free(src0_gq);
    free(src1_gq);

    return 0;
}
|
@ -1,218 +0,0 @@
|
||||
// SVD dimensionality reduction
|
||||
|
||||
#include <float.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include <math.h>
|
||||
|
||||
#include <sys/time.h>
|
||||
|
||||
#ifdef GGML_USE_ACCELERATE
|
||||
#include <Accelerate/Accelerate.h>
|
||||
#endif
|
||||
|
||||
float frand() {
|
||||
return (float) rand() / (float) RAND_MAX;
|
||||
}
|
||||
|
||||
//int sgesvd_(char *__jobu, char *__jobvt, __CLPK_integer *__m,
|
||||
// __CLPK_integer *__n, __CLPK_real *__a, __CLPK_integer *__lda,
|
||||
// __CLPK_real *__s, __CLPK_real *__u, __CLPK_integer *__ldu,
|
||||
// __CLPK_real *__vt, __CLPK_integer *__ldvt, __CLPK_real *__work,
|
||||
// __CLPK_integer *__lwork,
|
||||
// __CLPK_integer *__info)
|
||||
|
||||
// SVD dimensionality-reduction demo: builds a small n x 5 / m x 10 random
// matrix, factors it with LAPACK's sgesvd_, normalizes the singular
// values/vectors, and projects the original data onto the singular basis.
// NOTE(review): sgesvd_ expects column-major storage; the arrays here are
// indexed [i * m + j] and the prints label i as "col" — the demo treats each
// of the n length-m rows as a column vector, which is consistent with that,
// but confirm before reusing. Allocations are deliberately never freed
// (throwaway demo).
int main(int argc, const char ** argv) {
    int m = 10;
    int n = 5;

    float * A  = (float *) malloc(n * m * sizeof(float));
    float * A0 = (float *) malloc(n * m * sizeof(float)); // pristine copy of A

    // synthesize data: n vectors with distinct magnitudes, vectors 1 and 3
    // flipped in sign over their second half
    for (int i = 0; i < n; ++i) {
        for (int j = 0; j < m; ++j) {
            A[i * m + j] = (float) (10.0f*(i + 1) + 1.0f * frand());
            //A[i * m + j] = (float) (10.0f*(i%2 + 1) + 0.1f * frand());
            //if (i == 2) {
            //    A[i * m + j] += 20*frand();
            //}
            if ((i == 1 || i == 3) && j > m/2) {
                A[i * m + j] = -A[i * m + j];
            }
        }
    }

    // average vector
    //float * M = (float *) malloc(m * sizeof(float));

    //{
    //    for (int j = 0; j < m; ++j) {
    //        M[j] = 0.0f;
    //    }
    //    for (int i = 0; i < n; ++i) {
    //        for (int j = 0; j < m; ++j) {
    //            M[j] += A[i * m + j];
    //        }
    //    }
    //    for (int j = 0; j < m; ++j) {
    //        M[j] /= (float) n;
    //    }
    //}

    //// subtract average vector
    //for (int i = 0; i < n; ++i) {
    //    for (int j = 0; j < m; ++j) {
    //        A[i * m + j] -= M[j];
    //    }
    //}

    // keep an unmodified copy: sgesvd_ destroys its input matrix
    memcpy(A0, A, n * m * sizeof(float));

    // print A
    printf("A:\n");
    for (int i = 0; i < n; ++i) {
        printf("col %d : ", i);
        for (int j = 0; j < m; ++j) {
            printf("%9.5f ", A[i * m + j]);
        }
        printf("\n");
    }
    printf("\n");

    // SVD
    // A = U * S * V^T

    float * U = (float *) malloc(n * m * sizeof(float));
    float * S = (float *) malloc(n * sizeof(float));
    float * V = (float *) malloc(n * n * sizeof(float));

    int lda  = m;
    int ldu  = m;
    int ldvt = n;

    float work_size;
    int lwork = -1; // lwork == -1: LAPACK workspace-size query
    int info  = 0;

    // first call only computes the optimal workspace size into work_size
    sgesvd_("S", "S", &m, &n, A, &lda, S, U, &ldu, V, &ldvt, &work_size, &lwork, &info);

    lwork = (int) work_size;

    printf("work_size = %f, info = %d, lwork = %d\n", work_size, info, lwork);

    float * work = (float *) malloc(lwork * sizeof(float));

    // second call performs the actual factorization
    sgesvd_("S", "S", &m, &n, A, &lda, S, U, &ldu, V, &ldvt, work, &lwork, &info);

    // print U
    printf("U:\n");
    for (int i = 0; i < n; ++i) {
        printf("col %d : ", i);
        for (int j = 0; j < m; ++j) {
            printf("%9.5f ", U[i * m + j]);
        }
        printf("\n");
    }
    printf("\n");

    // normalize S
    {
        double sum = 0.0;
        for (int i = 0; i < n; ++i) {
            sum += S[i];
        }
        sum *= sqrt((double) m);
        for (int i = 0; i < n; ++i) {
            S[i] /= sum;
        }
    }

    // print S
    printf("S:\n");
    for (int i = 0; i < n; ++i) {
        printf("- %d = %9.5f\n", i, S[i]);
    }
    printf("\n");

    // print V
    printf("V:\n");
    for (int i = 0; i < n; ++i) {
        printf("col %d : ", i);
        for (int j = 0; j < n; ++j) {
            printf("%9.5f ", V[i * n + j]);
        }
        printf("\n");
    }
    printf("\n");

    // print A
    // NOTE(review): A was overwritten by sgesvd_ above — this shows the
    // post-factorization contents, not the input
    printf("A:\n");
    for (int i = 0; i < n; ++i) {
        printf("col %d : ", i);
        for (int j = 0; j < m; ++j) {
            printf("%9.5f ", A[i * m + j]);
        }
        printf("\n");
    }
    printf("\n");

    // compute singular vectors in U: scale each left singular vector by its
    // (normalized) singular value
    for (int i = 0; i < n; ++i) {
        for (int j = 0; j < m; ++j) {
            U[i * m + j] *= S[i];
        }
    }

    // normalize U: unit length, with an extra 1/sqrt(m) factor
    for (int i = 0; i < n; ++i) {
        double sum = 0.0;
        for (int j = 0; j < m; ++j) {
            sum += U[i * m + j] * U[i * m + j];
        }
        sum = sqrt(sum);
        for (int j = 0; j < m; ++j) {
            U[i * m + j] /= sum*sqrt((double) m);
        }
    }

    // print U
    printf("U:\n");
    for (int i = 0; i < n; ++i) {
        printf("col %d : ", i);
        for (int j = 0; j < m; ++j) {
            printf("%9.5f ", U[i * m + j]);
        }
        printf("\n");
    }
    printf("\n");


    // project A0 onto U: A1[i][j] = <A0 row i, U row j>
    float * A1 = (float *) malloc(n * n * sizeof(float));

    for (int i = 0; i < n; ++i) {
        for (int j = 0; j < n; ++j) {
            A1[i * n + j] = 0.0f;
            for (int k = 0; k < m; ++k) {
                A1[i * n + j] += A0[i * m + k] * U[j * m + k];
            }
        }
    }

    // print A1
    printf("A1:\n");
    for (int i = 0; i < n; ++i) {
        printf("col %d : ", i);
        for (int j = 0; j < n; ++j) {
            printf("%9.5f ", A1[i * n + j]);
        }
        printf("\n");
    }
    printf("\n");

    return 0;
}
|
Loading…
Reference in new issue