Compare commits

..

7 Commits

Author SHA1 Message Date
Georgi Gerganov 1808ee0500
Add initial contribution guidelines
2 years ago
Matvey Soloviev a169bb889c Gate signal support on being on a unixoid system. (#74)
2 years ago
Matvey Soloviev 460c482540 Fix token count accounting
2 years ago
Georgi Gerganov c80e2a8f2a
Revert "10% performance boost on ARM"
2 years ago
Georgi Gerganov 54a0e66ea0
Check for vdotq_s32 availability
2 years ago
Georgi Gerganov 543c57e991
Amend to previous commit - forgot to update non-QRDMX branch
2 years ago
Georgi Gerganov 113a9e83eb
10% performance boost on ARM
2 years ago

@ -218,3 +218,18 @@ Note the use of `--color` to distinguish between user input and generated text.
know how to utilize it properly. But in any case, you can even disable it with `LLAMA_NO_ACCELERATE=1 make` and the
performance will be the same, since no BLAS calls are invoked by the current implementation.
### Contributing
- There are 2 git branches: [master](https://github.com/ggerganov/llama.cpp/commits/master) and [dev](https://github.com/ggerganov/llama.cpp/commits/dev)
- Contributors can open PRs to either one
- Collaborators can push directly to `dev`, but need to open a PR to get changes into `master`
- Collaborators will be invited based on contributions
- The `dev` branch is considered unstable
- The `master` branch is considered stable and approved. Third-party projects should use the `master` branch
General principles to follow when writing code:
- Avoid adding third-party dependencies, extra files, extra headers, etc.
- Always consider cross-compatibility with other operating systems and architectures
- Avoid fancy-looking modern STL constructs; use basic `for` loops, avoid templates, and keep it simple
- There are no strict rules for the code style, but try to follow the patterns in the code (indentation, spaces, etc.). Vertical alignment makes things more readable and easier to batch edit

@ -1360,22 +1360,6 @@ inline static void ggml_vec_dot_q4_0(const int n, float * restrict s, const void
const int8x16_t v1_1hs = vsubq_s8(v1_1h, s8b);
// dot product into int16x8_t
#if defined(__ARM_FEATURE_DOTPROD)
int32x4_t p_0 = vdotq_s32(vdupq_n_s32(0), v0_0ls, v1_0ls);
int32x4_t p_1 = vdotq_s32(vdupq_n_s32(0), v0_1ls, v1_1ls);
p_0 = vdotq_s32(p_0, v0_0hs, v1_0hs);
p_1 = vdotq_s32(p_1, v0_1hs, v1_1hs);
// scalar
#if defined(__ARM_FEATURE_QRDMX)
sum0 += d0_0*d1_0*vaddvq_s32(p_0);
sum1 += d0_1*d1_1*vaddvq_s32(p_1);
#else
sum0 += d0_0*d1_0*(vgetq_lane_s32(p_0, 0) + vgetq_lane_s32(p_0, 1) + vgetq_lane_s32(p_0, 2) + vgetq_lane_s32(p_0, 3));
sum1 += d0_1*d1_1*(vgetq_lane_s32(p_1, 0) + vgetq_lane_s32(p_1, 1) + vgetq_lane_s32(p_1, 2) + vgetq_lane_s32(p_1, 3));
#endif
#else
const int16x8_t pl0l = vmull_s8(vget_low_s8 (v0_0ls), vget_low_s8 (v1_0ls));
const int16x8_t pl0h = vmull_s8(vget_high_s8(v0_0ls), vget_high_s8(v1_0ls));
@ -1404,7 +1388,6 @@ inline static void ggml_vec_dot_q4_0(const int n, float * restrict s, const void
#else
sum0 += d0_0*d1_0*(vgetq_lane_s16(p_0, 0) + vgetq_lane_s16(p_0, 1) + vgetq_lane_s16(p_0, 2) + vgetq_lane_s16(p_0, 3) + vgetq_lane_s16(p_0, 4) + vgetq_lane_s16(p_0, 5) + vgetq_lane_s16(p_0, 6) + vgetq_lane_s16(p_0, 7));
sum1 += d0_1*d1_1*(vgetq_lane_s16(p_1, 0) + vgetq_lane_s16(p_1, 1) + vgetq_lane_s16(p_1, 2) + vgetq_lane_s16(p_1, 3) + vgetq_lane_s16(p_1, 4) + vgetq_lane_s16(p_1, 5) + vgetq_lane_s16(p_1, 6) + vgetq_lane_s16(p_1, 7));
#endif
#endif
}

@ -11,8 +11,10 @@
#include <string>
#include <vector>
#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
#include <signal.h>
#include <unistd.h>
#endif
#define ANSI_COLOR_RED "\x1b[31m"
#define ANSI_COLOR_GREEN "\x1b[32m"
@ -747,6 +749,7 @@ bool llama_eval(
static bool is_interacting = false;
#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
void sigint_handler(int signo) {
if (signo == SIGINT) {
if (!is_interacting) {
@ -756,6 +759,7 @@ void sigint_handler(int signo) {
}
}
}
#endif
int main(int argc, char ** argv) {
ggml_time_init();
@ -822,11 +826,13 @@ int main(int argc, char ** argv) {
}
printf("\n");
if (params.interactive) {
#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
struct sigaction sigint_action;
sigint_action.sa_handler = sigint_handler;
sigemptyset (&sigint_action.sa_mask);
sigint_action.sa_flags = 0;
sigaction(SIGINT, &sigint_action, NULL);
#endif
printf("%s: interactive mode on.\n", __func__);
@ -855,7 +861,9 @@ int main(int argc, char ** argv) {
if (params.interactive) {
printf("== Running in interactive mode. ==\n"
#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
" - Press Ctrl+C to interject at any time.\n"
#endif
" - Press Return to return control to LLaMa.\n"
" - If you want to submit another line, end your input in '\\'.\n");
}
@ -957,10 +965,15 @@ int main(int argc, char ** argv) {
// currently being interactive
bool another_line=true;
while (another_line) {
fflush(stdout);
char buf[256] = {0};
int n_read;
if(params.use_color) printf(ANSI_BOLD ANSI_COLOR_GREEN);
scanf("%255[^\n]%n%*c", buf, &n_read);
if (scanf("%255[^\n]%n%*c", buf, &n_read) <= 0) {
// presumably an empty line; consume the newline
scanf("%*c");
n_read=0;
}
if(params.use_color) printf(ANSI_COLOR_RESET);
if (n_read > 0 && buf[n_read-1]=='\\') {
@ -976,6 +989,8 @@ int main(int argc, char ** argv) {
std::vector<gpt_vocab::id> line_inp = ::llama_tokenize(vocab, buf, false);
embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end());
remaining_tokens -= line_inp.size();
input_noecho = true; // do not echo this again
}

Loading…
Cancel
Save