From bde7ee3751ba167e7ba38ab8acaf04100e24815a Mon Sep 17 00:00:00 2001
From: Abitofevrything
Date: Sun, 8 Jan 2023 10:26:58 +0100
Subject: [PATCH] Remove ggml_cycles and ggml_perf timings; replace with
 ggml_process_time

---
 examples/talk.wasm/gpt-2.cpp |   4 +-
 examples/talk/gpt-2.cpp      |   4 +-
 ggml.c                       | 136 +++++++++++++++++------------------
 ggml.h                       |  16 ++---
 whisper.cpp                  |   2 +-
 5 files changed, 77 insertions(+), 85 deletions(-)

diff --git a/examples/talk.wasm/gpt-2.cpp b/examples/talk.wasm/gpt-2.cpp
index bc5e099..189cc31 100644
--- a/examples/talk.wasm/gpt-2.cpp
+++ b/examples/talk.wasm/gpt-2.cpp
@@ -837,14 +837,14 @@ struct gpt2_context * gpt2_init(const char * path_model) {
 
     // load the model
     {
-        const int64_t t_start_us = ggml_time_us();
+        const int64_t t_start_us = ggml_real_time_us();
 
         if (!gpt2_model_load(path_model, ctx->model, ctx->vocab)) {
             fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, "gpt-2.bin");
             return nullptr;
         }
 
-        const int64_t t_load_us = ggml_time_us() - t_start_us;
+        const int64_t t_load_us = ggml_real_time_us() - t_start_us;
 
         printf("gpt-2: model loaded in %d ms\n", (int) (t_load_us/1000));
     }
diff --git a/examples/talk/gpt-2.cpp b/examples/talk/gpt-2.cpp
index c28644d..f8a4be8 100644
--- a/examples/talk/gpt-2.cpp
+++ b/examples/talk/gpt-2.cpp
@@ -837,7 +837,7 @@ struct gpt2_context * gpt2_init(const char * path_model) {
 
     // load the model
     {
-        const int64_t t_start_us = ggml_time_us();
+        const int64_t t_start_us = ggml_real_time_us();
 
         if (!gpt2_model_load(path_model, ctx->model, ctx->vocab)) {
             fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, path_model);
@@ -845,7 +845,7 @@ struct gpt2_context * gpt2_init(const char * path_model) {
             return nullptr;
         }
 
-        const int64_t t_load_us = ggml_time_us() - t_start_us;
+        const int64_t t_load_us = ggml_real_time_us() - t_start_us;
 
         printf("gpt-2: model loaded in %d ms\n", (int) (t_load_us/1000));
     }
diff --git a/ggml.c b/ggml.c
index c59ee64..a0eb09a 100644
--- a/ggml.c
+++ b/ggml.c
@@ -287,49 +287,41 @@ void ggml_time_init(void) {
     QueryPerformanceFrequency(&frequency);
     timer_freq = frequency.QuadPart;
 }
-int64_t ggml_time_ms(void) {
+int64_t ggml_real_time_ms(void) {
     LARGE_INTEGER t;
     QueryPerformanceCounter(&t);
     return (t.QuadPart * 1000) / timer_freq;
 }
-int64_t ggml_time_us(void) {
+int64_t ggml_real_time_us(void) {
     LARGE_INTEGER t;
     QueryPerformanceCounter(&t);
     return (t.QuadPart * 1000000) / timer_freq;
 }
 #else
 void ggml_time_init(void) {}
-int64_t ggml_time_ms(void) {
+int64_t ggml_real_time_ms(void) {
     struct timespec ts;
     clock_gettime(CLOCK_MONOTONIC, &ts);
     return (int64_t)ts.tv_sec*1000 + (int64_t)ts.tv_nsec/1000000;
 }
 
-int64_t ggml_time_us(void) {
+int64_t ggml_real_time_us(void) {
     struct timespec ts;
     clock_gettime(CLOCK_MONOTONIC, &ts);
     return (int64_t)ts.tv_sec*1000000 + (int64_t)ts.tv_nsec/1000;
 }
-#endif
 
-int64_t ggml_cycles(void) {
-    return clock();
+int64_t ggml_process_time_ms(void) {
+    struct timespec ts;
+    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);
+    return (int64_t)ts.tv_sec*1000 + (int64_t)ts.tv_nsec/1000000;
 }
 
-int64_t ggml_cycles_per_ms(void) {
-    return CLOCKS_PER_SEC/1000;
+int64_t ggml_process_time_us(void) {
+    struct timespec ts;
+    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);
+    return (int64_t)ts.tv_sec*1000000 + (int64_t)ts.tv_nsec/1000;
 }
-
-#ifdef GGML_PERF
-#define ggml_perf_time_ms() ggml_time_ms()
-#define ggml_perf_time_us() ggml_time_us()
-#define ggml_perf_cycles() ggml_cycles()
-#define ggml_perf_cycles_per_ms() ggml_cycles_per_ms()
-#else
-#define ggml_perf_time_ms() 0
-#define ggml_perf_time_us() 0
-#define ggml_perf_cycles() 0
-#define ggml_perf_cycles_per_ms() 0
 #endif
 
 //
@@ -1477,7 +1469,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
     if (is_first_call) {
         // initialize GELU, EXP and F32 tables
         {
-            const uint64_t t_start = ggml_time_us(); UNUSED(t_start);
+            const uint64_t t_start = ggml_real_time_us(); UNUSED(t_start);
 
             ggml_fp16_t ii;
             for (int i = 0; i < (1 << 16); ++i) {
@@ -1488,14 +1480,14 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
                 table_exp_f16[i] = GGML_FP32_TO_FP16(exp(f));
             }
 
-            const uint64_t t_end = ggml_time_us(); UNUSED(t_end);
+            const uint64_t t_end = ggml_real_time_us(); UNUSED(t_end);
 
             GGML_PRINT_DEBUG("%s: GELU and EXP tables initialized in %f ms\n", __func__, (t_end - t_start)/1000.0f);
         }
 
         // initialize g_state
         {
-            const uint64_t t_start = ggml_time_us(); UNUSED(t_start);
+            const uint64_t t_start = ggml_real_time_us(); UNUSED(t_start);
 
             g_state = (struct ggml_state) {
                 /*.contexts =*/ { { 0 } },
@@ -1505,7 +1497,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
                 g_state.contexts[i].used = false;
             }
 
-            const uint64_t t_end = ggml_time_us(); UNUSED(t_end);
+            const uint64_t t_end = ggml_real_time_us(); UNUSED(t_end);
 
             GGML_PRINT_DEBUG("%s: g_state initialized in %f ms\n", __func__, (t_end - t_start)/1000.0f);
         }
@@ -1657,8 +1649,8 @@ struct ggml_tensor * ggml_new_tensor_impl(
         /*.opt =*/ { NULL },
         /*.n_tasks =*/ 0,
         /*.perf_runs =*/ 0,
-        /*.perf_cycles =*/ 0,
-        /*.perf_time_us =*/ 0,
+        /*.perf_process_time_us =*/ 0,
+        /*.perf_real_time_us =*/ 0,
         /*.data =*/ data == NULL ? (void *)(result + 1) : data,
         /*.pad =*/ { 0 },
     };
@@ -4307,7 +4299,7 @@ static void ggml_compute_forward_mul_mat_f32(
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
               struct ggml_tensor * dst) {
-    int64_t t0 = ggml_perf_time_us();
+    int64_t t0 = ggml_process_time_us();
     UNUSED(t0);
 
     const int ne00 = src0->ne[0];
@@ -4401,7 +4393,7 @@ static void ggml_compute_forward_mul_mat_f32(
             }
         }
 
-        //printf("CBLAS F32 = %f ms, %d x %d x %d x %d\n", (ggml_perf_time_us() - t0)/1000.0, ne0, ne1, ne2, ne3);
+        //printf("CBLAS F32 = %f ms, %d x %d x %d x %d\n", (ggml_process_time_us() - t0)/1000.0, ne0, ne1, ne2, ne3);
 
         return;
     }
@@ -4531,7 +4523,7 @@ static void ggml_compute_forward_mul_mat_f32(
         }
     }
 
-    //int64_t t1 = ggml_perf_time_us();
+    //int64_t t1 = ggml_process_time_us();
     //static int64_t acc = 0;
     //acc += t1 - t0;
     //if (t1 - t0 > 10) {
@@ -4550,7 +4542,7 @@ static void ggml_compute_forward_mul_mat_f16_f32(
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
               struct ggml_tensor * dst) {
-    int64_t t0 = ggml_perf_time_us();
+    int64_t t0 = ggml_process_time_us();
     UNUSED(t0);
 
     const int ne00 = src0->ne[0];
@@ -4682,7 +4674,7 @@ static void ggml_compute_forward_mul_mat_f16_f32(
             }
         }
 
-        //printf("CBLAS = %f ms, %d x %d x %d x %d\n", (ggml_perf_time_us() - t0)/1000.0, ne0, ne1, ne2, ne3);
+        //printf("CBLAS = %f ms, %d x %d x %d x %d\n", (ggml_process_time_us() - t0)/1000.0, ne0, ne1, ne2, ne3);
 
         return;
     }
@@ -4836,7 +4828,7 @@ static void ggml_compute_forward_mul_mat_f16_f32(
         }
     }
 
-    //int64_t t1 = ggml_time_us();
+    //int64_t t1 = ggml_real_time_us();
     //static int64_t acc = 0;
     //acc += t1 - t0;
     //if (t1 - t0 > 10) {
@@ -5304,7 +5296,7 @@ static void ggml_compute_forward_conv_1d_1s_f16_f32(
     GGML_ASSERT(src1->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
-    int64_t t0 = ggml_perf_time_us();
+    int64_t t0 = ggml_process_time_us();
     UNUSED(t0);
 
     const int ne00 = src0->ne[0];
@@ -5424,7 +5416,7 @@ static void ggml_compute_forward_conv_1d_1s_f32(
     GGML_ASSERT(src1->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
-    int64_t t0 = ggml_perf_time_us();
+    int64_t t0 = ggml_process_time_us();
     UNUSED(t0);
 
     const int ne00 = src0->ne[0];
@@ -5570,7 +5562,7 @@ static void ggml_compute_forward_conv_1d_2s_f16_f32(
     GGML_ASSERT(src1->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
-    int64_t t0 = ggml_perf_time_us();
+    int64_t t0 = ggml_process_time_us();
     UNUSED(t0);
 
     const int ne00 = src0->ne[0];
@@ -5690,7 +5682,7 @@ static void ggml_compute_forward_conv_1d_2s_f32(
     GGML_ASSERT(src1->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
-    int64_t t0 = ggml_perf_time_us();
+    int64_t t0 = ggml_process_time_us();
     UNUSED(t0);
 
     const int ne00 = src0->ne[0];
@@ -5834,7 +5826,7 @@ static void ggml_compute_forward_flash_attn_f32(
         const struct ggml_tensor * v,
         const bool masked,
               struct ggml_tensor * dst) {
-    int64_t t0 = ggml_perf_time_us();
+    int64_t t0 = ggml_process_time_us();
     UNUSED(t0);
 
     const int neq0 = q->ne[0];
@@ -6043,7 +6035,7 @@ static void ggml_compute_forward_flash_attn_f16(
         const struct ggml_tensor * v,
         const bool masked,
               struct ggml_tensor * dst) {
-    int64_t t0 = ggml_perf_time_us();
+    int64_t t0 = ggml_process_time_us();
     UNUSED(t0);
 
     const int neq0 = q->ne[0];
@@ -6318,7 +6310,7 @@ static void ggml_compute_forward_flash_ff_f16(
         const struct ggml_tensor * c0, // F16 proj_w
         const struct ggml_tensor * c1, // F32 proj_b
               struct ggml_tensor * dst) {
-    int64_t t0 = ggml_perf_time_us();
+    int64_t t0 = ggml_process_time_us();
     UNUSED(t0);
 
     const int nea0 = a->ne[0];
@@ -6997,8 +6989,8 @@ struct ggml_cgraph ggml_build_forward(struct ggml_tensor * tensor) {
         /*.grads =*/ { NULL },
         /*.leafs =*/ { NULL },
         /*.perf_runs =*/ 0,
-        /*.perf_cycles =*/ 0,
-        /*.perf_time_us =*/ 0,
+        /*.perf_process_time_us =*/ 0,
+        /*.perf_real_time_us =*/ 0,
     };
 
     ggml_build_forward_impl(&result, tensor, false);
@@ -7404,8 +7396,8 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
         }
     }
 
-    const int64_t perf_start_cycles = ggml_perf_cycles();
-    const int64_t perf_start_time_us = ggml_perf_time_us();
+    const int64_t perf_process_time_start_us = ggml_process_time_us();
+    const int64_t perf_real_time_start_us = ggml_real_time_us();
 
     for (int i = 0; i < cgraph->n_nodes; i++) {
         GGML_PRINT_DEBUG_5("%s: %d/%d\n", __func__, i, cgraph->n_nodes);
@@ -7417,8 +7409,8 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
         //    continue;
         //}
 
-        const int64_t perf_node_start_cycles = ggml_perf_cycles();
-        const int64_t perf_node_start_time_us = ggml_perf_time_us();
+        const int64_t perf_node_process_time_start_us = ggml_process_time_us();
+        const int64_t perf_node_real_time_start_us = ggml_real_time_us();
 
         // INIT
         struct ggml_compute_params params = {
@@ -7543,12 +7535,12 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
 
         // performance stats (node)
         {
-            int64_t perf_cycles_cur = ggml_perf_cycles() - perf_node_start_cycles;
-            int64_t perf_time_us_cur = ggml_perf_time_us() - perf_node_start_time_us;
+            int64_t perf_cur_process_time_us = ggml_process_time_us() - perf_node_process_time_start_us;
+            int64_t perf_cur_real_time_us = ggml_real_time_us() - perf_node_real_time_start_us;
 
             node->perf_runs++;
-            node->perf_cycles += perf_cycles_cur;
-            node->perf_time_us += perf_time_us_cur;
+            node->perf_process_time_us += perf_cur_process_time_us;
+            node->perf_real_time_us += perf_cur_real_time_us;
         }
     }
 
@@ -7568,19 +7560,19 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
 
     // performance stats (graph)
     {
-        int64_t perf_cycles_cur = ggml_perf_cycles() - perf_start_cycles;
-        int64_t perf_time_us_cur = ggml_perf_time_us() - perf_start_time_us;
+        int64_t perf_cur_process_time_us = ggml_process_time_us() - perf_process_time_start_us;
+        int64_t perf_cur_real_time_us = ggml_real_time_us() - perf_real_time_start_us;
 
         cgraph->perf_runs++;
-        cgraph->perf_cycles += perf_cycles_cur;
-        cgraph->perf_time_us += perf_time_us_cur;
+        cgraph->perf_process_time_us += perf_cur_process_time_us;
+        cgraph->perf_real_time_us += perf_cur_real_time_us;
 
         GGML_PRINT_DEBUG("%s: perf (%d) - cpu = %.3f / %.3f ms, wall = %.3f / %.3f ms\n",
                 __func__, cgraph->perf_runs,
-                (double) perf_cycles_cur / (double) ggml_cycles_per_ms(),
-                (double) cgraph->perf_cycles / (double) ggml_cycles_per_ms() / (double) cgraph->perf_runs,
-                (double) perf_time_us_cur / 1000.0,
-                (double) cgraph->perf_time_us / 1000.0 / cgraph->perf_runs);
+                (double) perf_cur_process_time_us / 1000.0,
+                (double) cgraph->perf_process_time_us / 1000.0 / (double) cgraph->perf_runs,
+                (double) perf_cur_real_time_us / 1000.0,
+                (double) cgraph->perf_real_time_us / 1000.0 / (double) cgraph->perf_runs);
     }
 }
 
@@ -7606,16 +7598,16 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
     for (int i = 0; i < cgraph->n_nodes; i++) {
         struct ggml_tensor * node = cgraph->nodes[i];
 
-        perf_total_per_op_us[node->op] += node->perf_time_us;
+        perf_total_per_op_us[node->op] += node->perf_real_time_us;
 
         GGML_PRINT(" - %3d: [ %6d, %6d, %6d] %16s %s (%3d) cpu = %7.3f / %7.3f ms, wall = %7.3f / %7.3f ms\n",
                 i,
                 node->ne[0], node->ne[1], node->ne[2],
                 GGML_OP_LABEL[node->op], node->is_param ? "x" : node->grad ? "g" : " ", node->perf_runs,
-                (double) node->perf_cycles / (double) ggml_cycles_per_ms(),
-                (double) node->perf_cycles / (double) ggml_cycles_per_ms() / (double) node->perf_runs,
-                (double) node->perf_time_us / 1000.0,
-                (double) node->perf_time_us / 1000.0 / node->perf_runs);
+                (double) node->perf_process_time_us / 1000.0,
+                (double) node->perf_process_time_us / 1000.0 / (double) node->perf_runs,
+                (double) node->perf_real_time_us / 1000.0,
+                (double) node->perf_real_time_us / 1000.0 / node->perf_runs);
     }
 
     GGML_PRINT("n_leafs = %d\n", cgraph->n_leafs);
@@ -7894,10 +7886,10 @@ static enum ggml_opt_result ggml_opt_adam(
                     ggml_get_f32_1d(ps[i], 0), ggml_get_f32_1d(ps[i]->grad, 0));
         }
 
-        const int64_t t_start_wall = ggml_time_us();
-        const int64_t t_start_cpu = ggml_cycles();
-        UNUSED(t_start_wall);
-        UNUSED(t_start_cpu);
+        const int64_t t_real_start_us = ggml_real_time_us();
+        const int64_t t_process_start_us = ggml_process_time_us();
+        UNUSED(t_real_start_us);
+        UNUSED(t_process_start_us);
 
         {
             // update the gradient
@@ -7977,13 +7969,13 @@ static enum ggml_opt_result ggml_opt_adam(
         fx_prev = fx;
 
         {
-            const int64_t t_end_cpu = ggml_cycles();
-            GGML_PRINT_DEBUG("time iter: %5.3f s\n", ((float)(t_end_cpu - t_start_cpu))/CLOCKS_PER_SEC);
-            UNUSED(t_end_cpu);
+            const int64_t t_process_end_us = ggml_process_time_us();
+            GGML_PRINT_DEBUG("time iter: %5.3f s\n", (t_process_end_us - t_process_start_us)/1e6);
+            UNUSED(t_process_end_us);
 
-            const int64_t t_end_wall = ggml_time_us();
-            GGML_PRINT_DEBUG("wall time iter: %5.3f s\n", (t_end_wall - t_start_wall)/1e6);
-            UNUSED(t_end_wall);
+            const int64_t t_real_end_us = ggml_real_time_us();
+            GGML_PRINT_DEBUG("wall time iter: %5.3f s\n", (t_real_end_us - t_real_start_us)/1e6);
+            UNUSED(t_real_end_us);
         }
     }
 
diff --git a/ggml.h b/ggml.h
index f3c9e5a..f8febe6 100644
--- a/ggml.h
+++ b/ggml.h
@@ -275,8 +275,8 @@ struct ggml_tensor {
 
     // performance
     int perf_runs;
-    int64_t perf_cycles;
-    int64_t perf_time_us;
+    int64_t perf_process_time_us;
+    int64_t perf_real_time_us;
 
     void * data;
     char padding[8];
@@ -297,8 +297,8 @@ struct ggml_cgraph {
 
     // performance
     int perf_runs;
-    int64_t perf_cycles;
-    int64_t perf_time_us;
+    int64_t perf_process_time_us;
+    int64_t perf_real_time_us;
 };
 
 struct ggml_init_params {
@@ -308,10 +308,10 @@ struct ggml_init_params {
 };
 
 void ggml_time_init(void); // call this once at the beginning of the program
-int64_t ggml_time_ms(void);
-int64_t ggml_time_us(void);
-int64_t ggml_cycles(void);
-int64_t ggml_cycles_per_ms(void);
+int64_t ggml_real_time_ms(void);
+int64_t ggml_real_time_us(void);
+int64_t ggml_process_time_ms(void);
+int64_t ggml_process_time_us(void);
 
 void ggml_print_object (const struct ggml_object * obj);
 void ggml_print_objects(const struct ggml_context * ctx);
diff --git a/whisper.cpp b/whisper.cpp
index c400856..b74c515 100644
--- a/whisper.cpp
+++ b/whisper.cpp
@@ -2642,7 +2642,7 @@ whisper_token whisper_token_transcribe(void) {
 }
 
 void whisper_print_timings(struct whisper_context * ctx) {
-    const int64_t t_end_us = ggml_time_us();
+    const int64_t t_end_us = ggml_real_time_us();
 
     fprintf(stderr, "\n");
     fprintf(stderr, "%s: load time = %8.2f ms\n", __func__, ctx->t_load_us/1000.0f);
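
Note (reviewer sketch, not part of the patch): ggml.h above declares ggml_process_time_ms()/ggml_process_time_us() for every platform, but in ggml.c the new definitions land inside the clock_gettime() branch only; the QueryPerformanceCounter branch just gets the renamed real-time functions, so a Windows build would be left with unresolved symbols. A minimal sketch of what the Windows side could look like, assuming GetProcessTimes() is an acceptable source of per-process CPU time (the helper name below is illustrative and not taken from the patch):

    #if defined(_MSC_VER) || defined(__MINGW32__)
    #include <windows.h>
    #include <stdint.h>

    // FILETIME counts 100-nanosecond ticks; convert to microseconds.
    static int64_t ggml_filetime_to_us(FILETIME ft) {
        ULARGE_INTEGER u;
        u.LowPart  = ft.dwLowDateTime;
        u.HighPart = ft.dwHighDateTime;
        return (int64_t) (u.QuadPart / 10);
    }

    int64_t ggml_process_time_us(void) {
        FILETIME creation_time, exit_time, kernel_time, user_time;
        GetProcessTimes(GetCurrentProcess(), &creation_time, &exit_time, &kernel_time, &user_time);
        // CPU time consumed by the process = kernel time + user time
        return ggml_filetime_to_us(kernel_time) + ggml_filetime_to_us(user_time);
    }

    int64_t ggml_process_time_ms(void) {
        return ggml_process_time_us() / 1000;
    }
    #endif

One caveat with this approach: GetProcessTimes() typically advances at the scheduler tick (on the order of 15 ms), so the per-node "cpu" columns printed by ggml_graph_print() would be much coarser than with CLOCK_PROCESS_CPUTIME_ID.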