diff --git a/README.md b/README.md
index 5a20073..28e57a6 100644
--- a/README.md
+++ b/README.md
@@ -112,6 +112,7 @@ options:
   -otxt,    --output-txt     output result in a text file
   -ovtt,    --output-vtt     output result in a vtt file
   -osrt,    --output-srt     output result in a srt file
+  -ojson,   --output-json    output result with confidence in a json file
   -owts,    --output-words   output script for generating karaoke video
   -ps,      --print_special  print special tokens
   -pc,      --print_colors   print colors
@@ -314,14 +315,14 @@ to highlight words with high or low confidence:
 
 ## Controlling the length of the generated text segments (experimental)
 
-For example, to limit the line length to a maximum of 16 characters, simply add `-ml 16`: 
+For example, to limit the line length to a maximum of 16 characters, simply add `-ml 16`:
 
 ```java
 ./main -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 16
 
 whisper_model_load: loading model from './models/ggml-base.en.bin'
 ...
-system_info: n_threads = 4 / 10 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | 
+system_info: n_threads = 4 / 10 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 |
 
 main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...
 
@@ -345,11 +346,11 @@ The `--max-len` argument can be used to obtain word-level timestamps. Simply use
 
 whisper_model_load: loading model from './models/ggml-base.en.bin'
 ...
-system_info: n_threads = 4 / 10 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | 
+system_info: n_threads = 4 / 10 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 |
 
 main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...
 
-[00:00:00.000 --> 00:00:00.320]  
+[00:00:00.000 --> 00:00:00.320]
 [00:00:00.320 --> 00:00:00.370]   And
 [00:00:00.370 --> 00:00:00.690]   so
 [00:00:00.690 --> 00:00:00.850]   my
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index a1b9825..6f00285 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -65,6 +65,7 @@ struct whisper_params {
     bool output_txt           = false;
     bool output_vtt           = false;
     bool output_srt           = false;
+    bool output_json          = false;
     bool output_wts           = false;
     bool print_special_tokens = false;
     bool print_colors         = false;
@@ -126,6 +127,8 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
             params.output_srt = true;
         } else if (arg == "-owts" || arg == "--output-words") {
             params.output_wts = true;
+        } else if (arg == "-ojson" || arg == "--output-json") {
+            params.output_json = true;
         } else if (arg == "-ps" || arg == "--print_special") {
             params.print_special_tokens = true;
         } else if (arg == "-pc" || arg == "--print_colors") {
@@ -170,6 +173,7 @@ void whisper_print_usage(int argc, char ** argv, const whisper_params & params)
     fprintf(stderr, "  -otxt,    --output-txt     output result in a text file\n");
     fprintf(stderr, "  -ovtt,    --output-vtt     output result in a vtt file\n");
     fprintf(stderr, "  -osrt,    --output-srt     output result in a srt file\n");
+    fprintf(stderr, "  -ojson,   --output-json    output result with confidence in a json file\n");
     fprintf(stderr, "  -owts,    --output-words   output script for generating karaoke video\n");
     fprintf(stderr, "  -ps,      --print_special  print special tokens\n");
     fprintf(stderr, "  -pc,      --print_colors   print colors\n");
@@ -307,6 +311,67 @@ bool output_srt(struct whisper_context * ctx, const char * fname, const whisper_
         fout << text << "\n\n";
     }
 
+    return true;
+}
+
+// Writes the transcription to `fname` as a JSON array with one object per segment:
+//   {"start": t0, "end": t1, "text": [[conf, "token"], ...]}
+// `start`/`end` are the raw values of whisper_full_get_segment_t0/t1 and `conf` is
+// the token probability cubed and scaled to [0, k_colors.size()]. Tokens with an id
+// >= whisper_token_eot() are skipped. `params` is currently unused.
+// Returns false if the file cannot be opened, true otherwise.
+bool output_json(struct whisper_context * ctx, const char * fname, whisper_params params) {
+    std::ofstream fout(fname);
+    if (!fout.is_open()) {
+        fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname);
+        return false; // was `return 9`, which converts to true and hid the failure
+    }
+
+    fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
+
+    fout << "[\n";
+
+    const int n_segments = whisper_full_n_segments(ctx);
+    for (int i = 0; i < n_segments; ++i) {
+        const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
+        const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
+        fout << "{\"start\":" << t0 << ", \"end\": " << t1;
+        fout << ", \"text\": [";
+
+        // separator is written *before* every emitted token except the first, so
+        // skipped special tokens can never leave a dangling comma behind
+        bool first = true;
+        const int n_token = whisper_full_n_tokens(ctx, i);
+        for (int j = 0; j < n_token; ++j) {
+            if (whisper_full_get_token_id(ctx, i, j) >= whisper_token_eot(ctx)) {
+                continue;
+            }
+
+            // escape everything JSON does not allow verbatim inside a string
+            std::string esc;
+            for (const char * s = whisper_full_get_token_text(ctx, i, j); *s != '\0'; ++s) {
+                const unsigned char c = (unsigned char) *s;
+                if (c == '"' || c == '\\') {
+                    esc += '\\';
+                    esc += *s;
+                } else if (c < 0x20) {
+                    char buf[8];
+                    snprintf(buf, sizeof(buf), "\\u%04x", (unsigned) c);
+                    esc += buf;
+                } else {
+                    esc += *s;
+                }
+            }
+
+            const float p    = whisper_full_get_token_p(ctx, i, j);
+            const int   conf = std::max(0, std::min((int) k_colors.size(), (int) (std::pow(p, 3)*float(k_colors.size()))));
+            fout << (first ? "" : ",") << "[" << conf << ", \"" << esc << "\"]";
+            first = false;
+        }
+        fout << "]}" << (i != n_segments - 1 ? "," : "") << "\n";
+    }
+    fout << "]";
+
     return true;
 }
 
@@ -603,11 +668,18 @@ int main(int argc, char ** argv) {
                 output_srt(ctx, fname_srt.c_str(), params);
             }
 
+            // output to JSON file
+            if (params.output_json) {
+                const auto fname_json = fname_inp + ".json";
+                output_json(ctx, fname_json.c_str(), params);
+            }
+
             // output to WTS file
             if (params.output_wts) {
                 const auto fname_wts = fname_inp + ".wts";
                 output_wts(ctx, fname_wts.c_str(), fname_inp.c_str(), params, float(pcmf32.size() + 1000)/WHISPER_SAMPLE_RATE);
             }
+
         }
     }