|
|
@@ -24,10 +24,10 @@ on:
|
|
|
push:
|
|
|
branches:
|
|
|
- master
|
|
|
- paths: ['.github/workflows/bench.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/bench/**.*']
|
|
|
+ paths: ['llama.cpp', 'ggml.c', 'ggml-backend.c', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
|
|
|
pull_request_target:
|
|
|
types: [opened, synchronize, reopened]
|
|
|
- paths: ['.github/workflows/bench.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/bench/**.*']
|
|
|
+ paths: ['llama.cpp', 'ggml.c', 'ggml-backend.c', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
|
|
|
schedule:
|
|
|
- cron: '04 2 * * *'
|
|
|
|
|
|
@@ -42,6 +42,16 @@ jobs:
|
|
|
RUNNER_LABEL: Standard_NC4as_T4_v3 # FIXME Do not find a way to not duplicate it
|
|
|
N_USERS: 8
|
|
|
DURATION: 10m
|
|
|
+
|
|
|
+ strategy:
|
|
|
+ matrix:
|
|
|
+ model: [phi-2]
|
|
|
+ ftype: [q4_0, q8_0, f16]
|
|
|
+ include:
|
|
|
+ - model: phi-2
|
|
|
+ ftype: q4_0
|
|
|
+ pr_comment_enabled: "true"
|
|
|
+
|
|
|
if: ${{ github.event.inputs.gpu-series == 'Standard_NC4as_T4_v3' || github.event.schedule || github.event.pull_request || github.head_ref == 'master' || github.ref_name == 'master' || github.event.push.ref == 'refs/heads/master' }}
|
|
|
steps:
|
|
|
- name: Clone
|
|
|
@@ -116,7 +126,7 @@ jobs:
|
|
|
--scenario script.js \
|
|
|
--duration ${{ github.event.inputs.duration || env.DURATION }} \
|
|
|
--hf-repo ggml-org/models \
|
|
|
- --hf-file phi-2/ggml-model-q4_0.gguf \
|
|
|
+ --hf-file ${{ matrix.model }}/ggml-model-${{ matrix.ftype }}.gguf \
|
|
|
--model-path-prefix /models \
|
|
|
--parallel ${{ env.N_USERS }} \
|
|
|
-ngl 33 \
|
|
|
@@ -134,7 +144,7 @@ jobs:
|
|
|
|
|
|
- uses: actions/upload-artifact@v4
|
|
|
with:
|
|
|
- name: benchmark-results
|
|
|
+ name: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
|
|
|
compression-level: 9
|
|
|
path: |
|
|
|
examples/server/bench/*.jpg
|
|
|
@@ -146,7 +156,7 @@ jobs:
|
|
|
with:
|
|
|
authToken: ${{secrets.GITHUB_TOKEN}}
|
|
|
sha: ${{ inputs.sha || github.event.pull_request.head.sha || github.sha }}
|
|
|
- context: bench-server-baseline
|
|
|
+ context: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
|
|
|
description: |
|
|
|
${{ env.BENCH_RESULTS }}
|
|
|
state: 'success'
|
|
|
@@ -203,11 +213,19 @@ jobs:
|
|
|
- name: Comment PR
|
|
|
uses: mshick/add-pr-comment@v2
|
|
|
id: comment_pr
|
|
|
- if: ${{ github.event.pull_request != '' }}
|
|
|
+ if: ${{ github.event.pull_request != '' && matrix.pr_comment_enabled == 'true' }}
|
|
|
with:
|
|
|
- message-id: bench-${{ github.job }}-${{ env.RUNNER_LABEL }}
|
|
|
+ message-id: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
|
|
|
message: |
|
|
|
- 📈 **llama.cpp server** for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_: **${{ env.BENCH_ITERATIONS}} iterations** 🚀
|
|
|
+ <p align="center">
|
|
|
+
|
|
|
+ 📈 **llama.cpp server** for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_ for `${{ matrix.model }}`-`${{ matrix.ftype }}`: **${{ env.BENCH_ITERATIONS}} iterations** 🚀
|
|
|
+
|
|
|
+ </p>
|
|
|
+
|
|
|
+ <details>
|
|
|
+
|
|
|
+ <summary>Expand details for performance related PR only</summary>
|
|
|
|
|
|
- Concurrent users: ${{ env.N_USERS }}, duration: ${{ github.event.inputs.duration || env.DURATION }}
|
|
|
- HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(90)=${{ env.HTTP_REQ_DURATION_P_90_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }}
|
|
|
@@ -215,9 +233,6 @@ jobs:
|
|
|
- Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(90)=${{ env.LLAMACPP_TOKENS_SECOND_P_90_ }}tk/s **total=${{ env.LLAMACPP_COMPLETION_TOKENS_TOTAL_COUNTER_RATE }}tk/s**
|
|
|
- ${{ env.BENCH_GRAPH_XLABEL }}
|
|
|
|
|
|
- <details>
|
|
|
-
|
|
|
- <summary>Time series</summary>
|
|
|
|
|
|
<p align="center">
|
|
|
|