1
0

mtmd-audio.h 858 B

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647
  1. #pragma once
  2. #include "ggml.h"
  3. #include <cstdint>
  4. #include <vector>
  5. #include <string>
  // WHISPER_ASSERT is an alias that expands to GGML_ASSERT
  // (presumably supplied by the "ggml.h" include above — confirm).
  #define WHISPER_ASSERT      GGML_ASSERT

  // Numeric constants for the audio front end. This header does not state units.
  // NOTE(review): presumably SAMPLE_RATE is in Hz, N_FFT and HOP_LENGTH are in
  // samples, and CHUNK_SIZE is in seconds — confirm against the implementation.
  #define WHISPER_SAMPLE_RATE 16000
  #define WHISPER_N_FFT       400
  #define WHISPER_HOP_LENGTH  160
  #define WHISPER_CHUNK_SIZE  30

  // Same numeric value as WHISPER_SAMPLE_RATE, exposed under a second name.
  // Kept as a literal so the macro's replacement text stays exactly "16000".
  #define COMMON_SAMPLE_RATE  16000
  12. namespace whisper_preprocessor {
  // Plain aggregate holding three integer dimensions and a flat float buffer.
  // It is the element type of the `output` vector taken by preprocess_audio().
  //
  // All integer members default to 0 so that a default-constructed object
  // (`whisper_mel m;`) never carries indeterminate values.
  struct whisper_mel {
      int n_len     = 0; // NOTE(review): presumably the number of frames stored in `data` — confirm
      int n_len_org = 0; // NOTE(review): presumably the frame count before any padding — confirm
      int n_mel     = 0; // NOTE(review): presumably the number of mel bins — confirm

      std::vector<float> data; // flat storage; layout is defined by the producer (not visible in this header)
  };
  // Plain aggregate holding two 32-bit integer dimensions and a flat float buffer.
  // It is passed by const reference to preprocess_audio() and returned by value
  // from whisper_precalc_filters::get_128_bins().
  //
  // Both integer members default to 0 so that a default-constructed object
  // (`whisper_filters f;`) never carries indeterminate values.
  struct whisper_filters {
      int32_t n_mel = 0; // NOTE(review): presumably the number of mel bins — confirm
      int32_t n_fft = 0; // NOTE(review): presumably the number of FFT bins per filter row — confirm

      std::vector<float> data; // flat storage; layout is defined by the producer (not visible in this header)
  };
  24. bool preprocess_audio(
  25. const float * samples,
  26. size_t n_samples,
  27. const whisper_filters & filters,
  28. std::vector<whisper_mel> & output);
  29. } // namespace whisper_preprocessor
  30. namespace whisper_precalc_filters {
  31. whisper_preprocessor::whisper_filters get_128_bins();
  32. } // namespace whisper_precalc_filters