// mtmd-audio.h
  1. #pragma once
  2. #include "ggml.h"
  3. #include "clip-model.h"
  4. #include <cstdint>
  5. #include <vector>
  6. #include <string>
  7. #define MTMD_INTERNAL_HEADER
  // Container for a mel spectrogram produced by the audio preprocessors
  // declared in this header (they append these to their `output` vector).
  //
  // The integer fields are zero-initialized so that a default-constructed
  // instance never exposes indeterminate values. The struct remains an
  // aggregate (C++14 and later), so brace initialization keeps working.
  struct mtmd_audio_mel {
      int n_len     = 0; // presumably the number of frames in `data` -- TODO confirm against the implementation
      int n_len_org = 0; // presumably the frame count before any padding/truncation -- TODO confirm
      int n_mel     = 0; // presumably the number of mel bins per frame -- TODO confirm

      std::vector<float> data; // spectrogram values; layout is defined by the implementation (not visible here)
  };
  14. struct mtmd_audio_preprocessor {
  15. const clip_hparams & hparams;
  16. mtmd_audio_preprocessor(const clip_ctx * ctx): hparams(*clip_get_hparams(ctx)) {}
  17. virtual ~mtmd_audio_preprocessor() = default;
  18. virtual void initialize() = 0; // NOT thread-safe
  19. virtual bool preprocess(const float * samples, size_t n_samples, std::vector<mtmd_audio_mel> & output) = 0;
  20. };
  21. struct mtmd_audio_preprocessor_whisper : mtmd_audio_preprocessor {
  22. mtmd_audio_preprocessor_whisper(const clip_ctx * ctx) : mtmd_audio_preprocessor(ctx) {}
  23. void initialize() override;
  24. bool preprocess(const float * samples, size_t n_samples, std::vector<mtmd_audio_mel> & output) override;
  25. };
  26. struct mtmd_audio_preprocessor_conformer : mtmd_audio_preprocessor {
  27. mtmd_audio_preprocessor_conformer(const clip_ctx * ctx) : mtmd_audio_preprocessor(ctx) {}
  28. void initialize() override;
  29. bool preprocess(const float * samples, size_t n_samples, std::vector<mtmd_audio_mel> & output) override;
  30. };