[xcode] Support for ALAC encoded RSP/DAAP streaming

This commit is contained in:
ejurgensen 2023-12-05 21:47:44 +01:00
parent 85e9b06bca
commit 9749ded184
6 changed files with 153 additions and 68 deletions

View File

@ -187,19 +187,23 @@ library {
# Should we import the content of iTunes smart playlists?
# itunes_smartpl = false
# Decoding options for DAAP and RSP clients
# Transcoding options for DAAP and RSP clients
# Since iTunes has native support for mpeg, mp4a, mp4v, alac and wav,
# such files will be sent as they are. Any other formats will be decoded
# to raw wav. If OwnTone detects a non-iTunes DAAP client, it is
# assumed to only support mpeg and wav, other formats will be decoded.
# Here you can change when to decode. Note that these settings only
# affect serving media to DAAP and RSP clients, they have no effect on
# such files will be sent as they are. Any other formats will be
# transcoded. Some other clients, including Roku/RSP, announce what
# formats they support, and the server will transcode to one of those if
# necessary. Clients that don't announce supported formats are assumed
# to support mpeg (mp3), wav and alac.
# Here you can change when and how to transcode. The settings *only*
# affect serving audio to DAAP and RSP clients, they have no effect on
# direct AirPlay, Chromecast and local audio playback.
# Formats: mp4a, mp4v, mpeg, alac, flac, mpc, ogg, wma, wmal, wmav, aif, wav
# Formats that should never be decoded
# Formats that should never be transcoded
# no_decode = { "format", "format" }
# Formats that should always be decoded
# Formats that should always be transcoded
# force_decode = { "format", "format" }
# Preferred format to transcode to: alac (default), wav or mpeg (mp3 at 320 kbps)
# prefer_format = "format"
# Set ffmpeg filters (similar to 'ffmpeg -af xxx') that you want the
# server to use when decoding files from your library. Examples:

View File

@ -111,6 +111,7 @@ static cfg_opt_t sec_library[] =
CFG_BOOL("itunes_smartpl", cfg_false, CFGF_NONE),
CFG_STR_LIST("no_decode", NULL, CFGF_NONE),
CFG_STR_LIST("force_decode", NULL, CFGF_NONE),
CFG_STR("prefer_format", NULL, CFGF_NONE),
CFG_BOOL("pipe_autostart", cfg_true, CFGF_NONE),
CFG_INT("pipe_sample_rate", 44100, CFGF_NONE),
CFG_INT("pipe_bits_per_sample", 16, CFGF_NONE),

View File

@ -119,6 +119,7 @@ static const struct content_type_map ext2ctype[] =
{ ".png", XCODE_PNG, "image/png" },
{ ".jpg", XCODE_JPEG, "image/jpeg" },
{ ".mp3", XCODE_MP3, "audio/mpeg" },
{ ".m4a", XCODE_MP4, "audio/mp4" },
{ ".wav", XCODE_WAV, "audio/wav" },
{ NULL, XCODE_NONE, NULL }
};
@ -678,7 +679,7 @@ stream_new_transcode(struct media_file_info *mfi, enum transcode_profile profile
int64_t offset, int64_t end_offset, event_callback_fn stream_cb)
{
struct stream_ctx *st;
struct media_quality quality = { HTTPD_STREAM_SAMPLE_RATE, HTTPD_STREAM_BPS, HTTPD_STREAM_CHANNELS, HTTPD_STREAM_BIT_RATE };
// struct media_quality quality = { HTTPD_STREAM_SAMPLE_RATE, HTTPD_STREAM_BPS, HTTPD_STREAM_CHANNELS, HTTPD_STREAM_BIT_RATE };
st = stream_new(mfi, hreq, stream_cb);
if (!st)
@ -686,7 +687,7 @@ stream_new_transcode(struct media_file_info *mfi, enum transcode_profile profile
goto error;
}
st->xcode = transcode_setup(profile, &quality, mfi->data_kind, mfi->path, mfi->song_length);
st->xcode = transcode_setup(profile, NULL, mfi->data_kind, mfi->path, mfi->song_length);
if (!st->xcode)
{
DPRINTF(E_WARN, L_HTTPD, "Transcoding setup failed, aborting streaming\n");

View File

@ -710,6 +710,16 @@ rsp_stream(struct httpd_request *hreq)
// /rsp/stream/36364
// /rsp/db/0?query=id%3D36365&type=full
// /rsp/stream/36365
//
// Headers sent from Roku M2000 and M1001 in stream requests (and other?):
//
// 'User-Agent': 'Roku SoundBridge/3.0'
// 'Host': '192.168.1.119:3689'
// 'Accept': '*/*'
// 'Pragma': 'no-cache'
// 'accept-codecs': 'wma,mpeg,wav,mp4a,alac'
// 'rsp-version': '0.1'
// 'transcode-codecs': 'wav,mp3'
static struct httpd_uri_map rsp_handlers[] =
{
{

View File

@ -63,8 +63,10 @@
#define WAV_HEADER_LEN 44
// Max filters in a filtergraph
#define MAX_FILTERS 9
// Set to same size as in httpd.c (but can be set to something else)
#define STREAM_CHUNK_SIZE (64 * 1024)
static const char *default_codecs = "mpeg,wav";
static const char *default_codecs = "mpeg,alac,wav";
static const char *roku_codecs = "mpeg,mp4a,wma,alac,wav";
static const char *itunes_codecs = "mpeg,mp4a,mp4v,alac,wav";
@ -93,8 +95,8 @@ struct settings_ctx
AVChannelLayout channel_layout;
#else
uint64_t channel_layout;
int channels;
#endif
int nb_channels;
int bit_rate;
int frame_size;
enum AVSampleFormat sample_format;
@ -289,6 +291,12 @@ init_settings(struct settings_ctx *settings, enum transcode_profile profile, str
settings->frame_size = 352;
break;
case XCODE_MP4:
settings->encode_audio = true;
settings->format = "mp4";
settings->audio_codec = AV_CODEC_ID_ALAC;
break;
case XCODE_OGG:
settings->encode_audio = true;
settings->in_format = "ogg";
@ -371,6 +379,65 @@ init_settings(struct settings_ctx *settings, enum transcode_profile profile, str
return 0;
}
/* Stores the requested output picture dimensions in the encoding settings.
 *
 * profile and src_ctx are accepted for symmetry with the audio variant but
 * are not consulted here. Always returns 0.
 */
static int
init_settings_from_video(struct settings_ctx *settings, enum transcode_profile profile, struct decode_ctx *src_ctx, int width, int height)
{
  // Not needed for video at present
  (void)profile;
  (void)src_ctx;

  settings->height = height;
  settings->width = width;

  return 0;
}
/* Completes the audio encoding settings with values taken from the source.
 *
 * Only fields that are still unset (zero) are filled in, so anything already
 * present in settings is kept as is.
 *
 * @in/out settings  Encoding settings to complete
 * @in     profile   Transcoding profile the settings belong to
 * @in     src_ctx   Decode context; its audio stream's codec context is read
 * @in     quality   Not used by this function at present
 * @return           0 on success, -1 if the source channel layout could not be
 *                   copied, or if a profile other than XCODE_MP4 and
 *                   XCODE_PCM_NATIVE has sample format, codec or format unset
 */
static int
init_settings_from_audio(struct settings_ctx *settings, enum transcode_profile profile, struct decode_ctx *src_ctx, struct media_quality *quality)
{
  int src_bytes_per_sample = av_get_bytes_per_sample(src_ctx->audio_stream.codec->sample_fmt);

  // Initialize unset settings that are source-dependent, not profile-dependent
  if (!settings->sample_rate)
    settings->sample_rate = src_ctx->audio_stream.codec->sample_rate;

#if USE_CH_LAYOUT
  if (!av_channel_layout_check(&settings->channel_layout))
    {
      // The copy returns a negative error code on failure, in which case the
      // destination layout can't be relied on, so bail out instead of going on
      int ret = av_channel_layout_copy(&settings->channel_layout, &src_ctx->audio_stream.codec->ch_layout);
      if (ret < 0)
        {
          DPRINTF(E_LOG, L_XCODE, "Could not copy channel layout from source (error %d)\n", ret);
          return -1;
        }
    }
  settings->nb_channels = settings->channel_layout.nb_channels;
#else
  if (settings->nb_channels == 0)
    {
      settings->nb_channels = src_ctx->audio_stream.codec->channels;
      settings->channel_layout = src_ctx->audio_stream.codec->channel_layout;
    }
#endif

  // Initialize settings that are both source-dependent and profile-dependent
  switch (profile)
    {
      case XCODE_MP4:
        // Planar sample format, 32 bit if the source has 4 bytes per sample,
        // otherwise 16 bit
        if (!settings->sample_format)
          settings->sample_format = (src_bytes_per_sample == 4) ? AV_SAMPLE_FMT_S32P : AV_SAMPLE_FMT_S16P;
        break;

      case XCODE_PCM_NATIVE:
        // Raw PCM at the source's width: 32 bit if the source has 4 bytes per
        // sample, otherwise 16 bit
        if (!settings->sample_format)
          settings->sample_format = (src_bytes_per_sample == 4) ? AV_SAMPLE_FMT_S32 : AV_SAMPLE_FMT_S16;
        if (!settings->audio_codec)
          settings->audio_codec = (src_bytes_per_sample == 4) ? AV_CODEC_ID_PCM_S32LE : AV_CODEC_ID_PCM_S16LE;
        if (!settings->format)
          settings->format = (src_bytes_per_sample == 4) ? "s32le" : "s16le";
        break;

      default:
        // Any other profile must arrive here with these already set
        if (settings->sample_format && settings->audio_codec && settings->format)
          return 0;

        DPRINTF(E_LOG, L_XCODE, "Bug! Profile %d has unset encoding parameters\n", profile);
        return -1;
    }

  return 0;
}
static void
stream_settings_set(struct stream_ctx *s, struct settings_ctx *settings, enum AVMediaType type)
{
@ -474,6 +541,8 @@ size_estimate(enum transcode_profile profile, int bit_rate, int sample_rate, int
bytes = (int64_t)len_ms * channels * bytes_per_sample * sample_rate / 1000 + WAV_HEADER_LEN;
else if (profile == XCODE_MP3)
bytes = (int64_t)len_ms * bit_rate / 8000;
else if (profile == XCODE_MP4)
bytes = (int64_t)len_ms * channels * bytes_per_sample * sample_rate / 1000 / 2; // FIXME
else
bytes = -1;
@ -1187,6 +1256,7 @@ open_output(struct encode_ctx *ctx, struct decode_ctx *src_ctx)
// Not const before ffmpeg 5.0
AVOutputFormat *oformat;
#endif
AVDictionary *options = NULL;
int ret;
oformat = av_guess_format(ctx->settings.format, NULL, NULL);
@ -1236,14 +1306,30 @@ open_output(struct encode_ctx *ctx, struct decode_ctx *src_ctx)
goto out_free_streams;
}
// By default ffmpeg can't mux mp4 to a stream, since it is non-seekable, and
// normally the muxing involves writing some header bytes when the encoding is
// completed. This is a solution for that, found on Stack Overflow. "movflags"
// set to "empty_moov" was also suggested, but it doesn't seem to be required.
if (strcmp("mp4", oformat->name) == 0)
{
av_dict_set_int(&options, "frag_size", STREAM_CHUNK_SIZE, 0);
// av_dict_set(&options, "movflags", "empty_moov", 0);
}
// Notice, this will not write WAV header (so we do that manually)
ret = avformat_write_header(ctx->ofmt_ctx, NULL);
ret = avformat_write_header(ctx->ofmt_ctx, &options);
if (ret < 0)
{
DPRINTF(E_LOG, L_XCODE, "Error writing header to output buffer: %s\n", err2str(ret));
goto out_free_streams;
}
if (options)
{
DPRINTF(E_WARN, L_XCODE, "Didn't recognize all options given to avformat_write_header\n");
av_dict_free(&options);
}
if (ctx->settings.with_wav_header)
{
evbuffer_add(ctx->obuf, ctx->wav_header, sizeof(ctx->wav_header));
@ -1631,71 +1717,30 @@ struct encode_ctx *
transcode_encode_setup(enum transcode_profile profile, struct media_quality *quality, struct decode_ctx *src_ctx, int width, int height)
{
struct encode_ctx *ctx;
int src_bytes_per_sample;
int dst_bytes_per_sample;
int channels;
CHECK_NULL(L_XCODE, ctx = calloc(1, sizeof(struct encode_ctx)));
CHECK_NULL(L_XCODE, ctx->filt_frame = av_frame_alloc());
CHECK_NULL(L_XCODE, ctx->encoded_pkt = av_packet_alloc());
// Initialize general settings
if (init_settings(&ctx->settings, profile, quality) < 0)
goto fail_free;
ctx->settings.width = width;
ctx->settings.height = height;
if (ctx->settings.encode_audio && init_settings_from_audio(&ctx->settings, profile, src_ctx, quality) < 0)
goto fail_free;
// Caller did not specify a sample rate -> use same as source
if (!ctx->settings.sample_rate && ctx->settings.encode_audio)
{
ctx->settings.sample_rate = src_ctx->audio_stream.codec->sample_rate;
}
// Caller did not specify a sample format -> determine from source
if (!ctx->settings.sample_format && ctx->settings.encode_audio)
{
src_bytes_per_sample = av_get_bytes_per_sample(src_ctx->audio_stream.codec->sample_fmt);
if (src_bytes_per_sample == 4)
{
ctx->settings.sample_format = AV_SAMPLE_FMT_S32;
ctx->settings.audio_codec = AV_CODEC_ID_PCM_S32LE;
ctx->settings.format = "s32le";
}
else
{
ctx->settings.sample_format = AV_SAMPLE_FMT_S16;
ctx->settings.audio_codec = AV_CODEC_ID_PCM_S16LE;
ctx->settings.format = "s16le";
}
}
#if USE_CH_LAYOUT
// Caller did not specify channels -> use same as source
if (!av_channel_layout_check(&ctx->settings.channel_layout) && ctx->settings.encode_audio)
{
av_channel_layout_copy(&ctx->settings.channel_layout, &src_ctx->audio_stream.codec->ch_layout);
}
channels = ctx->settings.channel_layout.nb_channels;
#else
// Caller did not specify channels -> use same as source
if (ctx->settings.channels == 0 && ctx->settings.encode_audio)
{
ctx->settings.channels = src_ctx->audio_stream.codec->channels;
ctx->settings.channel_layout = src_ctx->audio_stream.codec->channel_layout;
}
channels = ctx->settings.channels;
#endif
if (ctx->settings.encode_video && init_settings_from_video(&ctx->settings, profile, src_ctx, width, height) < 0)
goto fail_free;
dst_bytes_per_sample = av_get_bytes_per_sample(ctx->settings.sample_format);
ctx->bytes_total = size_estimate(profile, ctx->settings.bit_rate, ctx->settings.sample_rate, dst_bytes_per_sample, channels, src_ctx->len_ms);
ctx->bytes_total = size_estimate(profile, ctx->settings.bit_rate, ctx->settings.sample_rate, dst_bytes_per_sample, ctx->settings.nb_channels, src_ctx->len_ms);
if (ctx->settings.with_wav_header)
make_wav_header(ctx->wav_header, ctx->settings.sample_rate, dst_bytes_per_sample, channels, ctx->bytes_total);
make_wav_header(ctx->wav_header, ctx->settings.sample_rate, dst_bytes_per_sample, ctx->settings.nb_channels, ctx->bytes_total);
if (ctx->settings.with_icy && src_ctx->data_kind == DATA_KIND_HTTP)
ctx->icy_interval = METADATA_ICY_INTERVAL * channels * dst_bytes_per_sample * ctx->settings.sample_rate;
ctx->icy_interval = METADATA_ICY_INTERVAL * ctx->settings.nb_channels * dst_bytes_per_sample * ctx->settings.sample_rate;
if (open_output(ctx, src_ctx) < 0)
goto fail_free;
@ -1804,9 +1849,13 @@ transcode_decode_setup_raw(enum transcode_profile profile, struct media_quality
enum transcode_profile
transcode_needed(const char *user_agent, const char *client_codecs, char *file_codectype)
{
char *codectype;
const char *codectype;
const char *prefer_format;
cfg_t *lib;
bool force_xcode;
bool supports_alac;
bool supports_mpeg;
bool supports_wav;
int count;
int i;
@ -1862,10 +1911,28 @@ transcode_needed(const char *user_agent, const char *client_codecs, char *file_c
if (!force_xcode && strstr(client_codecs, file_codectype))
return XCODE_NONE;
else if (strstr(client_codecs, "mpeg"))
return XCODE_MP3;
else if (strstr(client_codecs, "wav"))
supports_alac = strstr(client_codecs, "alac") || strstr(client_codecs, "mp4a");
supports_mpeg = strstr(client_codecs, "mpeg");
supports_wav = strstr(client_codecs, "wav");
prefer_format = cfg_getstr(lib, "prefer_format");
if (prefer_format)
{
if (strcmp(prefer_format, "alac") == 0 && supports_alac)
return XCODE_MP4;
else if (strcmp(prefer_format, "wav") == 0 && supports_wav)
return XCODE_WAV;
else if (strcmp(prefer_format, "mpeg") == 0 && supports_mpeg)
return XCODE_MP3;
}
if (supports_alac)
return XCODE_MP4;
else if (supports_wav)
return XCODE_WAV;
else if (supports_mpeg)
return XCODE_MP3;
else
return XCODE_UNKNOWN;
}

View File

@ -23,10 +23,12 @@ enum transcode_profile
XCODE_PCM32,
// Transcodes the best audio stream to MP3
XCODE_MP3,
// Transcodes the best audio stream to OPUS
// Transcodes the best audio stream to raw OPUS (no container)
XCODE_OPUS,
// Transcodes the best audio stream to ALAC
// Transcodes the best audio stream to raw ALAC (no container)
XCODE_ALAC,
// Transcodes the best audio stream to ALAC in an MP4 container
XCODE_MP4,
// Transcodes the best audio stream from OGG
XCODE_OGG,
// Transcodes the best video stream to JPEG/PNG/VP8