From 179c7e74b56511fafaf9972568d2bb7928be4889 Mon Sep 17 00:00:00 2001 From: Bruno Windels Date: Fri, 25 Feb 2022 16:54:00 +0100 Subject: [PATCH] WIP6 --- src/matrix/calls/CallHandler.ts | 2 + src/matrix/calls/LocalMedia.ts | 57 ++ src/matrix/calls/PeerCall.ts | 822 ++++++++++----------------- src/matrix/calls/TODO.md | 72 ++- src/platform/types/WebRTC.ts | 13 +- src/platform/web/dom/MediaDevices.ts | 7 +- src/platform/web/dom/WebRTC.ts | 32 +- src/utils/recursivelyAssign.ts | 39 ++ 8 files changed, 487 insertions(+), 557 deletions(-) create mode 100644 src/matrix/calls/LocalMedia.ts create mode 100644 src/utils/recursivelyAssign.ts diff --git a/src/matrix/calls/CallHandler.ts b/src/matrix/calls/CallHandler.ts index a31ff666..4a2027f3 100644 --- a/src/matrix/calls/CallHandler.ts +++ b/src/matrix/calls/CallHandler.ts @@ -44,6 +44,8 @@ export class GroupCallHandler { } + // TODO: check and poll turn server credentials here + handleRoomState(room: Room, events: StateEvent[], log: ILogItem) { // first update call events for (const event of events) { diff --git a/src/matrix/calls/LocalMedia.ts b/src/matrix/calls/LocalMedia.ts new file mode 100644 index 00000000..2429ff2c --- /dev/null +++ b/src/matrix/calls/LocalMedia.ts @@ -0,0 +1,57 @@ +/* +Copyright 2022 The Matrix.org Foundation C.I.C. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +import {StreamPurpose} from "../../platform/types/WebRTC"; +import {Track, AudioTrack, TrackType} from "../../platform/types/MediaDevices"; +import {SDPStreamMetadata} from "./callEventTypes"; + +export class LocalMedia { + constructor( + public readonly cameraTrack?: Track, + public readonly screenShareTrack?: Track, + public readonly microphoneTrack?: AudioTrack + ) {} + + withTracks(tracks: Track[]) { + const cameraTrack = tracks.find(t => t.type === TrackType.Camera) ?? this.cameraTrack; + const screenShareTrack = tracks.find(t => t.type === TrackType.ScreenShare) ?? this.screenShareTrack; + const microphoneTrack = tracks.find(t => t.type === TrackType.Microphone) ?? this.microphoneTrack; + if (cameraTrack && microphoneTrack && cameraTrack.streamId !== microphoneTrack.streamId) { + throw new Error("The camera and audio track should have the same stream id"); + } + return new LocalMedia(cameraTrack, screenShareTrack, microphoneTrack as AudioTrack); + } + + get tracks(): Track[] { return []; } + + getSDPMetadata(): SDPStreamMetadata { + const metadata = {}; + const userMediaTrack = this.microphoneTrack ?? this.cameraTrack; + if (userMediaTrack) { + metadata[userMediaTrack.streamId] = { + purpose: StreamPurpose.UserMedia, + audio_muted: this.microphoneTrack?.muted ?? false, + video_muted: this.cameraTrack?.muted ?? false, + }; + } + if (this.screenShareTrack) { + metadata[this.screenShareTrack.streamId] = { + purpose: StreamPurpose.ScreenShare + }; + } + return metadata; + } +} diff --git a/src/matrix/calls/PeerCall.ts b/src/matrix/calls/PeerCall.ts index 47dfaa3e..17e49afd 100644 --- a/src/matrix/calls/PeerCall.ts +++ b/src/matrix/calls/PeerCall.ts @@ -15,7 +15,7 @@ limitations under the License. */ import {ObservableMap} from "../../observable/map/ObservableMap"; - +import {recursivelyAssign} from "../../utils/recursivelyAssign"; import {AsyncQueue} from "../../utils/AsyncQueue"; import type {Room} from "../room/Room"; import type {StateEvent} from "../storage/types"; @@ -23,8 +23,284 @@ import type {ILogItem} from "../../logging/types"; import {WebRTC, PeerConnection, PeerConnectionHandler, StreamPurpose} from "../../platform/types/WebRTC"; import {MediaDevices, Track, AudioTrack, TrackType} from "../../platform/types/MediaDevices"; +import type {LocalMedia} from "./LocalMedia"; -import { randomString } from '../randomstring'; +// when sending, we need to encrypt message with olm. I think the flow of room => roomEncryption => olmEncryption as we already +// do for sharing keys will be best as that already deals with room tracking. +/** + * Does WebRTC signalling for a single PeerConnection, and deals with WebRTC wrappers from platform + * */ +/** Implements a call between two peers with the signalling state keeping, while still delegating the signalling message sending. Used by GroupCall.*/ +class PeerCall { + private readonly peerConnection: PeerConnection; + private state = CallState.Fledgling; + private direction: CallDirection; + // A queue for candidates waiting to go out. + // We try to amalgamate candidates into a single candidate message where + // possible + private candidateSendQueue: Array = []; + // If candidates arrive before we've picked an opponent (which, in particular, + // will happen if the opponent sends candidates eagerly before the user answers + // the call) we buffer them up here so we can then add the ones from the party we pick + private remoteCandidateBuffer? = new Map(); + + private logger: any; + private remoteSDPStreamMetadata?: SDPStreamMetadata; + private responsePromiseChain?: Promise; + private opponentPartyId?: PartyId; + + constructor( + private readonly handler: PeerCallHandler, + private localMedia: LocalMedia, + webRTC: WebRTC + ) { + const outer = this; + this.peerConnection = webRTC.createPeerConnection({ + onIceConnectionStateChange(state: RTCIceConnectionState) {}, + onLocalIceCandidate(candidate: RTCIceCandidate) {}, + onIceGatheringStateChange(state: RTCIceGatheringState) {}, + onRemoteTracksChanged(tracks: Track[]) {}, + onDataChannelChanged(dataChannel: DataChannel | undefined) {}, + onNegotiationNeeded() { + const promiseCreator = () => outer.handleNegotiation(); + outer.responsePromiseChain = outer.responsePromiseChain?.then(promiseCreator) ?? promiseCreator(); + }, + getPurposeForStreamId(streamId: string): SDPStreamMetadataPurpose { + return outer.remoteSDPStreamMetadata?.[streamId]?.purpose ?? SDPStreamMetadataPurpose.Usermedia; + } + }); + this.logger = { + debug(...args) { console.log.apply(console, ["WebRTC debug:", ...args])}, + log(...args) { console.log.apply(console, ["WebRTC log:", ...args])}, + warn(...args) { console.log.apply(console, ["WebRTC warn:", ...args])}, + error(...args) { console.error.apply(console, ["WebRTC error:", ...args])}, + } + } + + handleIncomingSignallingMessage(message: SignallingMessage, partyId: PartyId) { + switch (message.type) { + case EventType.Invite: + this.handleInvite(message.content); + break; + case EventType.Answer: + this.handleAnswer(message.content); + break; + case EventType.Candidates: + this.handleRemoteIceCandidates(message.content); + break; + case EventType.Hangup: + } + } + + async call(localMediaPromise: Promise): Promise { + if (this.state !== CallState.Fledgling) { + return; + } + this.direction = CallDirection.Outbound; + this.setState(CallState.WaitLocalMedia); + try { + this.localMedia = await localMediaPromise; + } catch (err) { + this.setState(CallState.Ended); + return; + } + this.setState(CallState.CreateOffer); + // add the local tracks, and wait for onNegotiationNeeded and handleNegotiation to be called + for (const t of this.localMedia.tracks) { + this.peerConnection.addTrack(t); + } + await this.waitForState(CallState.InviteSent); + } + + async answer() { + + } + + async hangup() { + + } + + async updateLocalMedia(localMediaPromise: Promise) { + const oldMedia = this.localMedia; + this.localMedia = await localMediaPromise; + + const applyTrack = (selectTrack: (media: LocalMedia) => Track | undefined) => { + const oldTrack = selectTrack(oldMedia); + const newTrack = selectTrack(this.localMedia); + if (oldTrack && newTrack) { + this.peerConnection.replaceTrack(oldTrack, newTrack); + } else if (oldTrack) { + this.peerConnection.removeTrack(oldTrack); + } else if (newTrack) { + this.peerConnection.addTrack(newTrack); + } + }; + + // add the local tracks, and wait for onNegotiationNeeded and handleNegotiation to be called + applyTrack(m => m.microphoneTrack); + applyTrack(m => m.cameraTrack); + applyTrack(m => m.screenShareTrack); + } + + // calls are serialized and deduplicated by negotiationQueue + private handleNegotiation = async (): Promise => { + try { + await this.peerConnection.setLocalDescription(); + } catch (err) { + this.logger.debug(`Call ${this.callId} Error setting local description!`, err); + this.terminate(CallParty.Local, CallErrorCode.SetLocalDescription, true); + return; + } + + if (this.peerConnection.iceGatheringState === 'gathering') { + // Allow a short time for initial candidates to be gathered + await new Promise(resolve => setTimeout(resolve, 200)); + } + + if (this.state === CallState.Ended) { + return; + } + + const offer = this.peerConnection.localDescription!; + // Get rid of any candidates waiting to be sent: they'll be included in the local + // description we just got and will send in the offer. + this.logger.info(`Call ${this.callId} Discarding ${ + this.candidateSendQueue.length} candidates that will be sent in offer`); + this.candidateSendQueue = []; + + // need to queue this + const content = { + offer, + [SDPStreamMetadataKey]: this.localMedia.getSDPMetadata(), + version: 1, + lifetime: CALL_TIMEOUT_MS + }; + if (this.state === CallState.CreateOffer) { + await this.handler.sendSignallingMessage({type: EventType.Invite, content}); + this.setState(CallState.InviteSent); + } + }; + + private async handleInvite(content: InviteContent, partyId: PartyId): Promise { + if (this.state !== CallState.Fledgling || this.opponentPartyId !== undefined) { + // TODO: hangup or ignore? + return; + } + + // we must set the party ID before await-ing on anything: the call event + // handler will start giving us more call events (eg. candidates) so if + // we haven't set the party ID, we'll ignore them. + this.opponentPartyId = partyId; + this.direction = CallDirection.Inbound; + + const sdpStreamMetadata = content[SDPStreamMetadataKey]; + if (sdpStreamMetadata) { + this.updateRemoteSDPStreamMetadata(sdpStreamMetadata); + } else { + this.logger.debug(`Call ${ + this.callId} did not get any SDPStreamMetadata! Can not send/receive multiple streams`); + } + + try { + await this.peerConnection.setRemoteDescription(content.offer); + await this.addBufferedIceCandidates(); + } catch (e) { + this.logger.debug(`Call ${this.callId} failed to set remote description`, e); + this.terminate(CallParty.Local, CallErrorCode.SetRemoteDescription, false); + return; + } + + // According to previous comments in this file, firefox at some point did not + // add streams until media started arriving on them. Testing latest firefox + // (81 at time of writing), this is no longer a problem, so let's do it the correct way. + if (this.peerConnection.remoteTracks.length === 0) { + this.logger.error(`Call ${this.callId} no remote stream or no tracks after setting remote description!`); + this.terminate(CallParty.Local, CallErrorCode.SetRemoteDescription, false); + return; + } + + this.setState(CallState.Ringing); + + setTimeout(() => { + if (this.state == CallState.Ringing) { + this.logger.debug(`Call ${this.callId} invite has expired. Hanging up.`); + this.hangupParty = CallParty.Remote; // effectively + this.setState(CallState.Ended); + this.stopAllMedia(); + if (this.peerConnection.signalingState != 'closed') { + this.peerConnection.close(); + } + this.emit(CallEvent.Hangup); + } + }, content.lifetime ?? CALL_TIMEOUT_MS /* - event.getLocalAge() */ ); + } + + private updateRemoteSDPStreamMetadata(metadata: SDPStreamMetadata): void { + this.remoteSDPStreamMetadata = recursivelyAssign(this.remoteSDPStreamMetadata || {}, metadata, true); + // will rerequest stream purpose for all tracks and set track.type accordingly + this.peerConnection.notifyStreamPurposeChanged(); + for (const track of this.peerConnection.remoteTracks) { + const streamMetaData = this.remoteSDPStreamMetadata?.[track.streamId]; + if (streamMetaData) { + if (track.type === TrackType.Microphone) { + track.setMuted(streamMetaData.audio_muted); + } else { // Camera or ScreenShare + track.setMuted(streamMetaData.video_muted); + } + } + } + } + + + private async addBufferedIceCandidates(): Promise { + const bufferedCandidates = this.remoteCandidateBuffer!.get(this.opponentPartyId!); + if (bufferedCandidates) { + this.logger.info(`Call ${this.callId} Adding ${ + bufferedCandidates.length} buffered candidates for opponent ${this.opponentPartyId}`); + await this.addIceCandidates(bufferedCandidates); + } + this.remoteCandidateBuffer = undefined; + } + + private async addIceCandidates(candidates: RTCIceCandidate[]): Promise { + for (const candidate of candidates) { + if ( + (candidate.sdpMid === null || candidate.sdpMid === undefined) && + (candidate.sdpMLineIndex === null || candidate.sdpMLineIndex === undefined) + ) { + this.logger.debug(`Call ${this.callId} ignoring remote ICE candidate with no sdpMid or sdpMLineIndex`); + continue; + } + this.logger.debug(`Call ${this.callId} got remote ICE ${candidate.sdpMid} candidate: ${candidate.candidate}`); + try { + await this.peerConnection.addIceCandidate(candidate); + } catch (err) { + if (!this.ignoreOffer) { + this.logger.info(`Call ${this.callId} failed to add remote ICE candidate`, err); + } + } + } + } + + + private setState(state: CallState): void { + const oldState = this.state; + this.state = state; + this.handler.emitUpdate(); + } + + private waitForState(state: CallState): Promise { + + } + + private async terminate(hangupParty: CallParty, hangupReason: CallErrorCode, shouldEmit: boolean): Promise { + + } +} + + + +//import { randomString } from '../randomstring'; import { MCallReplacesEvent, MCallAnswer, @@ -41,41 +317,13 @@ import { MCallHangupReject, } from './callEventTypes'; - -const GROUP_CALL_TYPE = "m.call"; -const GROUP_CALL_MEMBER_TYPE = "m.call.member"; - - -/** - * Fires whenever an error occurs when call.js encounters an issue with setting up the call. - *

- * The error given will have a code equal to either `MatrixCall.ERR_LOCAL_OFFER_FAILED` or - * `MatrixCall.ERR_NO_USER_MEDIA`. `ERR_LOCAL_OFFER_FAILED` is emitted when the local client - * fails to create an offer. `ERR_NO_USER_MEDIA` is emitted when the user has denied access - * to their audio/video hardware. - * - * @event module:webrtc/call~MatrixCall#"error" - * @param {Error} err The error raised by MatrixCall. - * @example - * matrixCall.on("error", function(err){ - * console.error(err.code, err); - * }); - */ - // null is used as a special value meaning that the we're in a legacy 1:1 call // without MSC2746 that doesn't provide an id which device sent the message. type PartyId = string | null; -interface TurnServer { - urls: Array; - username?: string; - password?: string; - ttl?: number; -} - -interface AssertedIdentity { - id: string; - displayName: string; +export enum CallParty { + Local = 'local', + Remote = 'remote', } export enum CallState { @@ -90,21 +338,11 @@ export enum CallState { Ended = 'ended', } -export enum CallType { - Voice = 'voice', - Video = 'video', -} - export enum CallDirection { Inbound = 'inbound', Outbound = 'outbound', } -export enum CallParty { - Local = 'local', - Remote = 'remote', -} - export enum EventType { Invite = "m.call.invite", Candidates = "m.call.candidates", @@ -120,30 +358,6 @@ export enum EventType { AssertedIdentityPrefix = "org.matrix.call.asserted_identity", } -export enum CallEvent { - Hangup = 'hangup', - State = 'state', - Error = 'error', - Replaced = 'replaced', - - // The value of isLocalOnHold() has changed - LocalHoldUnhold = 'local_hold_unhold', - // The value of isRemoteOnHold() has changed - RemoteHoldUnhold = 'remote_hold_unhold', - // backwards compat alias for LocalHoldUnhold: remove in a major version bump - HoldUnhold = 'hold_unhold', - // Feeds have changed - FeedsChanged = 'feeds_changed', - - AssertedIdentityChanged = 'asserted_identity_changed', - - LengthChanged = 'length_changed', - - DataChannel = 'datachannel', - - SendVoipEvent = "send_voip_event", -} - export enum CallErrorCode { /** The user chose to end the call */ UserHangup = 'user_hangup', @@ -254,477 +468,21 @@ export class CallError extends Error { } } -export function genCallID(): string { - return Date.now().toString() + randomString(16); -} - -enum CallSetupMessageType { - Invite = "m.call.invite", - Answer = "m.call.answer", - Candidates = "m.call.candidates", - Hangup = "m.call.hangup", -} - -const CALL_ID = "m.call_id"; -const CALL_TERMINATED = "m.terminated"; - -class LocalMedia { - constructor( - public readonly cameraTrack?: Track, - public readonly screenShareTrack?: Track, - public readonly microphoneTrack?: AudioTrack - ) {} - - withTracks(tracks: Track[]) { - const cameraTrack = tracks.find(t => t.type === TrackType.Camera) ?? this.cameraTrack; - const screenShareTrack = tracks.find(t => t.type === TrackType.ScreenShare) ?? this.screenShareTrack; - const microphoneTrack = tracks.find(t => t.type === TrackType.Microphone) ?? this.microphoneTrack; - if (cameraTrack && microphoneTrack && cameraTrack.streamId !== microphoneTrack.streamId) { - throw new Error("The camera and audio track should have the same stream id"); - } - return new LocalMedia(cameraTrack, screenShareTrack, microphoneTrack as AudioTrack); - } - - get tracks(): Track[] { return []; } - - getSDPMetadata(): any { - const metadata = {}; - const userMediaTrack = this.microphoneTrack ?? this.cameraTrack; - if (userMediaTrack) { - metadata[userMediaTrack.streamId] = { - purpose: StreamPurpose.UserMedia, - audio_muted: this.microphoneTrack?.muted ?? false, - video_muted: this.cameraTrack?.muted ?? false, - }; - } - if (this.screenShareTrack) { - metadata[this.screenShareTrack.streamId] = { - purpose: StreamPurpose.ScreenShare - }; - } - return metadata; - } +type InviteContent = { + offer: RTCSessionDescriptionInit, + [SDPStreamMetadataKey]: SDPStreamMetadata, + version?: number, + lifetime?: number } export type InviteMessage = { type: EventType.Invite, - content: { - version: number - } + content: InviteContent } +export type SignallingMessage = InviteMessage; + export interface PeerCallHandler { emitUpdate(peerCall: PeerCall, params: any); - sendSignallingMessage(type: EventType, content: Record); -} - -// when sending, we need to encrypt message with olm. I think the flow of room => roomEncryption => olmEncryption as we already -// do for sharing keys will be best as that already deals with room tracking. -/** - * Does WebRTC signalling for a single PeerConnection, and deals with WebRTC wrappers from platform - * */ -/** Implements a call between two peers with the signalling state keeping, while still delegating the signalling message sending. Used by GroupCall.*/ -class PeerCall implements PeerConnectionHandler { - private readonly peerConnection: PeerConnection; - - - public state = CallState.Fledgling; - public hangupParty: CallParty; - public hangupReason: string; - public direction: CallDirection; - public peerConn?: RTCPeerConnection; - - // A queue for candidates waiting to go out. - // We try to amalgamate candidates into a single candidate message where - // possible - private candidateSendQueue: Array = []; - private candidateSendTries = 0; - private sentEndOfCandidates = false; - - private inviteOrAnswerSent = false; - private waitForLocalAVStream: boolean; - private opponentVersion: number | string; - // The party ID of the other side: undefined if we haven't chosen a partner - // yet, null if we have but they didn't send a party ID. - private opponentPartyId: PartyId; - private opponentCaps: CallCapabilities; - private inviteTimeout: number; - private iceDisconnectedTimeout: number; - - // The logic of when & if a call is on hold is nontrivial and explained in is*OnHold - // This flag represents whether we want the other party to be on hold - private remoteOnHold = false; - - // the stats for the call at the point it ended. We can't get these after we - // tear the call down, so we just grab a snapshot before we stop the call. - // The typescript definitions have this type as 'any' :( - private callStatsAtEnd: any[]; - - // Perfect negotiation state: https://www.w3.org/TR/webrtc/#perfect-negotiation-example - private makingOffer = false; - private ignoreOffer: boolean; - - // If candidates arrive before we've picked an opponent (which, in particular, - // will happen if the opponent sends candidates eagerly before the user answers - // the call) we buffer them up here so we can then add the ones from the party we pick - private remoteCandidateBuffer: Map; - - private remoteAssertedIdentity: AssertedIdentity; - - private remoteSDPStreamMetadata?: SDPStreamMetadata; - - private negotiationQueue: AsyncQueue; - - constructor( - private readonly handler: PeerCallHandler, - private localMedia: LocalMedia, - webRTC: WebRTC - ) { - this.peerConnection = webRTC.createPeerConnection(this); - // TODO: should we use this to serialize all state changes? - this.negotiationQueue = new AsyncQueue(this.handleNegotiation, void); - } - - // PeerConnectionHandler method - onIceConnectionStateChange(state: RTCIceConnectionState) {} - // PeerConnectionHandler method - onLocalIceCandidate(candidate: RTCIceCandidate) {} - // PeerConnectionHandler method - onIceGatheringStateChange(state: RTCIceGatheringState) {} - // PeerConnectionHandler method - onRemoteTracksChanged(tracks: Track[]) {} - // PeerConnectionHandler method - onDataChannelChanged(dataChannel: DataChannel | undefined) {} - // PeerConnectionHandler method - onNegotiationNeeded() { - // trigger handleNegotiation - this.negotiationQueue.push(void); - } - - // calls are serialized and deduplicated by negotiationQueue - private handleNegotiation = async (): Promise => { - const offer = await this.peerConnection.createOffer(); - this.peerConnection.setLocalDescription(offer); - // need to queue this - const message = { - offer, - sdp_stream_metadata: this.localMedia.getSDPMetadata(), - version: 1 - } - if (this.state === CallState.Fledgling) { - const sendPromise = this.handler.sendSignallingMessage(EventType.Invite, message); - this.setState(CallState.InviteSent); - } else { - await this.handler.sendSignallingMessage(EventType.Negotiate, message); - } - }; - - async sendInvite(localMediaPromise: Promise): Promise { - if (this.state !== CallState.Fledgling) { - return; - } - this.setState(CallState.WaitLocalMedia); - this.localMedia = await localMediaPromise; - // add the local tracks, and wait for onNegotiationNeeded and handleNegotiation to be called - for (const t of this.localMedia.tracks) { - this.peerConnection.addTrack(t); - } - await this.waitForState(CallState.Ended, CallState.InviteSent); - } - - async sendAnswer(localMediaPromise: Promise): Promise { - if (this.callHasEnded()) return; - - if (this.state !== CallState.Ringing) { - return; - } - - this.setState(CallState.WaitLocalMedia); - this.waitForLocalAVStream = true; - this.localMedia = await localMediaPromise; - this.waitForLocalAVStream = false; - - // enqueue the following - - // add the local tracks, and wait for onNegotiationNeeded and handleNegotiation to be called - for (const t of this.localMedia.tracks) { - this.peerConnection.addTrack(t); - } - - this.setState(CallState.CreateAnswer); - - let myAnswer; - try { - myAnswer = await this.peerConn.createAnswer(); - } catch (err) { - logger.debug("Failed to create answer: ", err); - this.terminate(CallParty.Local, CallErrorCode.CreateAnswer, true); - return; - } - - try { - await this.peerConn.setLocalDescription(myAnswer); - this.setState(CallState.Connecting); - - // Allow a short time for initial candidates to be gathered - await new Promise(resolve => { - setTimeout(resolve, 200); - }); - // inlined sendAnswer - const answerContent = { - answer: { - sdp: this.peerConn.localDescription.sdp, - // type is now deprecated as of Matrix VoIP v1, but - // required to still be sent for backwards compat - type: this.peerConn.localDescription.type, - }, - [SDPStreamMetadataKey]: this.getLocalSDPStreamMetadata(true), - } as MCallAnswer; - - answerContent.capabilities = { - 'm.call.transferee': this.client.supportsCallTransfer, - 'm.call.dtmf': false, - }; - - // We have just taken the local description from the peerConn which will - // contain all the local candidates added so far, so we can discard any candidates - // we had queued up because they'll be in the answer. - logger.info(`Discarding ${this.candidateSendQueue.length} candidates that will be sent in answer`); - this.candidateSendQueue = []; - - try { - await this.sendVoipEvent(EventType.CallAnswer, answerContent); - // If this isn't the first time we've tried to send the answer, - // we may have candidates queued up, so send them now. - this.inviteOrAnswerSent = true; - } catch (error) { - // We've failed to answer: back to the ringing state - this.setState(CallState.Ringing); - this.client.cancelPendingEvent(error.event); - - let code = CallErrorCode.SendAnswer; - let message = "Failed to send answer"; - if (error.name == 'UnknownDeviceError') { - code = CallErrorCode.UnknownDevices; - message = "Unknown devices present in the room"; - } - this.emit(CallEvent.Error, new CallError(code, message, error)); - throw error; - } - - // error handler re-throws so this won't happen on error, but - // we don't want the same error handling on the candidate queue - this.sendCandidateQueue(); - } catch (err) { - logger.debug("Error setting local description!", err); - this.terminate(CallParty.Local, CallErrorCode.SetLocalDescription, true); - return; - } - } - - async updateLocalMedia(localMediaPromise: Promise) { - const oldMedia = this.localMedia; - this.localMedia = await localMediaPromise; - - const applyTrack = (selectTrack: (media: LocalMedia) => Track | undefined) => { - const oldTrack = selectTrack(oldMedia); - const newTrack = selectTrack(this.localMedia); - if (oldTrack && newTrack) { - this.peerConnection.replaceTrack(oldTrack, newTrack); - } else if (oldTrack) { - this.peerConnection.removeTrack(oldTrack); - } else if (newTrack) { - this.peerConnection.addTrack(newTrack); - } - }; - - // add the local tracks, and wait for onNegotiationNeeded and handleNegotiation to be called - applyTrack(m => m.microphoneTrack); - applyTrack(m => m.cameraTrack); - applyTrack(m => m.screenShareTrack); - } - - - /** - * Replace this call with a new call, e.g. for glare resolution. Used by - * MatrixClient. - * @param {MatrixCall} newCall The new call. - */ - public replacedBy(newCall: MatrixCall): void { - if (this.state === CallState.WaitLocalMedia) { - logger.debug("Telling new call to wait for local media"); - newCall.waitForLocalAVStream = true; - } else if ([CallState.CreateOffer, CallState.InviteSent].includes(this.state)) { - if (newCall.direction === CallDirection.Outbound) { - newCall.queueGotCallFeedsForAnswer([]); - } else { - logger.debug("Handing local stream to new call"); - newCall.queueGotCallFeedsForAnswer(this.getLocalFeeds().map(feed => feed.clone())); - } - } - this.successor = newCall; - this.emit(CallEvent.Replaced, newCall); - this.hangup(CallErrorCode.Replaced, true); - } - - /** - * Hangup a call. - * @param {string} reason The reason why the call is being hung up. - * @param {boolean} suppressEvent True to suppress emitting an event. - */ - public hangup(reason: CallErrorCode, suppressEvent: boolean): void { - if (this.callHasEnded()) return; - - logger.debug("Ending call " + this.callId); - this.terminate(CallParty.Local, reason, !suppressEvent); - // We don't want to send hangup here if we didn't even get to sending an invite - if (this.state === CallState.WaitLocalMedia) return; - const content = {}; - // Don't send UserHangup reason to older clients - if ((this.opponentVersion && this.opponentVersion >= 1) || reason !== CallErrorCode.UserHangup) { - content["reason"] = reason; - } - this.sendVoipEvent(EventType.CallHangup, content); - } - - /** - * Reject a call - * This used to be done by calling hangup, but is a separate method and protocol - * event as of MSC2746. - */ - public reject(): void { - if (this.state !== CallState.Ringing) { - throw Error("Call must be in 'ringing' state to reject!"); - } - - if (this.opponentVersion < 1) { - logger.info( - `Opponent version is less than 1 (${this.opponentVersion}): sending hangup instead of reject`, - ); - this.hangup(CallErrorCode.UserHangup, true); - return; - } - - logger.debug("Rejecting call: " + this.callId); - this.terminate(CallParty.Local, CallErrorCode.UserHangup, true); - this.sendVoipEvent(EventType.CallReject, {}); - } - - // request the type of incoming track - getPurposeForStreamId(streamId: string): StreamPurpose { - // TODO: should we return a promise here for the case where the metadata hasn't arrived yet? - const metaData = this.remoteSDPStreamMetadata[streamId]; - return metadata?.purpose as StreamPurpose ?? StreamPurpose.UserMedia; - } - - private setState(state: CallState): void { - const oldState = this.state; - this.state = state; - this.handler.emitUpdate(); - if (this.inviteDeferred) { - if (this.state === CallState.InviteSent) { - this.inviteDeferred.resolve(); - } - } - } - - handleIncomingSignallingMessage(type: CallSetupMessageType, content: Record, partyId: PartyId) { - switch (type) { - case CallSetupMessageType.Invite: - case CallSetupMessageType.Answer: - this.handleAnswer(content); - break; - case CallSetupMessageType.Candidates: - this.handleRemoteIceCandidates(content); - break; - case CallSetupMessageType.Hangup: - } - } - - private async handleAnswer(content: MCallAnswer, partyId: PartyId) { - // add buffered ice candidates to peerConnection - if (this.opponentPartyId !== undefined) { - return; - } - this.opponentPartyId = partyId; - const bufferedCandidates = this.remoteCandidateBuffer?.get(partyId); - if (bufferedCandidates) { - this.addIceCandidates(bufferedCandidates); - } - this.remoteCandidateBuffer = undefined; - - this.setState(CallState.Connecting); - - const sdpStreamMetadata = content[SDPStreamMetadataKey]; - if (sdpStreamMetadata) { - this.updateRemoteSDPStreamMetadata(sdpStreamMetadata); - } else { - logger.warn("Did not get any SDPStreamMetadata! Can not send/receive multiple streams"); - } - - try { - await this.peerConnection.setRemoteDescription(content.answer); - } catch (e) { - logger.debug("Failed to set remote description", e); - this.terminate(CallParty.Local, CallErrorCode.SetRemoteDescription, false); - return; - } - - // If the answer we selected has a party_id, send a select_answer event - // We do this after setting the remote description since otherwise we'd block - // call setup on it - if (this.opponentPartyId !== null) { - try { - await this.sendVoipEvent(EventType.CallSelectAnswer, { - selected_party_id: this.opponentPartyId, - }); - } catch (err) { - // This isn't fatal, and will just mean that if another party has raced to answer - // the call, they won't know they got rejected, so we carry on & don't retry. - logger.warn("Failed to send select_answer event", err); - } - } - } - - private handleRemoteIceCandidates(content: Record) { - if (this.state === CallState.Ended) { - return; - } - const candidates = content.candidates; - if (!candidates) { - return; - } - if (this.opponentPartyId === undefined) { - if (!this.remoteCandidateBuffer) { - this.remoteCandidateBuffer = new Map(); - } - const bufferedCandidates = this.remoteCandidateBuffer.get(fromPartyId) || []; - bufferedCandidates.push(...candidates); - this.remoteCandidateBuffer.set(fromPartyId, bufferedCandidates); - } else { - this.addIceCandidates(candidates); - } - } - - private async addIceCandidates(candidates: RTCIceCandidate[]): Promise { - for (const candidate of candidates) { - if ( - (candidate.sdpMid === null || candidate.sdpMid === undefined) && - (candidate.sdpMLineIndex === null || candidate.sdpMLineIndex === undefined) - ) { - logger.debug("Ignoring remote ICE candidate with no sdpMid or sdpMLineIndex"); - continue; - } - logger.debug( - "Call " + this.callId + " got remote ICE " + candidate.sdpMid + " candidate: " + candidate.candidate, - ); - try { - await this.peerConnection.addIceCandidate(candidate); - } catch (err) { - if (!this.ignoreOffer) { - logger.info("Failed to add remote ICE candidate", err); - } - } - } - } + sendSignallingMessage(message: InviteMessage); } diff --git a/src/matrix/calls/TODO.md b/src/matrix/calls/TODO.md index a220984e..397c9d38 100644 --- a/src/matrix/calls/TODO.md +++ b/src/matrix/calls/TODO.md @@ -1,8 +1,19 @@ + - relevant MSCs next to spec: + - https://github.com/matrix-org/matrix-doc/pull/2746 Improved Signalling for 1:1 VoIP + - https://github.com/matrix-org/matrix-doc/pull/2747 Transferring VoIP Calls + - https://github.com/matrix-org/matrix-doc/pull/3077 Support for multi-stream VoIP + - https://github.com/matrix-org/matrix-doc/pull/3086 Asserted identity on VoIP calls + - https://github.com/matrix-org/matrix-doc/pull/3291 Muting in VoIP calls + - https://github.com/matrix-org/matrix-doc/pull/3401 Native Group VoIP Signalling + ## TODO - PeerCall - send invite - - find out if we need to do something different when renegotation is triggered (a subsequent onnegotiationneeded event) whether - we sent the invite/offer or answer. e.g. do we always do createOffer/setLocalDescription and then send it over a matrix negotiation event? even if we before called createAnswer. + - implement terminate + - implement waitForState + + - find out if we need to do something different when renegotation is triggered (a subsequent onnegotiationneeded event) whether + we sent the invite/offer or answer. e.g. do we always do createOffer/setLocalDescription and then send it over a matrix negotiation event? even if we before called createAnswer. - handle receiving offer and send anwser - handle sending ice candidates - handle ice candidates finished (iceGatheringState === 'complete') @@ -36,12 +47,29 @@ we wait for other participants to add their user and device (in the sources) for each (userid, deviceid) - if userId < ourUserId - - we setup a peer connection + - get local media + - we setup a peer connection + - add local tracks - we wait for negotation event to get sdp + - peerConn.createOffer + - peerConn.setLocalDescription - we send an m.call.invite - else - wait for invite from other side +on local ice candidate: + - if we haven't ... sent invite yet? or received answer? buffer candidate + - otherwise send candidate (without buffering?) + +on incoming call: + - ring, offer to answer + +answering incoming call + - get local media + - peerConn.setRemoteDescription + - add local tracks to peerConn + - peerConn.createAnswer() + - peerConn.setLocalDescription in some cases, we will actually send the invite to all devices (e.g. SFU), so we probably still need to handle multiple anwsers? @@ -50,9 +78,6 @@ so we would send an invite to multiple devices and pick the one for which we received the anwser first. between invite and anwser, we could already receive ice candidates that we need to buffer. -should a PeerCall only exist after we've received an answer? -Before that, we could have a PeerCallInvite - updating the metadata: @@ -64,3 +89,38 @@ if just muting: use m.call.sdp_stream_metadata_changed party identification - for 1:1 calls, we identify with a party_id - for group calls, we identify with a device_id + + + + +## TODO + +Build basic version of PeerCall +Build basic version of GroupCall +Make it possible to olm encrypt the messages +Do work needed for state events + - receiving (almost done?) + - sending +Expose call objects +Write view model +write view + +## Calls questions\ + - how do we handle glare between group calls (e.g. different state events with different call ids?) + - Split up DOM part into platform code? What abstractions to choose? + Does it make sense to come up with our own API very similar to DOM api? + - what code do we copy over vs what do we implement ourselves? + - MatrixCall: perhaps we can copy it over and modify it to our needs? Seems to have a lot of edge cases implemented. + - what is partyId about? + - CallFeed: I need better understand where it is used. It's basically a wrapper around a MediaStream with volume detection. Could it make sense to put this in platform for example? + + - which parts of MSC2746 are still relevant for group calls? + - which parts of MSC2747 are still relevant for group calls? it seems mostly orthogonal? + - SOLVED: how does switching channels work? This was only enabled by MSC 2746 + - you do getUserMedia()/getDisplayMedia() to get the stream(s) + - you call removeTrack/addTrack on the peerConnection + - you receive a negotiationneeded event + - you call createOffer + - you send m.call.negotiate + - SOLVED: wrt to MSC2746, is the screen share track and the audio track (and video track) part of the same stream? or do screen share tracks need to go in a different stream? it sounds incompatible with the MSC2746 requirement. + - SOLVED: how does muting work? MediaStreamTrack.enabled diff --git a/src/platform/types/WebRTC.ts b/src/platform/types/WebRTC.ts index aff74f62..8b224dfe 100644 --- a/src/platform/types/WebRTC.ts +++ b/src/platform/types/WebRTC.ts @@ -15,11 +15,7 @@ limitations under the License. */ import {Track, TrackType} from "./MediaDevices"; - -export enum StreamPurpose { - UserMedia = "m.usermedia", - ScreenShare = "m.screenshare" -} +import {SDPStreamMetadataPurpose} from "../../matrix/calls/callEventTypes"; export interface WebRTC { createPeerConnection(handler: PeerConnectionHandler): PeerConnection; @@ -33,7 +29,7 @@ export interface PeerConnectionHandler { onDataChannelChanged(dataChannel: DataChannel | undefined); onNegotiationNeeded(); // request the type of incoming stream - getPurposeForStreamId(streamId: string): StreamPurpose; + getPurposeForStreamId(streamId: string): SDPStreamMetadataPurpose; } // does it make sense to wrap this? export interface DataChannel { @@ -42,8 +38,11 @@ export interface DataChannel { } export interface PeerConnection { - get remoteTracks(): Track[] | undefined; + notifyStreamPurposeChanged(): void; + get remoteTracks(): Track[]; get dataChannel(): DataChannel | undefined; + get iceGatheringState(): RTCIceGatheringState; + get localDescription(): RTCSessionDescription | undefined; createOffer(): Promise; createAnswer(): Promise; setLocalDescription(description?: RTCSessionDescriptionInit): Promise; diff --git a/src/platform/web/dom/MediaDevices.ts b/src/platform/web/dom/MediaDevices.ts index 7d5c8f4f..7af15168 100644 --- a/src/platform/web/dom/MediaDevices.ts +++ b/src/platform/web/dom/MediaDevices.ts @@ -90,9 +90,10 @@ export class TrackWrapper implements Track { constructor( public readonly track: MediaStreamTrack, public readonly stream: MediaStream, - public readonly type: TrackType + private _type: TrackType, ) {} + get type(): TrackType { return this._type; } get label(): string { return this.track.label; } get id(): string { return this.track.id; } get streamId(): string { return this.stream.id; } @@ -102,6 +103,10 @@ export class TrackWrapper implements Track { setMuted(muted: boolean): void { this.track.enabled = !muted; } + + setType(type: TrackType): void { + this._type = type; + } } export class AudioTrackWrapper extends TrackWrapper { diff --git a/src/platform/web/dom/WebRTC.ts b/src/platform/web/dom/WebRTC.ts index 917f42af..98c77ff9 100644 --- a/src/platform/web/dom/WebRTC.ts +++ b/src/platform/web/dom/WebRTC.ts @@ -16,7 +16,8 @@ limitations under the License. import {TrackWrapper, wrapTrack} from "./MediaDevices"; import {Track, TrackType} from "../../types/MediaDevices"; -import {WebRTC, PeerConnectionHandler, DataChannel, PeerConnection, StreamPurpose} from "../../types/WebRTC"; +import {WebRTC, PeerConnectionHandler, DataChannel, PeerConnection} from "../../types/WebRTC"; +import {SDPStreamMetadataPurpose} from "../../../matrix/calls/callEventTypes"; const POLLING_INTERVAL = 200; // ms export const SPEAKING_THRESHOLD = -60; // dB @@ -25,8 +26,8 @@ const SPEAKING_SAMPLE_COUNT = 8; // samples class DOMPeerConnection implements PeerConnection { private readonly peerConnection: RTCPeerConnection; private readonly handler: PeerConnectionHandler; - private dataChannelWrapper?: DOMDataChannel; - private _remoteTracks: TrackWrapper[]; + //private dataChannelWrapper?: DOMDataChannel; + private _remoteTracks: TrackWrapper[] = []; constructor(handler: PeerConnectionHandler, forceTURN: boolean, turnServers: RTCIceServer[], iceCandidatePoolSize = 0) { this.handler = handler; @@ -39,7 +40,9 @@ class DOMPeerConnection implements PeerConnection { } get remoteTracks(): Track[] { return this._remoteTracks; } - get dataChannel(): DataChannel | undefined { return this.dataChannelWrapper; } + get dataChannel(): DataChannel | undefined { return undefined; } + get iceGatheringState(): RTCIceGatheringState { return this.peerConnection.iceGatheringState; } + get localDescription(): RTCSessionDescription | undefined { return this.peerConnection.localDescription ?? undefined; } createOffer(): Promise { return this.peerConnection.createOffer(); @@ -97,6 +100,14 @@ class DOMPeerConnection implements PeerConnection { } return false; } + + notifyStreamPurposeChanged(): void { + for (const track of this.remoteTracks) { + const wrapper = track as TrackWrapper; + wrapper.setType(this.getRemoteTrackType(wrapper.track, wrapper.streamId)); + } + } + createDataChannel(): DataChannel { return new DataChannel(this.peerConnection.createDataChannel()); } @@ -173,20 +184,19 @@ class DOMPeerConnection implements PeerConnection { // of the new tracks, filter the ones that we didn't already knew about const addedTracks = updatedTracks.filter(ut => !this._remoteTracks.some(t => t.track.id === ut.track.id)); // wrap them - const wrappedAddedTracks = addedTracks.map(t => this.wrapRemoteTrack(t.track, t.stream)); + const wrappedAddedTracks = addedTracks.map(t => wrapTrack(t.track, t.stream, this.getRemoteTrackType(t.track, t.stream.id))); // and concat the tracks for other streams with the added tracks this._remoteTracks = withoutRemovedTracks.concat(...wrappedAddedTracks); this.handler.onRemoteTracksChanged(this.remoteTracks); } - private wrapRemoteTrack(track: MediaStreamTrack, stream: MediaStream): TrackWrapper { - let type: TrackType; + + private getRemoteTrackType(track: MediaStreamTrack, streamId: string): TrackType { if (track.kind === "video") { - const purpose = this.handler.getPurposeForStreamId(stream.id); - type = purpose === StreamPurpose.UserMedia ? TrackType.Camera : TrackType.ScreenShare; + const purpose = this.handler.getPurposeForStreamId(streamId); + return purpose === SDPStreamMetadataPurpose.Usermedia ? TrackType.Camera : TrackType.ScreenShare; } else { - type = TrackType.Microphone; + return TrackType.Microphone; } - return wrapTrack(track, stream, type); } /** diff --git a/src/utils/recursivelyAssign.ts b/src/utils/recursivelyAssign.ts new file mode 100644 index 00000000..adf5f2ef --- /dev/null +++ b/src/utils/recursivelyAssign.ts @@ -0,0 +1,39 @@ +/* +Copyright 2015, 2016 OpenMarket Ltd +Copyright 2019 The Matrix.org Foundation C.I.C. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + + +/** + * This function is similar to Object.assign() but it assigns recursively and + * allows you to ignore nullish values from the source + * + * @param {Object} target + * @param {Object} source + * @returns the target object + */ +export function recursivelyAssign(target: Object, source: Object, ignoreNullish = false): any { + for (const [sourceKey, sourceValue] of Object.entries(source)) { + if (target[sourceKey] instanceof Object && sourceValue) { + recursivelyAssign(target[sourceKey], sourceValue); + continue; + } + if ((sourceValue !== null && sourceValue !== undefined) || !ignoreNullish) { + target[sourceKey] = sourceValue; + continue; + } + } + return target; +} \ No newline at end of file