From 2cbda923c862077016dae7986a266189d9568988 Mon Sep 17 00:00:00 2001 From: Johannes Marbach Date: Wed, 8 Oct 2025 16:05:55 +0200 Subject: [PATCH] Inline Olm & Megolm specifications Fixes: #1543 Signed-off-by: Johannes Marbach --- config/_default/hugo.toml | 9 +- content/olm-megolm/_index.md | 712 ++++++++++++++++++++++++ content/proposals.md | 2 +- layouts/_markup/render-passthrough.html | 9 + layouts/docs/baseof.html | 3 + 5 files changed, 733 insertions(+), 2 deletions(-) create mode 100644 content/olm-megolm/_index.md create mode 100644 layouts/_markup/render-passthrough.html diff --git a/config/_default/hugo.toml b/config/_default/hugo.toml index 400643b2..0978421d 100644 --- a/config/_default/hugo.toml +++ b/config/_default/hugo.toml @@ -43,6 +43,12 @@ description = "Home of the Matrix specification for decentralised communication" [markup.goldmark.renderer] # Enables us to render raw HTML unsafe = true + [markup.goldmark.extensions] + [markup.goldmark.extensions.passthrough] + enable = true + [markup.goldmark.extensions.passthrough.delimiters] + block = [['\[', '\]'], ['$$', '$$']] + inline = [['\(', '\)']] [markup.highlight] # See a complete list of available styles at https://xyproto.github.io/splash/docs/all.html # If the style is changed, remember to regenerate the CSS with: @@ -121,7 +127,8 @@ sidebar_menu_compact = true [[server.headers]] for = '/**' [server.headers.values] - Content-Security-Policy = "default-src 'self'; style-src 'self'; script-src 'self'; img-src 'self' data:; connect-src 'self'; font-src 'self' data:; media-src 'self'; child-src 'self'; form-action 'self'; object-src 'self'" + # TODO: figure out CSP + # Content-Security-Policy = "default-src 'self'; style-src 'self'; script-src 'self'; img-src 'self' data:; connect-src 'self'; font-src 'self' data:; media-src 'self'; child-src 'self'; form-action 'self'; object-src 'self'" X-XSS-Protection = "1; mode=block" X-Content-Type-Options = "nosniff" # Strict-Transport-Security = 
"max-age=31536000; includeSubDomains; preload" diff --git a/content/olm-megolm/_index.md b/content/olm-megolm/_index.md new file mode 100644 index 00000000..8369a41f --- /dev/null +++ b/content/olm-megolm/_index.md @@ -0,0 +1,712 @@ +--- +title: "Olm & Megolm" +weight: 61 +type: docs +--- + +## Olm: A Cryptographic Ratchet + +An implementation of the double cryptographic ratchet described by +https://whispersystems.org/docs/specifications/doubleratchet/. + +### Notation + +This document uses \(\parallel\) to represent string concatenation. When +\(\parallel\) appears on the right hand side of an \(=\) it means that +the inputs are concatenated. When \(\parallel\) appears on the left hand +side of an \(=\) it means that the output is split. + +When this document uses \(\operatorname{ECDH}\left(K_A,K_B\right)\) it means +that each party computes a Diffie-Hellman agreement using their private key +and the remote party's public key. +So party \(A\) computes \(\operatorname{ECDH}\left(K_B^{public},K_A^{private}\right)\) +and party \(B\) computes \(\operatorname{ECDH}\left(K_A^{public},K_B^{private}\right)\). + +Where this document uses \(\operatorname{HKDF}\left(salt,IKM,info,L\right)\) it +refers to the [HMAC-based key derivation function][] with a salt value of +\(salt\), input key material of \(IKM\), context string \(info\), +and output keying material length of \(L\) bytes. + +### The Olm Algorithm + +#### Initial setup + +The setup takes four [Curve25519][] inputs: Identity keys for Alice and Bob, +\(I_A\) and \(I_B\), and one-time keys for Alice and Bob, +\(E_A\) and \(E_B\). A shared secret, \(S\), is generated using +[Triple Diffie-Hellman][]. The initial 256 bit root key, \(R_0\), and 256 +bit chain key, \(C_{0,0}\), are derived from the shared secret using an +HMAC-based Key Derivation Function using [SHA-256][] as the hash function +([HKDF-SHA-256][]) with default salt and ``"OLM_ROOT"`` as the info. 
+ +```math +\begin{aligned} + S&=\operatorname{ECDH}\left(I_A,E_B\right)\;\parallel\; + \operatorname{ECDH}\left(E_A,I_B\right)\;\parallel\; + \operatorname{ECDH}\left(E_A,E_B\right)\\ + + R_0\;\parallel\;C_{0,0}&= + \operatorname{HKDF}\left(0,S,\text{``OLM\_ROOT"},64\right) +\end{aligned} +``` + +#### Advancing the root key + +Advancing a root key takes the previous root key, \(R_{i-1}\), and two +Curve25519 inputs: the previous ratchet key, \(T_{i-1}\), and the current +ratchet key \(T_i\). The even ratchet keys are generated by Alice. +The odd ratchet keys are generated by Bob. A shared secret is generated +using Diffie-Hellman on the ratchet keys. The next root key, \(R_i\), and +chain key, \(C_{i,0}\), are derived from the shared secret using +[HKDF-SHA-256][] using \(R_{i-1}\) as the salt and ``"OLM_RATCHET"`` as the +info. + +```math +\begin{aligned} + R_i\;\parallel\;C_{i,0}&= + \operatorname{HKDF}\left( + R_{i-1}, + \operatorname{ECDH}\left(T_{i-1},T_i\right), + \text{``OLM\_RATCHET"}, + 64 + \right) +\end{aligned} +``` + +#### Advancing the chain key + +Advancing a chain key takes the previous chain key, \(C_{i,j-1}\). The next +chain key, \(C_{i,j}\), is the [HMAC-SHA-256][] of ``"\x02"`` using the +previous chain key as the key. + +```math +\begin{aligned} + C_{i,j}&=\operatorname{HMAC}\left(C_{i,j-1},\text{``\char`\\x02"}\right) +\end{aligned} +``` + +#### Creating a message key + +Creating a message key takes the current chain key, \(C_{i,j}\). The +message key, \(M_{i,j}\), is the [HMAC-SHA-256][] of ``"\x01"`` using the +current chain key as the key. The message keys where \(i\) is even are used +by Alice to encrypt messages. The message keys where \(i\) is odd are used +by Bob to encrypt messages. 
+ +```math +\begin{aligned} + M_{i,j}&=\operatorname{HMAC}\left(C_{i,j},\text{``\char`\\x01"}\right) +\end{aligned} +``` + +### The Olm Protocol + +#### Creating an outbound session + +Bob publishes the public parts of his identity key, \(I_B\), and some +single-use one-time keys \(E_B\). + +Alice downloads Bob's identity key, \(I_B\), and a one-time key, +\(E_B\). She generates a new single-use key, \(E_A\), and computes a +root key, \(R_0\), and a chain key \(C_{0,0}\). She also generates a +new ratchet key \(T_0\). + +#### Sending the first pre-key messages + +Alice computes a message key, \(M_{0,j}\), and a new chain key, +\(C_{0,j+1}\), using the current chain key. She replaces the current chain +key with the new one. + +Alice encrypts her plain-text with the message key, \(M_{0,j}\), using an +authenticated encryption scheme (see below) to get a cipher-text, +\(X_{0,j}\). + +She then sends the following to Bob: + * The public part of her identity key, \(I_A\) + * The public part of her single-use key, \(E_A\) + * The public part of Bob's single-use key, \(E_B\) + * The current chain index, \(j\) + * The public part of her ratchet key, \(T_0\) + * The cipher-text, \(X_{0,j}\) + +Alice will continue to send pre-key messages until she receives a message from +Bob. + +#### Creating an inbound session from a pre-key message + +Bob receives a pre-key message as above. + +Bob looks up the private part of his single-use key, \(E_B\). He can now +compute the root key, \(R_0\), and the chain key, \(C_{0,0}\), from +\(I_A\), \(E_A\), \(I_B\), and \(E_B\). + +Bob then advances the chain key \(j\) times, to compute the chain key used +by the message, \(C_{0,j}\). He now creates the +message key, \(M_{0,j}\), and attempts to decrypt the cipher-text, +\(X_{0,j}\). If the cipher-text's authentication is correct then Bob can +discard the private part of his single-use one-time key, \(E_B\). + +Bob stores Alice's initial ratchet key, \(T_0\), until he wants to +send a message. 
+ +#### Sending normal messages + +Once a message has been received from the other side, a session is considered +established, and a more compact form is used. + +To send a message, the user checks if they have a sender chain key, +\(C_{i,j}\). Alice uses chain keys where \(i\) is even. Bob uses chain +keys where \(i\) is odd. If the chain key doesn't exist then a new ratchet +key \(T_i\) is generated and a new root key \(R_i\) and chain key +\(C_{i,0}\) are computed using \(R_{i-1}\), \(T_{i-1}\) and +\(T_i\). + +A message key, +\(M_{i,j}\) is computed from the current chain key, \(C_{i,j}\), and +the chain key is replaced with the next chain key, \(C_{i,j+1}\). The +plain-text is encrypted with \(M_{i,j}\), using an authenticated encryption +scheme (see below) to get a cipher-text, \(X_{i,j}\). + +The user then sends the following to the recipient: + * The current chain index, \(j\) + * The public part of the current ratchet key, \(T_i\) + * The cipher-text, \(X_{i,j}\) + +#### Receiving messages + +The user receives a message as above with the sender's current chain index, \(j\), +the sender's ratchet key, \(T_i\), and the cipher-text, \(X_{i,j}\). + +The user checks if they have a receiver chain with the correct +\(i\) by comparing the ratchet key, \(T_i\). If the chain doesn't exist +then they compute a new root key, \(R_i\), and a new receiver chain, with +chain key \(C_{i,0}\), using \(R_{i-1}\), \(T_{i-1}\) and +\(T_i\). + +If the \(j\) of the message is less than +the current chain index on the receiver then the message may only be decrypted +if the receiver has stored a copy of the message key \(M_{i,j}\). Otherwise +the receiver computes the chain key, \(C_{i,j}\). The receiver computes the +message key, \(M_{i,j}\), from the chain key and attempts to decrypt the +cipher-text, \(X_{i,j}\). 
+ +If the decryption succeeds the receiver updates the chain key for \(T_i\) +with \(C_{i,j+1}\) and stores the message keys that were skipped in the +process so that they can decode out of order messages. If the receiver created +a new receiver chain then they discard their current sender chain so that +they will create a new chain when they next send a message. + +### The Olm Message Format + +Olm uses two types of messages. The underlying transport protocol must provide +a means for recipients to distinguish between them. + +#### Normal Messages + +Olm messages start with a one byte version followed by a variable length +payload followed by a fixed length message authentication code. + +```text + +--------------+------------------------------------+-----------+ + | Version Byte | Payload Bytes | MAC Bytes | + +--------------+------------------------------------+-----------+ +``` + +The version byte is ``"\x03"``. + +The payload consists of key-value pairs where the keys are integers and the +values are integers and strings. The keys are encoded as a variable length +integer tag where the 3 lowest bits indicate the type of the value: +0 for integers, 2 for strings. If the value is an integer then the tag is +followed by the value encoded as a variable length integer. If the value is +a string then the tag is followed by the length of the string encoded as +a variable length integer followed by the string itself. + +Olm uses a variable length encoding for integers. Each integer is encoded as a +sequence of bytes with the high bit set followed by a byte with the high bit +clear. The seven low bits of each byte store the bits of the integer. The least +significant bits are stored in the first byte. 
+ +**Name**|**Tag**|**Type**|**Meaning** +:-----:|:-----:|:-----:|:-----: +Ratchet-Key|0x0A|String|The public part of the ratchet key, \(T_i\), of the message +Chain-Index|0x10|Integer|The chain index, \(j\), of the message +Cipher-Text|0x22|String|The cipher-text, \(X_{i,j}\), of the message + +The length of the MAC is determined by the authenticated encryption algorithm +being used. (Olm version 1 uses [HMAC-SHA-256][], truncated to 8 bytes). The +MAC protects all of the bytes preceding the MAC. + +#### Pre-Key Messages + +Olm pre-key messages start with a one byte version followed by a variable +length payload. + +```text + +--------------+------------------------------------+ + | Version Byte | Payload Bytes | + +--------------+------------------------------------+ +``` + +The version byte is ``"\x03"``. + +The payload uses the same key-value format as for normal messages. + +**Name**|**Tag**|**Type**|**Meaning** +:-----:|:-----:|:-----:|:-----: +One-Time-Key|0x0A|String|The public part of Bob's single-use key, \(E_B\). +Base-Key|0x12|String|The public part of Alice's single-use key, \(E_A\). +Identity-Key|0x1A|String|The public part of Alice's identity key, \(I_A\). +Message|0x22|String|An embedded Olm message with its own version and MAC. + +### Olm Authenticated Encryption + +#### Version 1 + +Version 1 of Olm uses [AES-256][] in [CBC][] mode with [PKCS#7][] padding for +encryption and [HMAC-SHA-256][] (truncated to 64 bits) for authentication. The +256 bit AES key, 256 bit HMAC key, and 128 bit AES IV are derived from the +message key using [HKDF-SHA-256][] using the default salt and an info of +``"OLM_KEYS"``. + +```math +\begin{aligned} + AES\_KEY_{i,j}\;\parallel\;HMAC\_KEY_{i,j}\;\parallel\;AES\_IV_{i,j} + &= \operatorname{HKDF}\left(0,M_{i,j},\text{``OLM\_KEYS"},80\right) +\end{aligned} +``` + +The plain-text is encrypted with AES-256, using the key \(AES\_KEY_{i,j}\) +and the IV \(AES\_IV_{i,j}\) to give the cipher-text, \(X_{i,j}\). 
+ +Then the entire message (including the Version Byte and all Payload Bytes) is +passed through [HMAC-SHA-256][]. The first 8 bytes of the MAC are appended to the message. + +### Message authentication concerns + +To avoid unknown key-share attacks, the application must include identifying +data for the sending and receiving user in the plain-text of (at least) the +pre-key messages. Such data could be a user ID, a telephone number; +alternatively it could be the public part of a keypair which the relevant user +has proven ownership of. + +#### Example attacks + +1. Alice publishes her public [Curve25519][] identity key, \(I_A\). Eve + publishes the same identity key, claiming it as her own. Bob downloads + Eve's keys, and associates \(I_A\) with Eve. Alice sends a message to + Bob; Eve intercepts it before forwarding it to Bob. Bob believes the + message came from Eve rather than Alice. + + This is prevented if Alice includes her user ID in the plain-text of the + pre-key message, so that Bob can see that the message was sent by Alice + originally. + +2. Bob publishes his public [Curve25519][] identity key, \(I_B\). Eve + publishes the same identity key, claiming it as her own. Alice downloads + Eve's keys, and associates \(I_B\) with Eve. Alice sends a message to + Eve; Eve cannot decrypt it, but forwards it to Bob. Bob believes that + Alice sent the message to him, whereas Alice intended it to go to Eve. + + This is prevented by Alice including the user ID of the intended recipient + (Eve) in the plain-text of the pre-key message. Bob can now tell that the + message was meant for Eve rather than him. + +### IPR + +The Olm specification (this document) is hereby placed in the public domain. + +### Feedback + +Can be sent to olm at matrix.org. + +### Acknowledgements + +The ratchet that Olm implements was designed by Trevor Perrin and Moxie +Marlinspike - details at https://whispersystems.org/docs/specifications/doubleratchet/. 
Olm is +an entirely new implementation written by the Matrix.org team. + +[Curve25519]: http://cr.yp.to/ecdh.html +[Triple Diffie-Hellman]: https://whispersystems.org/blog/simplifying-otr-deniability/ +[HMAC-based key derivation function]: https://tools.ietf.org/html/rfc5869 +[HKDF-SHA-256]: https://tools.ietf.org/html/rfc5869 +[HMAC-SHA-256]: https://tools.ietf.org/html/rfc2104 +[SHA-256]: https://tools.ietf.org/html/rfc6234 +[AES-256]: http://csrc.nist.gov/publications/fips/fips197/fips-197.pdf +[CBC]: http://csrc.nist.gov/publications/nistpubs/800-38a/sp800-38a.pdf +[PKCS#7]: https://tools.ietf.org/html/rfc2315 + +## Megolm group ratchet + +An AES-based cryptographic ratchet intended for group communications. + +### Background + +The Megolm ratchet is intended for encrypted messaging applications where there +may be a large number of recipients of each message, thus precluding the use of +peer-to-peer encryption systems such as [Olm][]. + +It also allows a recipient to decrypt received messages multiple times. For +instance, in client/server applications, a copy of the ciphertext can be stored +on the (untrusted) server, while the client need only store the session keys. + +### Overview + +Each participant in a conversation uses their own outbound session for +encrypting messages. A session consists of a ratchet and an [Ed25519][] keypair. + +Secrecy is provided by the ratchet, which can be wound forwards but not +backwards, and is used to derive a distinct message key for each message. + +Authenticity is provided via Ed25519 signatures. + +The value of the ratchet, and the public part of the Ed25519 key, are shared +with other participants in the conversation via secure peer-to-peer +channels. Provided that peer-to-peer channel provides authenticity of the +messages to the participants and deniability of the messages to third parties, +the Megolm session will inherit those properties. 
+ +### The Megolm ratchet algorithm + +The Megolm ratchet \(R_i\) consists of four parts, \(R_{i,j}\) for +\(j \in \{0,1,2,3\}\). The length of each part depends on the hash function +in use (256 bits for this version of Megolm). + +The ratchet is initialised with cryptographically-secure random data, and +advanced as follows: + +```math +\begin{aligned} +R_{i,0} &= + \begin{cases} + H_0\left(R_{2^{24}(n-1),0}\right) &\text{if }\exists n | i = 2^{24}n\\ + R_{i-1,0} &\text{otherwise} + \end{cases}\\ +R_{i,1} &= + \begin{cases} + H_1\left(R_{2^{24}(n-1),0}\right) &\text{if }\exists n | i = 2^{24}n\\ + H_1\left(R_{2^{16}(m-1),1}\right) &\text{if }\exists m | i = 2^{16}m\\ + R_{i-1,1} &\text{otherwise} + \end{cases}\\ +R_{i,2} &= + \begin{cases} + H_2\left(R_{2^{24}(n-1),0}\right) &\text{if }\exists n | i = 2^{24}n\\ + H_2\left(R_{2^{16}(m-1),1}\right) &\text{if }\exists m | i = 2^{16}m\\ + H_2\left(R_{2^8(p-1),2}\right) &\text{if }\exists p | i = 2^8p\\ + R_{i-1,2} &\text{otherwise} + \end{cases}\\ +R_{i,3} &= + \begin{cases} + H_3\left(R_{2^{24}(n-1),0}\right) &\text{if }\exists n | i = 2^{24}n\\ + H_3\left(R_{2^{16}(m-1),1}\right) &\text{if }\exists m | i = 2^{16}m\\ + H_3\left(R_{2^8(p-1),2}\right) &\text{if }\exists p | i = 2^8p\\ + H_3\left(R_{i-1,3}\right) &\text{otherwise} + \end{cases} +\end{aligned} +``` + +where \(H_0\), \(H_1\), \(H_2\), and \(H_3\) are different hash +functions. In summary: every \(2^8\) iterations, \(R_{i,3}\) is +reseeded from \(R_{i,2}\). Every \(2^{16}\) iterations, \(R_{i,2}\) +and \(R_{i,3}\) are reseeded from \(R_{i,1}\). Every \(2^{24}\) +iterations, \(R_{i,1}\), \(R_{i,2}\) and \(R_{i,3}\) are reseeded +from \(R_{i,0}\). + +The complete ratchet value, \(R_{i}\), is hashed to generate the keys used +to encrypt each message. This scheme allows the ratchet to be advanced an +arbitrary amount forwards while needing at most 1020 hash computations. 
A +client can decrypt chat history onwards from the earliest value of the ratchet +it is aware of, but cannot decrypt history from before that point without +reversing the hash function. + +This allows a participant to share its ability to decrypt chat history with +another from a point in the conversation onwards by giving a copy of the +ratchet at that point in the conversation. + + +### The Megolm protocol + +#### Session setup + +Each participant in a conversation generates their own Megolm session. A +session consists of three parts: + +* a 32 bit counter, \(i\). +* an [Ed25519][] keypair, \(K\). +* a ratchet, \(R_i\), which consists of four 256-bit values, + \(R_{i,j}\) for \(j \in \{0,1,2,3\}\). + +The counter \(i\) is initialised to \(0\). A new Ed25519 keypair is +generated for \(K\). The ratchet is simply initialised with 1024 bits of +cryptographically-secure random data. + +A single participant may use multiple sessions over the lifetime of a +conversation. The public part of \(K\) is used as an identifier to +discriminate between sessions. + +#### Sharing session data + +To allow other participants in the conversation to decrypt messages, the +session data is formatted as described in [Session-sharing format](#session-sharing-format). It is then +shared with other participants in the conversation via a secure peer-to-peer +channel (such as that provided by [Olm][]). + +When the session data is received from other participants, the recipient first +checks that the signature matches the public key. They then store their own +copy of the counter, ratchet, and public key. + +#### Message encryption + +This version of Megolm uses [AES-256][] in [CBC][] mode with [PKCS#7][] padding and +[HMAC-SHA-256][] (truncated to 64 bits). 
The 256 bit AES key, 256 bit HMAC key, +and 128 bit AES IV are derived from the megolm ratchet \(R_i\): + +```math +\begin{aligned} + \mathit{AES\_KEY}_{i}\;\parallel\;\mathit{HMAC\_KEY}_{i}\;\parallel\;\mathit{AES\_IV}_{i} + &= \operatorname{HKDF}\left(0,\,R_{i},\text{"MEGOLM\_KEYS"},\,80\right) \\ +\end{aligned} +``` + +where \(\parallel\) represents string splitting, and +\(\operatorname{HKDF}\left(\mathit{salt},\,\mathit{IKM},\,\mathit{info},\,L\right)\) +refers to the [HMAC-based key +derivation function][] using [SHA-256][] as the hash function +([HKDF-SHA-256][]) with a salt value of \(\mathit{salt}\), input key material of +\(\mathit{IKM}\), context string \(\mathit{info}\), and output keying material length of +\(L\) bytes. + +The plain-text is encrypted with AES-256, using the key \(\mathit{AES\_KEY}_{i}\) +and the IV \(\mathit{AES\_IV}_{i}\) to give the cipher-text, \(X_{i}\). + +The ratchet index \(i\), and the cipher-text \(X_{i}\), are then packed +into a message as described in [Message format](#message-format). Then the entire message +(including the version bytes and all payload bytes) is passed through +HMAC-SHA-256. The first 8 bytes of the MAC are appended to the message. + +Finally, the authenticated message is signed using the Ed25519 keypair; the 64 +byte signature is appended to the message. + +The complete signed message, together with the public part of \(K\) (acting +as a session identifier), can then be sent over an insecure channel. The +message can then be authenticated and decrypted only by recipients who have +received the session data. + +#### Advancing the ratchet + +After each message is encrypted, the ratchet is advanced. 
This is done as +described in [The Megolm ratchet algorithm](#the-megolm-ratchet-algorithm), using the following definitions: + +```math +\begin{aligned} + H_0(A) &\equiv \operatorname{HMAC}(A,\text{``\char`\\x00"}) \\ + H_1(A) &\equiv \operatorname{HMAC}(A,\text{``\char`\\x01"}) \\ + H_2(A) &\equiv \operatorname{HMAC}(A,\text{``\char`\\x02"}) \\ + H_3(A) &\equiv \operatorname{HMAC}(A,\text{``\char`\\x03"}) \\ +\end{aligned} +``` + +where \(\operatorname{HMAC}(A, T)\) is the HMAC-SHA-256 of ``T``, using ``A`` as the +key. + +For outbound sessions, the updated ratchet and counter are stored in the +session. + +In order to maintain the ability to decrypt conversation history, inbound +sessions should store a copy of their earliest known ratchet value (unless they +explicitly want to drop the ability to decrypt that history - see [Partial +Forward Secrecy](#partial-forward-secrecy)). They may also choose to cache calculated ratchet values, +but the decision of which ratchet states to cache is left to the application. + +### Data exchange formats + +#### Session sharing format + +This format is used for the initial sharing of a Megolm session with other +group participants who need to be able to read messages encrypted by this +session. + +The session sharing format is as follows: + +```text ++---+----+--------+--------+--------+--------+------+-----------+ +| V | i | R(i,0) | R(i,1) | R(i,2) | R(i,3) | Kpub | Signature | ++---+----+--------+--------+--------+--------+------+-----------+ +0 1 5 37 69 101 133 165 229 bytes +``` + +The version byte, ``V``, is ``"\x02"``. + +This is followed by the ratchet index, \(i\), which is encoded as a +big-endian 32-bit integer; the ratchet values \(R_{i,j}\); and the public +part of the Ed25519 keypair \(K\). + +The data is then signed using the Ed25519 keypair, and the 64-byte signature is +appended. 
+ +#### Session export format + +Once the session is initially shared with the group participants, each +participant needs to retain a copy of the session if they want to maintain +their ability to decrypt messages encrypted with that session. + +For forward-secrecy purposes, a participant may choose to store a ratcheted +version of the session. But since the ratchet index is covered by the +signature, this would invalidate the signature. So we define a similar format, +called the *session export format*, which is identical to the [session sharing +format](#session-sharing-format) except for dropping the signature. + +The Megolm session export format is thus as follows: + +```text ++---+----+--------+--------+--------+--------+------+ +| V | i | R(i,0) | R(i,1) | R(i,2) | R(i,3) | Kpub | ++---+----+--------+--------+--------+--------+------+ +0 1 5 37 69 101 133 165 bytes +``` + +The version byte, ``V``, is ``"\x01"``. + +This is followed by the ratchet index, \(i\), which is encoded as a +big-endian 32-bit integer; the ratchet values \(R_{i,j}\); and the public +part of the Ed25519 keypair \(K\). + +#### Message format + +Megolm messages consist of a one byte version, followed by a variable length +payload, a fixed length message authentication code, and a fixed length +signature. + +```text ++---+------------------------------------+-----------+------------------+ +| V | Payload Bytes | MAC Bytes | Signature Bytes | ++---+------------------------------------+-----------+------------------+ +0 1 N N+8 N+72 bytes +``` + +The version byte, ``V``, is ``"\x03"``. + +The payload uses a format based on the [Protocol Buffers encoding][]. It +consists of the following key-value pairs: + +**Name**|**Tag**|**Type**|**Meaning** +:-----:|:-----:|:-----:|:-----: +Message-Index|0x08|Integer|The index of the ratchet, \(i\) +Cipher-Text|0x12|String|The cipher-text, \(X_i\), of the message + +Within the payload, integers are encoded using a variable length encoding. 
Each +integer is encoded as a sequence of bytes with the high bit set followed by a +byte with the high bit clear. The seven low bits of each byte store the bits of +the integer. The least significant bits are stored in the first byte. + +Strings are encoded as a variable-length integer followed by the string itself. + +Each key-value pair is encoded as a variable-length integer giving the tag, +followed by a string or variable-length integer giving the value. + +The payload is followed by the MAC. The length of the MAC is determined by the +authenticated encryption algorithm being used (8 bytes in this version of the +protocol). The MAC protects all of the bytes preceding the MAC. + +The length of the signature is determined by the signing algorithm being used +(64 bytes in this version of the protocol). The signature covers all of the +bytes preceding the signature. + +### Limitations + +#### Message Replays + +A message can be decrypted successfully multiple times. This means that an +attacker can re-send a copy of an old message, and the recipient will treat it +as a new message. + +To mitigate this it is recommended that applications track the ratchet indices +they have received and that they reject messages with a ratchet index that +they have already decrypted. + +#### Lack of Transcript Consistency + +In a group conversation, there is no guarantee that all recipients have +received the same messages. For example, if Alice is in a conversation with Bob +and Charlie, she could send different messages to Bob and Charlie, or could +send some messages to Bob but not Charlie, or vice versa. + +Solving this is, in general, a hard problem, particularly in a protocol which +does not guarantee in-order message delivery. For now it remains the subject of +future research. 
+ +#### Lack of Backward Secrecy + +[Backward secrecy](https://intensecrypto.org/public/lec_08_hash_functions_part2.html#sec-forward-and-backward-secrecy) +(also called 'future secrecy' or 'post-compromise security') is the property +that if current private keys are compromised, an attacker cannot decrypt +future messages in a given session. In other words, when looking +**backwards** in time at a compromise which has already happened, **current** +messages are still secret. + +By itself, Megolm does not possess this property: once the key to a Megolm +session is compromised, the attacker can decrypt any message that was +encrypted using a key derived from the compromised or subsequent ratchet +values. + +In order to mitigate this, the application should ensure that Megolm sessions +are not used indefinitely. Instead it should periodically start a new session, +with new keys shared over a secure channel. + + + +#### Partial Forward Secrecy + +[Forward secrecy](https://intensecrypto.org/public/lec_08_hash_functions_part2.html#sec-forward-and-backward-secrecy) +(also called 'perfect forward secrecy') is the property that if the current +private keys are compromised, an attacker cannot decrypt *past* messages in +a given session. In other words, when looking **forwards** in time towards a +potential future compromise, **current** messages will be secret. + +In Megolm, each recipient maintains a record of the ratchet value which allows +them to decrypt any messages sent in the session after the corresponding point +in the conversation. If this value is compromised, an attacker can similarly +decrypt past messages which were encrypted by a key derived from the +compromised or subsequent ratchet values. This gives 'partial' forward +secrecy. + +To mitigate this issue, the application should offer the user the option to +discard historical conversations, by winding forward any stored ratchet values, +or discarding sessions altogether. 
+ +#### Dependency on secure channel for key exchange + +The design of the Megolm ratchet relies on the availability of a secure +peer-to-peer channel for the exchange of session keys. Any vulnerabilities in +the underlying channel are likely to be amplified when applied to Megolm +session setup. + +For example, if the peer-to-peer channel is vulnerable to an unknown key-share +attack, the entire Megolm session becomes similarly vulnerable. For example: +Alice starts a group chat with Eve, and shares the session keys with Eve. Eve +uses the unknown key-share attack to forward the session keys to Bob, who +believes Alice is starting the session with him. Eve then forwards messages +from the Megolm session to Bob, who again believes they are coming from +Alice. Provided the peer-to-peer channel is not vulnerable to this attack, Bob +will realise that the key-sharing message was forwarded by Eve, and can treat +the Megolm session as a forgery. + +A second example: if the peer-to-peer channel is vulnerable to a replay +attack, this can be extended to entire Megolm sessions. + +### License + +The Megolm specification (this document) is licensed under the Apache License, +Version 2.0 http://www.apache.org/licenses/LICENSE-2.0. 
+ +[Ed25519]: http://ed25519.cr.yp.to/ +[HMAC-based key derivation function]: https://tools.ietf.org/html/rfc5869 +[HKDF-SHA-256]: https://tools.ietf.org/html/rfc5869 +[HMAC-SHA-256]: https://tools.ietf.org/html/rfc2104 +[SHA-256]: https://tools.ietf.org/html/rfc6234 +[AES-256]: http://csrc.nist.gov/publications/fips/fips197/fips-197.pdf +[CBC]: http://csrc.nist.gov/publications/nistpubs/800-38a/sp800-38a.pdf +[PKCS#7]: https://tools.ietf.org/html/rfc2315 +[Olm]: https://gitlab.matrix.org/matrix-org/olm/blob/master/docs/olm.md +[Protocol Buffers encoding]: https://developers.google.com/protocol-buffers/docs/encoding diff --git a/content/proposals.md b/content/proposals.md index eb7fd3da..acaa2172 100644 --- a/content/proposals.md +++ b/content/proposals.md @@ -1,6 +1,6 @@ --- title: "Spec Change Proposals" -weight: 60 +weight: 62 type: docs --- diff --git a/layouts/_markup/render-passthrough.html b/layouts/_markup/render-passthrough.html new file mode 100644 index 00000000..ff03f3e5 --- /dev/null +++ b/layouts/_markup/render-passthrough.html @@ -0,0 +1,9 @@ +{{- $opts := dict "output" "htmlAndMathml" "displayMode" (eq .Type "block") }} +{{- with try (transform.ToMath .Inner $opts) }} + {{- with .Err }} + {{- errorf "Unable to render mathematical markup to HTML using the transform.ToMath function. The KaTeX display engine threw the following error: %s: see %s." . $.Position }} + {{- else }} + {{- .Value }} + {{- $.Page.Store.Set "hasMath" true }} + {{- end }} +{{- end -}} diff --git a/layouts/docs/baseof.html b/layouts/docs/baseof.html index ef748ab7..a5a43c0a 100644 --- a/layouts/docs/baseof.html +++ b/layouts/docs/baseof.html @@ -12,6 +12,9 @@ class="no-js"> {{ partial "head.html" . }} + {{ if .Page.Store.Get "hasMath" }} + + {{ end }}