Changes

33,819 bytes added ,  06:51, 4 January 2019
Created page with "= Boot Process = TSEC is configured and initialized by the first bootloader during key generation. [6.2.0+] TSEC is now configured at the end of the first bootloader's main f..."
= Boot Process =
TSEC is configured and initialized by the first bootloader during key generation.

[6.2.0+] TSEC is now configured at the end of the first bootloader's main function.

== Initialization ==
During this stage several clocks are programmed.
// Clocks and resets are programmed in this order:
// HOST1X, TSEC, SOR_SAFE, SOR0, SOR1, KFUSE

// Program the HOST1X clock and resets
// Uses RST_DEVICES_L, CLK_OUT_ENB_L, CLK_SOURCE_HOST1X and CLK_L_HOST1X
enable_host1x_clkrst();

// Program the TSEC clock and resets
// Uses RST_DEVICES_U, CLK_OUT_ENB_U, CLK_SOURCE_TSEC and CLK_U_TSEC
enable_tsec_clkrst();

// Program the SOR_SAFE clock and resets
// Uses RST_DEVICES_Y, CLK_OUT_ENB_Y and CLK_Y_SOR_SAFE
enable_sor_safe_clkrst();

// Program the SOR0 clock and resets
// Uses RST_DEVICES_X, CLK_OUT_ENB_X and CLK_X_SOR0
enable_sor0_clkrst();

// Program the SOR1 clock and resets
// Uses RST_DEVICES_X, CLK_OUT_ENB_X, CLK_SOURCE_SOR1 and CLK_X_SOR1
enable_sor1_clkrst();

// Program the KFUSE clock and resets
// Uses RST_DEVICES_H, CLK_OUT_ENB_H and CLK_H_KFUSE
enable_kfuse_clkrst();

== Configuration ==
In this stage the Falcon IRQs, interfaces and DMA engine are configured.
// Clear the Falcon DMA control register
*(u32 *)FALCON_DMACTL = 0;

// Enable Falcon IRQs (interrupt mask set register)
*(u32 *)FALCON_IRQMSET = 0xFFF2;

// Configure Falcon IRQ destinations
// NOTE(review): presumably selects where each interrupt is routed - confirm against Falcon docs
*(u32 *)FALCON_IRQDEST = 0xFFF0;

// Enable Falcon interfaces
*(u32 *)FALCON_ITFEN = 0x03;

// Wait for Falcon's DMA engine to be idle
wait_flcn_dma_idle();

== Firmware loading ==
The Falcon firmware code is stored in the first bootloader's data segment in IMEM.
// Set DMA transfer base address to 0x40011900 >> 0x08
*(u32 *)FALCON_DMATRFBASE = 0x400119;

u32 trf_mode = 0; // A value of 0 sets FALCON_DMATRFCMD_IMEM
u32 dst_offset = 0;
u32 src_offset = 0;

// Load code into Falcon (0x100 bytes at a time)
while (src_offset < 0xF00)
{
flcn_load_firm(trf_mode, src_offset, dst_offset);
src_offset += 0x100;
dst_offset += 0x100;
}

[6.2.0+] The transfer base address and size of the Falcon firmware code changed.
// Set DMA transfer base address to 0x40010E00 >> 0x08
*(u32 *)FALCON_DMATRFBASE = 0x40010E;

u32 trf_mode = 0; // A value of 0 sets FALCON_DMATRFCMD_IMEM
u32 dst_offset = 0;
u32 src_offset = 0;

// Load code into Falcon (0x100 bytes at a time)
while (src_offset < 0x2900)
{
flcn_load_firm(trf_mode, src_offset, dst_offset);
src_offset += 0x100;
dst_offset += 0x100;
}

== Firmware booting ==
Falcon is booted up and the first bootloader waits for it to finish.
// Start the Falcon CPU, then poll FALCON_SCRATCH1 until a non-zero boot
// result shows up or the two second deadline passes.

// Set magic value in host1x scratch space
*(u32 *)0x50003300 = 0x34C2E1DA;

// Clear Falcon scratch1 MMIO
// (this is the register polled for the boot result below)
*(u32 *)FALCON_SCRATCH1 = 0;

// Set Falcon boot key version in scratch0 MMIO
*(u32 *)FALCON_SCRATCH0 = 0x01;

// Set Falcon's boot vector address
*(u32 *)FALCON_BOOTVEC = 0;

// Signal Falcon's CPU
*(u32 *)FALCON_CPUCTL = 0x02;

// Wait for Falcon's DMA engine to be idle
wait_flcn_dma_idle();

u32 boot_res = 0;

// The bootloader allows the TSEC two seconds from this point to do its job
// (the timer counts microseconds, hence 2000000)
u32 maximum_time = read_timer() + 2000000;

while (!boot_res)
{
// Read boot result from scratch1 MMIO
boot_res = *(u32 *)FALCON_SCRATCH1;

// Read from TIMERUS_CNTR_1US (microseconds from boot)
u32 current_time = read_timer();

// Booting is taking too long
if (current_time > maximum_time)
panic();
}

// Invalid boot result was returned
// (0xB0B0B0B0 is the only value accepted as success)
if (boot_res != 0xB0B0B0B0)
panic();

[6.2.0+] Falcon is booted up, but the first bootloader is left in an infinite loop.
// Same register sequence as the pre-6.2.0 start, but FALCON_SCRATCH1 is
// never polled afterwards: the bootloader simply stops in deadlock().

// Set magic value in host1x scratch space
*(u32 *)0x50003300 = 0x34C2E1DA;

// Clear Falcon scratch1 MMIO
*(u32 *)FALCON_SCRATCH1 = 0;

// Set Falcon boot key version in scratch0 MMIO
*(u32 *)FALCON_SCRATCH0 = 0x01;

// Set Falcon's boot vector address
*(u32 *)FALCON_BOOTVEC = 0;

// Signal Falcon's CPU
*(u32 *)FALCON_CPUCTL = 0x02;

// Infinite loop
deadlock();

== TSEC key generation ==
The TSEC key is generated by reading SOR1 registers modified by the Falcon CPU.
// Collect the four key words from the SOR1 registers, wipe those registers,
// then hand the key back to the caller through out_buf.

// Clear magic value in host1x scratch space
*(u32 *)0x50003300 = 0;

// Read TSEC key
u32 tsec_key[4];
tsec_key[0] = *(u32 *)NV_SOR_DP_HDCP_BKSV_LSB;
tsec_key[1] = *(u32 *)NV_SOR_TMDS_HDCP_BKSV_LSB;
tsec_key[2] = *(u32 *)NV_SOR_TMDS_HDCP_CN_MSB;
tsec_key[3] = *(u32 *)NV_SOR_TMDS_HDCP_CN_LSB;

// Clear SOR1 registers
*(u32 *)NV_SOR_DP_HDCP_BKSV_LSB = 0;
*(u32 *)NV_SOR_TMDS_HDCP_BKSV_LSB = 0;
*(u32 *)NV_SOR_TMDS_HDCP_CN_MSB = 0;
*(u32 *)NV_SOR_TMDS_HDCP_CN_LSB = 0;

// NOTE(review): as written this raises out_size to at least 0x10 instead of
// capping it at 0x10; an out_size above 0x10 would make the memcpy below read
// past the 16-byte tsec_key. Confirm the comparison direction against the
// disassembly.
if (out_size < 0x10)
out_size = 0x10;

// Copy back the TSEC key
memcpy(out_buf, tsec_key, out_size);

[6.2.0+] This is now done inside an encrypted TSEC payload.

== Cleanup ==
Clocks and resets are disabled before returning.
// Clocks and resets are deprogrammed in the reverse of the order used
// during initialization: KFUSE, SOR1, SOR0, SOR_SAFE, TSEC, HOST1X

// Deprogram KFUSE clock and resets
// Uses RST_DEVICES_H, CLK_OUT_ENB_H and CLK_H_KFUSE
disable_kfuse_clkrst();

// Deprogram SOR1 clock and resets
// Uses RST_DEVICES_X, CLK_OUT_ENB_X, CLK_SOURCE_SOR1 and CLK_X_SOR1
disable_sor1_clkrst();

// Deprogram SOR0 clock and resets
// Uses RST_DEVICES_X, CLK_OUT_ENB_X and CLK_X_SOR0
disable_sor0_clkrst();

// Deprogram SOR_SAFE clock and resets
// Uses RST_DEVICES_Y, CLK_OUT_ENB_Y and CLK_Y_SOR_SAFE
disable_sor_safe_clkrst();

// Deprogram TSEC clock and resets
// Uses RST_DEVICES_U, CLK_OUT_ENB_U, CLK_SOURCE_TSEC and CLK_U_TSEC
disable_tsec_clkrst();

// Deprogram HOST1X clock and resets
// Uses RST_DEVICES_L, CLK_OUT_ENB_L, CLK_SOURCE_HOST1X and CLK_L_HOST1X
disable_host1x_clkrst();

return;

= TSEC Firmware =
The actual code loaded into TSEC is assembled in NVIDIA's proprietary fuc5 ISA using crypto extensions.
Stored inside the first bootloader, this firmware binary is split into 4 blobs (names are unofficial): [[#Boot|Boot]] (unencrypted and unauthenticated code), [[#KeygenLdr|KeygenLdr]] (unencrypted and authenticated code), [[#Keygen|Keygen]] (encrypted and authenticated code) and [[#Key data|key data]].

[6.2.0+] There are now 6 blobs (names are unofficial): [[#Boot|Boot]] (unencrypted and unauthenticated code), [[#Loader|Loader]] (unencrypted and unauthenticated code), [[#KeygenLdr|KeygenLdr]] (unencrypted and authenticated code), [[#Keygen|Keygen]] (encrypted and authenticated code), [[#Payload|Payload]] (part unencrypted and unauthenticated code, part encrypted and authenticated code) and [[#Key data|key data]].

Firmware can be disassembled with [http://envytools.readthedocs.io/en/latest/ envytools'] [https://github.com/envytools/envytools/tree/master/envydis envydis]:

<code>envydis -i tsec_fw.bin -m falcon -V fuc5 -F crypt</code>

Note that the instruction set has variable length instructions, and the disassembler is not very good at detecting locations it should start disassembling from. One needs to disassemble multiple sub-regions and join them together.

== Boot ==
During this stage, [[#Key data|key data]] is loaded and [[#KeygenLdr|KeygenLdr]] is authenticated, loaded and executed.
Before returning, this stage writes back to the host (using MMIO registers) and sets the key used by the first bootloader.

[6.2.0+] During this stage, [[#Key data|key data]] is loaded and execution jumps to [[#Loader|Loader]].

=== Initialization ===
Falcon sets up its own stack pointer.
// Read the data segment size from UC_CAPS in IO space:
// take the 9-bit field that starts at bit 9 and scale it by 0x100
u32 data_seg_size = (((*(u32 *)UC_CAPS) >> 0x09) & 0x1FF) << 0x08;

// Use the resulting size as the initial stack pointer
*(u32 *)sp = data_seg_size;

=== Main ===
Falcon reads the [[#Key data|key data]] and then authenticates, loads and executes [[#KeygenLdr|KeygenLdr]] which sets the TSEC key.
// Boot stage main routine.
// Loads the key data and KeygenLdr, then repeatedly reads the key version
// supplied by the host in FALCON_SCRATCH0 and runs KeygenLdr until the boot
// result is the success magic or the retry budget is exhausted.
// Returns the boot result magic, which is also written to FALCON_SCRATCH1:
//   0xB0B0B0B0 - success (or nothing to do)
//   0xC0C0C0C0 - timeout
//   0xD0D0D0D0 - invalid key version
u32 boot_base_addr = 0;
u8 key_data_buf[0x7C];

// Read the key data from memory
u32 key_data_addr = 0x300;
u32 key_data_size = 0x7C;
read_code(key_data_buf, key_data_addr, key_data_size);

// Read the next code segment into boot base
u32 blob1_addr = 0x400;
u32 blob1_size = *(u32 *)(key_data_buf + 0x74);
read_code(boot_base_addr, blob1_addr, blob1_size);

// Upload the next code segment into Falcon's CODE region
u32 blob1_virt_addr = 0x300;
bool use_secret = true;
upload_code(blob1_virt_addr, boot_base_addr, blob1_size, blob1_virt_addr, use_secret);

u32 boot_res = 0;
bool is_done = false;
u32 time = 0;
bool is_blob_dec = false;

while (!is_done)
{
if (time > 4000000)
{
// Write boot failed (timeout) magic to FALCON_SCRATCH1
boot_res = 0xC0C0C0C0;
*(u32 *)FALCON_SCRATCH1 = boot_res;

break;
}

// Load key version from FALCON_SCRATCH0 (bootloader sends 0x01)
u32 key_version = *(u32 *)FALCON_SCRATCH0;

if (key_version == 0x64)
{
// Skip all next stages
boot_res = 0xB0B0B0B0;
*(u32 *)FALCON_SCRATCH1 = boot_res;

break;
}
else
{
if (key_version > 0x03)
boot_res = 0xD0D0D0D0; // Invalid key version
else if (key_version == 0)
boot_res = 0xB0B0B0B0; // No keys used
else
{
// Working copy of the key data.
// Declared as bytes: 0x7C bytes are copied and the offset used
// below (0x20) is a byte offset into the key data.
u8 key_buf[0x7C];

// Copy key data
memcpy(key_buf, key_data_buf, 0x7C);

u32 crypt_reg_flag = 0x00060000;
u32 blob1_hash_addr = key_buf + 0x20;

// fuc5 crypt cauth instruction
// Set auth_addr to 0x300 and auth_size to blob1_size
cauth((blob1_size << 0x10) | (0x300 >> 0x08));

// fuc5 crypt cxset instruction
// The next 2 xfer instructions will be overridden
// and target changes from DMA to crypto
cxset(0x02);

// Transfer data to crypto register c6
xdst(0, (blob1_hash_addr | crypt_reg_flag));

// Wait for all data loads/stores to finish
xdwait();

// Jump to KeygenLdr
u32 keygenldr_res = exec_keygenldr(key_buf, key_version, is_blob_dec);
is_blob_dec = true; // Set this to prevent decrypting again

// Set boot finish magic on success
if (keygenldr_res == 0)
boot_res = 0xB0B0B0B0;
}

// Write result to FALCON_SCRATCH1
*(u32 *)FALCON_SCRATCH1 = boot_res;

// Any other result leaves is_done false, so the loop retries
// until the timeout check above trips
if (boot_res == 0xB0B0B0B0)
is_done = true;
}

time++;
}

// Overwrite the TSEC key in SOR1 registers
// This has no effect because the KeygenLdr locks out the TSEC DMA engine
tsec_set_key(key_data_buf);

return boot_res;

[6.2.0+] Falcon reads the [[#Key data|key data]] and jumps to [[#Loader|Loader]].
u8 key_data_buf[0x84];

// Read the key data from memory
u32 key_data_addr = 0x300;
u32 key_data_size = 0x84;
read_code(key_data_buf, key_data_addr, key_data_size);

// Calculate the next blob's address
u32 blob4_size = *(u32 *)(key_data_buf + 0x80);
u32 blob0_size = *(u32 *)(key_data_buf + 0x70);
u32 blob1_size = *(u32 *)(key_data_buf + 0x74);
u32 blob2_size = *(u32 *)(key_data_buf + 0x78);
u32 blob3_addr = ((((blob0_size + blob1_size) + 0x100) + blob2_size) + blob4_size);

// Jump to next blob
(void *)blob3_addr();

return 0;

==== tsec_set_key ====
This method takes '''key_data_buf''' (a 16-byte buffer) as argument and writes its contents to SOR1 registers.
// This is TSEC_MMIO + 0x1000 + (0x1C300 / 0x40)
*(u32 *)TSEC_DMA_UNK = 0xFFF;

// Read the key's words
u32 key0 = *(u32 *)(key_data_buf + 0x00);
u32 key1 = *(u32 *)(key_data_buf + 0x04);
u32 key2 = *(u32 *)(key_data_buf + 0x08);
u32 key3 = *(u32 *)(key_data_buf + 0x0C);

u32 result = 0;

// Write to SOR1 register
result = tsec_dma_write(NV_SOR_DP_HDCP_BKSV_LSB, key0);

// Failed to write
if (result)
return result;

// Write to SOR1 register
result = tsec_dma_write(NV_SOR_TMDS_HDCP_BKSV_LSB, key1);

// Failed to write
if (result)
return result;

// Write to SOR1 register
result = tsec_dma_write(NV_SOR_TMDS_HDCP_CN_MSB, key2);

// Failed to write
if (result)
return result;

// Write to SOR1 register
result = tsec_dma_write(NV_SOR_TMDS_HDCP_CN_LSB, key3);

// Failed to write
if (result)
return result;

return result;

===== tsec_dma_write =====
This method takes '''addr''' and '''value''' as arguments and performs a DMA write using TSEC MMIO.
// The TSEC DMA engine must be free before a transfer is programmed
// (wait_tsec_dma waits for bit 0x0C in TSEC_DMA_CMD to be 0)
if (wait_tsec_dma())
return 1;

// Destination address
// This is TSEC_MMIO + 0x1000 + (0x1C100 / 0x40)
*(u32 *)TSEC_DMA_ADDR = addr;

// Value to write
// This is TSEC_MMIO + 0x1000 + (0x1C200 / 0x40)
*(u32 *)TSEC_DMA_VAL = value;

// Start transfer?
// This is TSEC_MMIO + 0x1000 + (0x1C000 / 0x40)
*(u32 *)TSEC_DMA_CMD = 0x800000F2;

// Wait for the engine again; any non-zero wait result is reported as 1
return wait_tsec_dma() ? 1 : 0;

== KeygenLdr ==
This stage is responsible for reconfiguring the Falcon's crypto co-processor and loading, decrypting, authenticating and executing [[#Keygen|Keygen]].

=== Main ===
// KeygenLdr entry point.
// Resets the crypto state (keeping c6), requests a key reload, validates the
// stack pointer, runs load_keygen and finally wipes the crypto registers
// before leaving authenticated mode.

// Clear interrupt flags
*(u8 *)flags_ie0 = 0;
*(u8 *)flags_ie1 = 0;
*(u8 *)flags_ie2 = 0;

// fuc5 crypt cxset instruction
// Clear overrides?
cxset(0x80);

// fuc5 crypt cauth instruction
// Clear bit 0x13 in cauth
cauth(cauth_old & ~(1 << 0x13));

// Set the target port for memory transfers
xtargets(0);

// Wait for all data loads/stores to finish
xdwait();

// Wait for all code loads to finish
xcwait();

// fuc5 crypt cxset instruction
// The next 2 xfer instructions will be overridden
// and target changes from DMA to crypto
cxset(0x02);

// Transfer data to crypto register c0
// This should clear any leftover data
xdst(0, 0);

// Wait for all data loads/stores to finish
xdwait();

// Clear all crypto registers, except c6 which is used for auth
// (c0 is zeroed by XORing it with itself, then copied into the others)
cxor(c0, c0);
cmov(c1, c0);
cmov(c2, c0);
cmov(c3, c0);
cmov(c4, c0);
cmov(c5, c0);
cmov(c7, c0);

// Clear TSEC_TEGRA_CTL_TKFI_KFUSE
// This is TSEC_MMIO + 0x1000 + (0x20E00 / 0x40)
// NOTE(review): the mask 0xEFFFF clears bit 16 but also every bit from 20
// upwards - confirm whether 0xFFFEFFFF was intended
*(u32 *)TSEC_TEGRA_CTL &= 0xEFFFF;

// Set TSEC_SCP_CTL_PKEY_REQUEST_RELOAD
// This is TSEC_MMIO + 0x1000 + (0x10600 / 0x40)
*(u32 *)TSEC_SCP_CTL_PKEY |= 0x01;

u32 is_pkey_loaded = 0;

// Wait for TSEC_SCP_CTL_PKEY_LOADED
// (busy-waits on bit 1 with no timeout)
while (!is_pkey_loaded)
is_pkey_loaded = (*(u32 *)TSEC_SCP_CTL_PKEY & 0x02);

// Read data segment size from IO space
u32 data_seg_size = *(u32 *)UC_CAPS;
data_seg_size >>= 0x09;
data_seg_size &= 0x1FF;
data_seg_size <<= 0x08;

// Check stack bounds
// (exit unless 0x800 <= sp < data_seg_size)
if ((*(u32 *)sp >= data_seg_size) || (*(u32 *)sp < 0x800))
exit();

// Decrypt and load Keygen stage
load_keygen(key_buf, key_version, is_blob_dec);

// fuc5 crypt csigclr instruction
// Clears the cauth signature
csigclr();

// Clear all crypto registers
// (this time c6 is included)
cxor(c0, c0);
cxor(c1, c1);
cxor(c2, c2);
cxor(c3, c3);
cxor(c4, c4);
cxor(c5, c5);
cxor(c6, c6);
cxor(c7, c7);

// Exit Authenticated Mode
// This is TSEC_MMIO + 0x1000 + (0x10300 / 0x40)
*(u32 *)TSEC_SCP_CTL_AUTH_MODE = 0;

return;

==== load_keygen ====
// Takes key_buf, key_version and is_blob_dec.
// Verifies the Boot blob against the hash stored in the key data, decrypts
// and uploads the Keygen blob when it has not been decrypted yet, then runs
// Keygen with the seed selected by key_version.
// Returns 0xDEADBEEF when the Boot blob hash does not match, 0xD0D0D0D0 for
// an unknown key_version, otherwise the value returned by exec_keygen.
u32 res = 0;

u32 boot_base_addr = 0;
u32 blob0_addr = 0;
u32 blob0_size = *(u32 *)(key_buf + 0x70);

// Load blob0 code again
read_code(boot_base_addr, blob0_addr, blob0_size);

// Generate "CODE_SIG_01" key into c4 crypto register
gen_usr_key(0, 0);

// Encrypt buffer with c4
u8 sig_key[0x10];
enc_buf(sig_key, blob0_size);

u32 src_addr = boot_base_addr;
u32 src_size = blob0_size;
u32 iv_addr = sig_key;
u32 dst_addr = sig_key;
u32 mode = 0x02; // AES-CMAC
u32 use_imem = 0; // Last do_crypto argument

// Do AES-CMAC over blob0 code
do_crypto(src_addr, src_size, iv_addr, dst_addr, mode, use_imem);

// Compare the hashes
if (memcmp(dst_addr, key_buf + 0x10, 0x10))
{
res = 0xDEADBEEF;
return res;
}

// Sizes and address of the Keygen blob.
// These are needed both by the decryption path below and by the cauth
// setup after it, so they are computed once at this level.
u32 blob1_size = *(u32 *)(key_buf + 0x74);
u32 blob2_size = *(u32 *)(key_buf + 0x78);
u32 blob2_virt_addr = blob0_size + blob1_size;

// Decrypt Keygen blob if needed
if (!is_blob_dec)
{
// Check stack bounds
if (*(u32 *)sp > blob2_size)
{
u32 blob2_addr = blob2_virt_addr + 0x100;

// Read Keygen encrypted blob
read_code(boot_base_addr, blob2_addr, blob2_size);

// Generate "CODE_ENC_01" key into c4 crypt register
gen_usr_key(0x01, 0x01);

src_addr = boot_base_addr;
src_size = blob2_size;
iv_addr = key_buf + 0x40;
dst_addr = boot_base_addr;
mode = 0; // AES-128-CBC decrypt (uses the IV from the key data)
use_imem = 0;

// Decrypt Keygen blob
do_crypto(src_addr, src_size, iv_addr, dst_addr, mode, use_imem);

// Upload the next code segment into Falcon's CODE region
bool use_secret = true;
upload_code(blob2_virt_addr, boot_base_addr, blob2_size, blob2_virt_addr, use_secret);

// Clear out the decrypted blob
memset(boot_base_addr, 0, blob2_size);
}
}

// fuc5 crypt cxset instruction
// The next 2 xfer instructions will be overridden
// and target changes from DMA to crypto
cxset(0x02);

u32 crypt_reg_flag = 0x00060000;
u32 blob2_hash_addr = key_buf + 0x30;

// Transfer data to crypto register c6
xdst(0, (blob2_hash_addr | crypt_reg_flag));

// Wait for all data loads/stores to finish
xdwait();

// Save previous cauth value
u32 c_old = cauth_old;

// fuc5 crypt cauth instruction
// Set auth_addr to blob2_virt_addr and auth_size to blob2_size
cauth((blob2_virt_addr >> 0x08) | (blob2_size << 0x10));

u32 hovi_key_addr = 0;

// Select next stage key
if (key_version == 0x01) // Use HOVI_EKS_01
hovi_key_addr = key_buf + 0x50;
else if (key_version == 0x02) // Use HOVI_COMMON_01
hovi_key_addr = key_buf + 0x60;
else if (key_version == 0x03) // Use debug key (empty)
hovi_key_addr = key_buf + 0x00;
else
res = 0xD0D0D0D0;

// Jump to Keygen
if (hovi_key_addr)
res = exec_keygen(hovi_key_addr, key_version);

// Clear out key data
memset(key_buf, 0, 0x7C);

// fuc5 crypt cauth instruction
// Restore previous cauth value
cauth(c_old);

return res;

===== gen_usr_key =====
This method takes '''type''' and '''mode''' as arguments and generates a key.
// Derives a key from a fixed seed string and leaves it in crypto register c4.
// type selects the seed; a non-zero mode additionally runs key expansion on
// the result so it can be used for decryption.
u8 seed_buf[0x10];

// Read a 16 bytes seed based on supplied type
/*
Type 0: "CODE_SIG_01" + null padding
Type 1: "CODE_ENC_01" + null padding
*/
get_seed(seed_buf, type);

// This will write the seed into crypto register c0
crypt_store(0, seed_buf);

// fuc5 csecret instruction
// Load selected secret into crypto register c1
csecret(c1, 0x26);

// fuc5 ckeyreg instruction
// Bind c1 register as the key for enc/dec operations
ckeyreg(c1);

// fuc5 cenc instruction
// Encrypt seed_buf (in c0) using keyreg value as key into c1
// (c1 is both the bound key and the destination here)
cenc(c1, c0);

// fuc5 csigenc instruction
// Encrypt c1 register with the auth signature stored in c6
csigenc(c1, c1);

// Copy the result into c4 (will be used as key)
cmov(c4, c1);

// Do key expansion (for decryption)
if (mode != 0)
ckexp(c4, c4); // fuc5 ckexp instruction

return;

===== enc_buf =====
This method takes '''buf''' (a 16-byte buffer) and '''size''' as arguments and encrypts the supplied buffer.
// Builds a 16-byte block made of three zero words followed by size,
// encrypts it with the key held in c4 and writes the result back into buf.

// Set first 3 words to null
*(u32 *)(buf + 0x00) = 0;
*(u32 *)(buf + 0x04) = 0;
*(u32 *)(buf + 0x08) = 0;

// Swap halves (b16, b32 and b16 again)
// NOTE(review): presumably updates size in place - the call's result is not
// assigned here, confirm against the disassembly
hswap(size);

// Store the size as the last word
*(u32 *)(buf + 0x0C) = size;

// This will write buf into crypto register c3
crypt_store(0x03, buf);

// fuc5 ckeyreg instruction
// Bind c4 register (the key generated by gen_usr_key) as the key for enc/dec operations
ckeyreg(c4);

// fuc5 cenc instruction
// Encrypt buf (in c3) using keyreg value as key into c5
cenc(c5, c3);

// This will read into buf from crypto register c5
crypt_load(0x05, buf);

return;

===== do_crypto =====
This is the method responsible for all crypto operations performed during [[#KeygenLdr|KeygenLdr]]. It takes '''src_addr''', '''src_size''', '''iv_addr''', '''dst_addr''', '''mode''' and '''use_imem''' as arguments.
// Runs one AES operation over src_size bytes using the key held in c4.
//   src_addr - source address, must be 0x10-byte aligned
//   src_size - source size, must be a non-zero multiple of 0x10
//   iv_addr  - address of the IV, or 0 to use a null IV
//   dst_addr - destination address, must be 0x10-byte aligned
//   mode     - 0x00 AES-128-CBC decrypt, 0x01 AES-128-CBC encrypt,
//              0x02 AES-CMAC, 0x03 AES-128-ECB decrypt,
//              0x04 AES-128-ECB encrypt; any other value returns at once
//   use_imem - selects falcon imem instead of external memory as the
//              peer of the crypto input/output stream
// Invalid sizes or addresses end in exit(). For AES-CMAC nothing is written
// to dst_addr until the end, when the content of c5 is stored there.

// Check for invalid source data size
if (!src_size || (src_size & 0x0F))
exit();

// Check for invalid source data address
if (src_addr & 0x0F)
exit();

// Check for invalid destination data address
if (dst_addr & 0x0F)
exit();

// Use IV if available
if (iv_addr)
{
// This will write the iv_addr into crypto register c5
crypt_store(0x05, iv_addr);
}
else
{
// Clear c5 register (use null IV)
cxor(c5, c5);
}

// Use key in c4
ckeyreg(c4);

// AES-128-CBC decrypt
if (mode == 0x00)
{
// Create crypto script with 5 instructions
cs0begin(0x05);

cxsin(c3); // Read 0x10 bytes from crypto stream into c3
cdec(c2, c3); // Decrypt from c3 into c2
cxor(c5, c2); // XOR c2 with c5 and store in c5
cxsout(c5); // Write 0x10 bytes into crypto stream from c5
cmov(c5, c3); // Move c3 into c5
}
else if (mode == 0x01) // AES-128-CBC encrypt
{
// Create crypto script with 4 instructions
cs0begin(0x04);

cxsin(c3); // Read 0x10 bytes from crypto stream into c3
cxor(c3, c5); // XOR c5 with c3 and store in c3
cenc(c5, c3); // Encrypt from c3 into c5
cxsout(c5); // Write 0x10 bytes into crypto stream from c5
}
else if (mode == 0x02) // AES-CMAC
{
// Create crypto script with 3 instructions
cs0begin(0x03);

cxsin(c3); // Read 0x10 bytes from crypto stream into c3
cxor(c5, c3); // XOR c3 with c5 and store in c5
cenc(c5, c5); // Encrypt from c5 into c5
}
else if (mode == 0x03) // AES-128-ECB decrypt
{
// Create crypto script with 3 instructions
cs0begin(0x03);

cxsin(c3); // Read 0x10 bytes from crypto stream into c3
cdec(c5, c3); // Decrypt from c3 into c5
cxsout(c5); // Write 0x10 bytes into crypto stream from c5
}
else if (mode == 0x04) // AES-128-ECB encrypt
{
// Create crypto script with 3 instructions
cs0begin(0x03);

cxsin(c3); // Read 0x10 bytes from crypto stream into c3
cenc(c5, c3); // Encrypt from c3 into c5
cxsout(c5); // Write 0x10 bytes into crypto stream from c5
}
else
return;

// Main loop
while (src_size > 0)
{
// Process up to 0x10 blocks (0x100 bytes) per iteration
u32 blk_count = (src_size >> 0x04);

if (blk_count > 0x10)
blk_count = 0x10;

// Check size align
// (a count that is not a power of two falls back to a single block)
if (blk_count & (blk_count - 0x01))
blk_count = 0x01;

u32 block_size = (blk_count << 0x04);

u32 crypt_xfer_src = 0;
u32 crypt_xfer_dst = 0;

// Exactly one branch must run per iteration, hence the else-if chain:
// the final else is the single-block case only
if (block_size == 0x20)
{
crypt_xfer_src = (0x00030000 | src_addr);
crypt_xfer_dst = (0x00030000 | dst_addr);

// Execute crypto script 2 times (1 for each block)
cs0exec(0x02);
}
else if (block_size == 0x40)
{
crypt_xfer_src = (0x00040000 | src_addr);
crypt_xfer_dst = (0x00040000 | dst_addr);

// Execute crypto script 4 times (1 for each block)
cs0exec(0x04);
}
else if (block_size == 0x80)
{
crypt_xfer_src = (0x00050000 | src_addr);
crypt_xfer_dst = (0x00050000 | dst_addr);

// Execute crypto script 8 times (1 for each block)
cs0exec(0x08);
}
else if (block_size == 0x100)
{
crypt_xfer_src = (0x00060000 | src_addr);
crypt_xfer_dst = (0x00060000 | dst_addr);

// Execute crypto script 16 times (1 for each block)
cs0exec(0x10);
}
else
{
crypt_xfer_src = (0x00020000 | src_addr);
crypt_xfer_dst = (0x00020000 | dst_addr);

// Execute crypto script 1 time (1 for each block)
cs0exec(0x01);

// Ensure proper block size
block_size = 0x10;
}

// fuc5 crypt cxset instruction
// The next xfer instruction will be overridden
// and target changes from DMA to crypto input/output stream
if (use_imem)
cxset(0xA1); // Flag 0xA0 is falcon imem <-> crypto input/output stream
else
cxset(0x21); // Flag 0x20 is external mem <-> crypto input/output stream

// Transfer data into the crypto input/output stream
xdst(crypt_xfer_src, crypt_xfer_src);

// AES-CMAC only needs one more xfer instruction
if (mode == 0x02)
{
// fuc5 crypt cxset instruction
// The next xfer instruction will be overridden
// and target changes from DMA to crypto input/output stream
if (use_imem)
cxset(0xA1); // Flag 0xA0 is falcon imem <-> crypto input/output stream
else
cxset(0x21); // Flag 0x20 is external mem <-> crypto input/output stream

// Wait for all data loads/stores to finish
xdwait();
}
else // AES enc/dec needs 2 more xfer instructions
{
// fuc5 crypt cxset instruction
// The next 2 xfer instructions will be overridden
// and target changes from DMA to crypto input/output stream
if (use_imem)
cxset(0xA2); // Flag 0xA0 is falcon imem <-> crypto input/output stream
else
cxset(0x22); // Flag 0x20 is external mem <-> crypto input/output stream

// Transfer data from the crypto input/output stream
xdld(crypt_xfer_dst, crypt_xfer_dst);

// Wait for all data loads/stores to finish
xdwait();

// Increase the destination address by block size
dst_addr += block_size;
}

// Increase the source address by block size
src_addr += block_size;

// Decrease the source size by block size
src_size -= block_size;
}

// AES-CMAC result is in c5
if (mode == 0x02)
{
// This will read into dst_addr from crypto register c5
crypt_load(0x05, dst_addr);
}

return;

== Keygen ==
This stage is decrypted by [[#KeygenLdr|KeygenLdr]] using a key generated by encrypting a seed with a hardware secret. It will generate the final TSEC key.

== Loader ==
This stage starts by authenticating and executing [[#KeygenLdr|KeygenLdr]] which in turn authenticates, decrypts and executes [[#Keygen|Keygen]] (both blobs remain unchanged from previous firmware versions).
After the TSEC key has been generated, execution returns to this stage which then parses and executes [[#Payload|Payload]].

=== Main ===
u8 key_data_buf[0x84];
u8 tmp_key_data_buf[0x84];

// Read the key data from memory
u32 key_data_addr = 0x300;
u32 key_data_size = 0x84;
read_code(key_data_buf, key_data_addr, key_data_size);

// Read the KeygenLdr blob from memory
u32 boot_base_addr = 0;
u32 blob1_addr = 0x400;
u32 blob1_size = *(u32 *)(key_data_buf + 0x74);
read_code(boot_base_addr, blob1_addr, blob1_size);

// Upload the next code segment into Falcon's CODE region
u32 blob1_virt_addr = 0x300;
bool use_secret = true;
upload_code(blob1_virt_addr, boot_base_addr, blob1_size, blob1_virt_addr, use_secret);

// Backup the key data
memcpy(tmp_key_data_buf, key_data_buf, 0x84);

// Save previous cauth value
u32 c_old = cauth_old;

// fuc5 crypt cauth instruction
// Set auth_addr to 0x300 and auth_size to blob1_size
cauth((blob1_size << 0x10) | (0x300 >> 0x08));

// fuc5 crypt cxset instruction
// The next 2 xfer instructions will be overridden
// and target changes from DMA to crypto
cxset(0x02);

u32 crypt_reg_flag = 0x00060000;
u32 blob1_hash_addr = tmp_key_data_buf + 0x20;

// Transfer data to crypto register c6
xdst(0, (blob1_hash_addr | crypt_reg_flag));

// Wait for all data loads/stores to finish
xdwait();

u32 key_version = 0x01;
bool is_blob_dec = false;

// Jump to KeygenLdr
u32 keygenldr_res = exec_keygenldr(tmp_key_data_buf, key_version, is_blob_dec);

// Set boot finish magic on success
if (keygenldr_res == 0)
keygenldr_res = 0xB0B0B0B0

// Write result to FALCON_SCRATCH1
*(u32 *)FALCON_SCRATCH1 = keygenldr_res;

if (keygenldr_res != 0xB0B0B0B0)
return keygenldr_res;

// fuc5 crypt cauth instruction
// Restore previous cauth value
cauth(c_old);

u8 flcn_hdr_buf[0x18];
u8 flcn_os_hdr_buf[0x10];

blob1_size = *(u32 *)(key_data_buf + 0x74);
u32 blob2_size = *(u32 *)(key_data_buf + 0x78);
u32 blob0_size = *(u32 *)(key_data_buf + 0x70);

// Read the Payload blob's Falcon header from memory
u32 blob4_flcn_hdr_addr = (((blob0_size + blob1_size) + 0x100) + blob2_size);
read_code(flcn_hdr_buf, blob4_flcn_hdr_addr, 0x18);

blob1_size = *(u32 *)(key_data_buf + 0x74);
blob2_size = *(u32 *)(key_data_buf + 0x78);
blob0_size = *(u32 *)(key_data_buf + 0x70);
u32 flcn_hdr_size = *(u32 *)(flcn_hdr_buf + 0x0C);

// Read the Payload blob's Falcon OS header from memory
u32 blob4_flcn_os_hdr_addr = ((((blob0_size + blob1_size) + 0x100) + blob2_size) + flcn_hdr_size);
read_code(flcn_os_hdr_buf, blob4_flcn_os_hdr_addr, 0x10);

blob1_size = *(u32 *)(key_data_buf + 0x74);
blob2_size = *(u32 *)(key_data_buf + 0x78);
blob0_size = *(u32 *)(key_data_buf + 0x70);
u32 flcn_code_hdr_size = *(u32 *)(flcn_hdr_buf + 0x10);
u32 flcn_os_size = *(u32 *)(flcn_os_hdr_buf + 0x04);

// Read the Payload blob's Falcon OS image from memory
u32 blob4_flcn_os_addr = ((((blob0_size + blob1_size) + 0x100) + blob2_size) + flcn_code_hdr_size);
read_code(boot_base_addr, blob4_flcn_os_hdr_addr, flcn_os_size);

// Upload the Payload's Falcon OS image boot stub code segment into Falcon's CODE region
u32 blob4_flcn_os_boot_virt_addr = 0;
u32 blob4_flcn_os_boot_size = 0x100;
use_secret = false;
upload_code(blob4_flcn_os_boot_virt_addr, boot_base_addr, blob4_flcn_os_boot_size, blob4_flcn_os_boot_virt_addr, use_secret);

flcn_os_size = *(u32 *)(flcn_os_hdr_buf + 0x04);

// Upload the Payload blob's Falcon OS encrypted image code segment into Falcon's CODE region
u32 blob4_flcn_os_img_virt_addr = 0x100;
u32 blob4_flcn_os_img_size = (flcn_os_size - 0x100);
use_secret = true;
upload_code(blob4_flcn_os_img_virt_addr, boot_base_addr + 0x100, blob4_flcn_os_img_size, blob4_flcn_os_img_virt_addr, use_secret);

// Wait for all code loads to finish
xcwait();

blob1_size = *(u32 *)(key_data_buf + 0x74);
blob2_size = *(u32 *)(key_data_buf + 0x78);
blob0_size = *(u32 *)(key_data_buf + 0x70);
flcn_code_hdr_size = *(u32 *)(flcn_hdr_buf + 0x10);
u32 flcn_os_code_size = *(u32 *)(flcn_os_hdr_buf + 0x08);

// Read the Payload blob's falcon OS image's hash from memory
u32 blob4_flcn_os_img_hash_addr = (((((blob0_size + blob1_size) + 0x100) + blob2_size) + flcn_code_hdr_size) + flcn_os_code_size);
read_code(0, blob4_flcn_os_img_hash_addr, 0x10);

// Read data segment size from IO space
u32 data_seg_size = *(u32 *)UC_CAPS;
data_seg_size >>= 0x03;
data_seg_size &= 0x3FC0;

u32 data_addr = 0x10;

// Clear all data except the first 0x10 bytes (Payload blob's Falcon OS image's hash)
for (int data_word_count = 0x04; data_word_count < data_seg_size; data_word_count++)
{
*(u32 *)(data_addr) = 0;
data_addr += 0x04;
}

// Clear all crypto registers
cxor(c0, c0);
cxor(c1, c1);
cxor(c2, c2);
cxor(c3, c3);
cxor(c4, c4);
cxor(c5, c5);
cxor(c6, c6);
cxor(c7, c7);

// fuc5 crypt csigclr instruction
// Clears the cauth signature
csigclr();

// Jump to Payload
exec_payload();

return 0xB0B0B0B0;

== Payload ==
This stage prepares the stack then authenticates, decrypts and executes the Payload blob's Falcon OS image.

=== Main ===
// Read the data segment size from UC_CAPS in IO space
// and load it into the stack pointer in one step
*(u32 *)sp = ((*(u32 *)UC_CAPS) >> 0x01) & 0xFF00;

// Hand over to the Payload blob's Falcon OS image boot stub
exec_flcn_os_boot();

// Halt execution
exit();

return;

==== exec_flcn_os_boot ====
// Boot stub of the Payload blob's Falcon OS image.
// Saves the external transfer base, loads the image hash found at data
// address 0 into c6, programs cauth for the image and jumps to it.

// Read the transfer base address from IO space
u32 xfer_ext_base_addr = *(u32 *)XFER_EXT_BASE;

// Copy transfer base address to data memory
u32 scratch_data_addr = 0x300;
*(u32 *)scratch_data_addr = xfer_ext_base_addr;

// Set the transfer base address
xcbase(xfer_ext_base_addr);

// fuc5 crypt cxset instruction
// The next xfer instruction will be overridden
// and target changes from DMA to crypto
cxset(0x01);

u32 crypt_reg_flag = 0x00060000;
u32 blob4_flcn_os_img_hash_addr = 0;

// Transfer data to crypto register c6
xdst(0, (blob4_flcn_os_img_hash_addr | crypt_reg_flag));

// fuc5 crypt cxset instruction
// The next xfer instruction will be overridden
// and target changes from DMA to crypto
cxset(0x01);

// Wait for all data loads/stores to finish
xdwait();

// Copy c6 into c7, then clear c7 by XORing it with itself
cmov(c7, c6);
cxor(c7, c7);

// fuc5 crypt cauth instruction
// Set auth_addr to 0x100, auth_size to 0x1300,
// bit 16 (is_secret) and bit 17 (is_encrypted)
cauth((0x02 << 0x10) | (0x01 << 0x10) | (0x1300 << 0x10) | (0x100 >> 0x08));

// Clear interrupt flags
*(u8 *)flags_ie0 = 0;
*(u8 *)flags_ie1 = 0;

// Jump to the Payload blob's Falcon OS image
exec_flcn_os_img();

// Only reached if exec_flcn_os_img returns
return 0x0F0F0F0F;

== Key data ==
Small buffer stored after the [[#Boot|Boot]] blob and used across all stages.

{| class="wikitable" border="1"
! Offset
! Size
! Description
|-
| 0x00
| 0x10
| Debug key (empty)
|-
| 0x10
| 0x10
| blob0 ([[#Boot|Boot]]) auth hash
|-
| 0x20
| 0x10
| blob1 ([[#KeygenLdr|KeygenLdr]]) auth hash
|-
| 0x30
| 0x10
| blob2 ([[#Keygen|Keygen]]) auth hash
|-
| 0x40
| 0x10
| blob2 ([[#Keygen|Keygen]]) AES IV
|-
| 0x50
| 0x10
| HOVI EKS seed
|-
| 0x60
| 0x10
| HOVI COMMON seed
|-
| 0x70
| 0x04
| blob0 ([[#Boot|Boot]]) size
|-
| 0x74
| 0x04
| blob1 ([[#KeygenLdr|KeygenLdr]]) size
|-
| 0x78
| 0x04
| blob2 ([[#Keygen|Keygen]]) size
|-
| 0x7C
| 0x04
| [6.2.0+] blob3 ([[#Loader|Loader]]) size
|-
| 0x80
| 0x04
| [6.2.0+] blob4 ([[#Payload|Payload]]) size
|}