Changes

Jump to navigation Jump to search
14,747 bytes added ,  19:53, 11 August 2017
Created page with "Like most Tegra based devices, the Switch's GPU also includes a Falcon microprocessor. = Driver = A host driver for communicating with the Falcon is mapped to physical addres..."
Like most Tegra based devices, the Switch's GPU also includes a Falcon microprocessor.

= Driver =
A host driver for communicating with the Falcon is mapped to physical address 0x54500000 with a total size of 0x40000 bytes and exposes several registers.

== Registers ==
{| class="wikitable" border="1"
! Name
! Address
! Width
|-
| [[#FALCON_IRQMSET|FALCON_IRQMSET]]
| 0x54501010
| 0x04
|-
| [[#FALCON_IRQDEST|FALCON_IRQDEST]]
| 0x5450101C
| 0x04
|-
| [[#FALCON_SCRATCH0|FALCON_SCRATCH0]]
| 0x54501040
| 0x04
|-
| [[#FALCON_SCRATCH1|FALCON_SCRATCH1]]
| 0x54501044
| 0x04
|-
| [[#FALCON_ITFEN|FALCON_ITFEN]]
| 0x54501048
| 0x04
|-
| [[#FALCON_CPUCTL|FALCON_CPUCTL]]
| 0x54501100
| 0x04
|-
| [[#FALCON_BOOTVEC|FALCON_BOOTVEC]]
| 0x54501104
| 0x04
|-
| [[#FALCON_DMACTL|FALCON_DMACTL]]
| 0x5450110C
| 0x04
|-
| [[#FALCON_DMATRFBASE|FALCON_DMATRFBASE]]
| 0x54501110
| 0x04
|-
| [[#FALCON_DMATRFMOFFS|FALCON_DMATRFMOFFS]]
| 0x54501114
| 0x04
|-
| [[#FALCON_DMATRFCMD|FALCON_DMATRFCMD]]
| 0x54501118
| 0x04
|-
| [[#FALCON_DMATRFFBOFFS|FALCON_DMATRFFBOFFS]]
| 0x5450111C
| 0x04
|-
|}

=== FALCON_IRQMSET ===
Used for configuring Falcon's IRQs.

=== FALCON_IRQDEST ===
Used for configuring Falcon's IRQs.

=== FALCON_SCRATCH0 ===
MMIO register for reading/writing data to Falcon.

=== FALCON_SCRATCH1 ===
MMIO register for reading/writing data to Falcon.

=== FALCON_ITFEN ===
{| class="wikitable" border="1"
! Bits
! Description
|-
| 0
| FALCON_ITFEN_CTXEN
|-
| 1
| FALCON_ITFEN_MTHDEN
|-
|}

Used for enabling/disabling Falcon interfaces.

=== FALCON_CPUCTL ===
{| class="wikitable" border="1"
! Bits
! Description
|-
| 1
| FALCON_CPUCTL_STARTCPU
|-
|}

Used for signaling Falcon's CPU.

=== FALCON_BOOTVEC ===
Takes the Falcon's boot vector address.

=== FALCON_DMACTL ===
{| class="wikitable" border="1"
! Bits
! Description
|-
| 1
| FALCON_DMACTL_DMEM_SCRUBBING
|-
| 2
| FALCON_DMACTL_IMEM_SCRUBBING
|-
|}

Used for configuring the Falcon's DMA engine.

=== FALCON_DMATRFBASE ===
Takes the host's base address for transferring data to/from the Falcon (DMA).

=== FALCON_DMATRFMOFFS ===
Takes the offset for the host's source memory being transferred.

=== FALCON_DMATRFCMD ===
{| class="wikitable" border="1"
! Bits
! Description
|-
| 1
| FALCON_DMATRFCMD_IDLE (this is set if the engine is idle)
|-
| 4
| FALCON_DMATRFCMD_IMEM
|-
| 9-10
| FALCON_DMATRFCMD_SIZE_256B
|-
|}

Used for configuring DMA transfers.

=== FALCON_DMATRFFBOFFS ===
Takes the offset for Falcon's target memory being transferred.

= Boot Process =
The Falcon is configured and initialized by the first bootloader during key generation (sub_400114FC).

== Initialization ==
During this stage several clocks are programmed.
// Program the HOST1X clock and resets
set_host1x_clkrst();

// Program the TSEC clock and resets
set_tsec_clkrst();

// Program the QSPI clock and resets (no source)
set_qspi_clkrst();

// Program the SOR1 clock and resets (no source)
set_sor1_clkrst0();

// Program the SOR1 clock and resets
set_sor1_clkrst1();

// Enable clock and resets for H group
set_h_clkrst();

== Configuration ==
In this stage the Falcon IRQs, interfaces and DMA engine are configured.
// Clear the Falcon DMA control register
*(u32 *)FALCON_DMACTL = 0;

// Enable Falcon IRQs
*(u32 *)FALCON_IRQMSET = 0xFFF2;

// Enable Falcon IRQs
*(u32 *)FALCON_IRQDEST = 0xFFF0;

// Enable Falcon interfaces
*(u32 *)FALCON_ITFEN = 0x03;

// Wait for Falcon's DMA engine to be idle
wait_flcn_dma_idle();

== Firmware loading ==
The Falcon firmware code is stored in the first bootloader's data segment in IMEM.
// Set DMA transfer base address to 0x40011900 >> 0x08
*(u32 *)FALCON_DMATRFBASE = 0x400119;

u32 trf_mode = 0; // A value of 0 sets FALCON_DMATRFCMD_IMEM
u32 dst_offset = 0;
u32 src_offset = 0;

// Load code into Falcon (0x100 bytes at a time)
while (src_offset < 0xF00)
{
flcn_load_firm(trf_mode, src_offset, dst_offset);
src_offset += 0x100;
dst_offset += 0x100;
}

== Firmware booting ==
Falcon is booted up and the first bootloader waits for it to finish.
// Set host1x sync config
*(u32 *)0x50003300 = 0x34C2E1DA;

// Clear Falcon scratch1 MMIO
*(u32 *)FALCON_SCRATCH1 = 0;

// Set Falcon boot key version in scratch0 MMIO
*(u32 *)FALCON_SCRATCH0 = 0x01;

// Set Falcon's boot vector address
*(u32 *)FALCON_BOOTVEC = 0;

// Signal Falcon's CPU
*(u32 *)FALCON_CPUCTL = 0x02;

// Wait for Falcon's DMA engine to be idle
wait_flcn_dma_idle();

u32 boot_res = 0;
u32 time = 0;

while (!boot_res)
{
// Read boot result from scratch1 MMIO
boot_res = *(u32 *)FALCON_SCRATCH1;

// Read from RTC_MILLISECONDS
time = rtc_read();

// Booting is taking too long
if (time > 2000000)
panic();
}

// Invalid boot result was returned
if (boot_res != 0xB0B0B0B0)
panic();

== Keygen ==
In this final stage the Falcon device key is generated by reading SOR registers modified by Falcon.
// Clear host1x sync config
*(u32 *)0x50003300 = 0;

// Generate Falcon device key
u32 falcon_device_key[4];
falcon_device_key[0] = *(u32 *)NV_SOR_DP_HDCP_BKSV_LSB;
falcon_device_key[1] = *(u32 *)NV_SOR_TMDS_HDCP_BKSV_LSB;
falcon_device_key[2] = *(u32 *)NV_SOR_TMDS_HDCP_CN_MSB;
falcon_device_key[3] = *(u32 *)NV_SOR_TMDS_HDCP_CN_LSB;

// Clear SOR registers
*(u32 *)NV_SOR_DP_HDCP_BKSV_LSB = 0;
*(u32 *)NV_SOR_TMDS_HDCP_BKSV_LSB = 0;
*(u32 *)NV_SOR_TMDS_HDCP_CN_MSB = 0;
*(u32 *)NV_SOR_TMDS_HDCP_CN_LSB = 0;

if (out_size < 0x10)
out_size = 0x10;

// Copy back the Falcon key
memcpy(out_buf, falcon_device_key, out_size);

// Deprogram UARTC and I2C5 clock and resets
unset_uartc_i2c5_clkrst();

// Deprogram SOR1 clock and resets
unset_sor1_clkrst1();

// Deprogram SOR1 clock and resets (no source)
unset_sor1_clkrst0();

// Deprogram QSPI clock and resets (no source)
unset_qspi_clkrst();

// Deprogram TSEC clock and resets
unset_tsec_clkrst();

// Deprogram HOST1X clock and resets
unset_host1x_clkrst();

return;

= Firmware =
The actual code loaded into Falcon is assembled in NVIDIA's proprietary fuc5 ISA using crypto extensions.
Stored inside the first bootloader, this firmware binary is split into 4 blobs: Stage0, Stage1, Stage2 and key data.

== Stage 0 ==
During this stage key data is loaded and Stage 1 is authenticated, loaded and executed.
Before returning, this stage writes back to the host (using MMIO registers) and sets the device key used by the first bootloader.

=== Initialization ===
Falcon sets up its own stack pointer.
// Read data segment size from IO space
u32 data_seg_size = *(u32 *)UC_CAPS;
data_seg_size >>= 0x09;
data_seg_size &= 0x1FF;
data_seg_size <<= 0x08;

// Set the stack pointer
*(u32 *)sp = data_seg_size;

u32 boot_base_addr = 0;
u32 key_data_buf[0x7C];

=== Stage 1 loading ===
// Read the key data from memory
u32 key_data_addr = 0x300;
u32 key_data_size = 0x7C;
read_code(key_data_buf, key_data_addr, key_data_size);

// Read the next code segment into boot base
u32 blob1_addr = 0x400;
u32 blob1_size = *(u32 *)(key_data_buf + 0x74);
read_code(boot_base_addr, blob1_addr, blob1_size);

// Upload the next code segment into Falcon's CODE region
u32 blob1_virt_addr = 0x300;
bool use_secret = true;
upload_code(blob1_virt_addr, boot_base_addr, blob1_size, blob1_virt_addr, use_secret);

u32 boot_res = 0;
bool is_done = false;
u32 time = 0;
bool is_blob_dec = false;

while (!is_done)
{
if (time > 4000000)
{
// Write boot failed (timeout) magic to FALCON_SCRATCH1
boot_res = 0xC0C0C0C0;
*(u32 *)FALCON_SCRATCH1 = boot_res;

break;
}

// Load key version from FALCON_SCRATCH0 (bootloader sends 0x01)
u32 key_version = *(u32 *)FALCON_SCRATCH0;

if (key_version == 0x64)
{
// Skip all next stages
boot_res = 0xB0B0B0B0;
*(u32 *)FALCON_SCRATCH1 = boot_res;

break;
}
else
{
if (key_version > 0x03)
boot_res = 0xD0D0D0D0; // Invalid key version
else if (key_version == 0)
boot_res = 0xB0B0B0B0; // No keys used
else
{
u32 key_buf[0x7C];

// Copy key data
memcpy(key_buf, key_data_buf, 0x7C);

u32 xfer_size_flag = 0x00060000;
u32 blob1_hash_addr = key_buf + 0x20;

// fuc5 crypt cauth instruction
// Set auth_addr to 0x300 and auth_size to blob1_size
*(u32 *)cauth = ((blob1_size << 0x10) | (0x300 >> 0x08));

// fuc5 crypt cxset instruction
// Set crypto transfer mode
*(u32 *)cx = 0x02;

// Transfer data from/to Falcon
xdst(0, (blob1_hash_addr | xfer_size_flag));

// Wait for all data loads/stores to finish
xdwait();

// Jump to Stage1
u32 stage1_res = exec_stage1(key_buf, key_version, is_blob_dec);
is_blob_dec = true; // Set this to prevent decrypting again

// Set boot finish magic on success
if (stage1_res == 0)
boot_res = 0xB0B0B0B0;
}

// Write result to FALCON_SCRATCH1
*(u32 *)FALCON_SCRATCH1 = boot_res;

if (boot_res == 0xB0B0B0B0)
is_done = true;
}

time++;
}

// Write Falcon device key to registers
set_device_key(key_data_buf);

return boot_res;

== Stage 1 ==
This stage is responsible for reconfiguring the Falcon's crypto co-processor and loading, decrypting, authenticating and executing Stage 2.

=== Crypto setup ===
// Clear interrupt flags
*(u8 *)flags_ie0 = 0;
*(u8 *)flags_ie1 = 0;
*(u8 *)flags_ie2 = 0;

// fuc5 crypt cxset instruction
// Set crypto transfer mode
*(u32 *)cx = 0x80;

// fuc5 crypt cauth instruction
*(u32 *)cauth &= 0x7FFFF;

// Set the target port for memory transfers
// Target will now be 0 (crypto)
xtargets(0);

// Wait for all data loads/stores to finish
xdwait();

// Wait for all code loads to finish
xcwait();

// fuc5 crypt cxset instruction
// Set crypto transfer mode
*(u32 *)cx = 0x02;

// Transfer data from/to Falcon
// This should clear all previous hashes
xdst(0, 0);

// Wait for all data loads/stores to finish
xdwait();

// Clear crypto registers
*(u32 *)c0 ^= *(u32 *)c0;
*(u32 *)c1 = *(u32 *)c0;
*(u32 *)c2 = *(u32 *)c0;
*(u32 *)c3 = *(u32 *)c0;
*(u32 *)c4 = *(u32 *)c0;
*(u32 *)c5 = *(u32 *)c0;
*(u32 *)c7 = *(u32 *)c0;

// Update engine specific IO (crypto?)
*(u32 *)0x00020E00 &= 0xEFFFF;

// Update engine specific IO (crypto?)
*(u32 *)0x00010600 |= 0x01;

u32 wait_10600 = 0;

// Wait for some device
while (wait_10600 == 0)
wait_10600 = (*(u32 *)0x00010600 & 0x02);

// Read data segment size from IO space
u32 data_seg_size = *(u32 *)UC_CAPS;
data_seg_size >>= 0x09;
data_seg_size &= 0x1FF;
data_seg_size <<= 0x08;

// Check stack bounds
if ((*(u32 *)sp >= data_seg_size) || (*(u32 *)sp < 0x800))
return;

// Decrypt and load Stage2
load_stage2(key_buf, key_version, is_blob_dec);

// Clear crypto registers
*(u32 *)c0 ^= *(u32 *)c0;
*(u32 *)c1 ^= *(u32 *)c1;
*(u32 *)c2 ^= *(u32 *)c2;
*(u32 *)c3 ^= *(u32 *)c3;
*(u32 *)c4 ^= *(u32 *)c4;
*(u32 *)c5 ^= *(u32 *)c5;
*(u32 *)c6 ^= *(u32 *)c6;
*(u32 *)c7 ^= *(u32 *)c7;

// Signal unknown engine
*(u32 *)0x00010300 = 0;

return;

=== Stage 2 loading ===
u32 res = 0;

u32 boot_base_addr = 0;
u32 blob0_addr = 0;
u32 blob0_size = *(u32 *)(key_buf + 0x70);

// Load blob0 code again
read_code(boot_base_addr, blob0_addr, blob0_size);

// Generate "CODE_SIG_01" key into c4 crypto register
keygen(0, 0);

// Encrypt buffer with c4
u32 sig_key[0x10];
enc_buf(sig_key, blob0_size);

u32 src_addr = boot_base_addr;
u32 src_size = blob0_size;
u32 iv_addr = sig_key;
u32 dst_addr = sig_key;
u32 mode = 0x02; // AES-CMAC
u32 version = 0;

// Do AES-CMAC over blob0 code
do_crypto(src_addr, src_size, iv_addr, dst_addr, mode, version);

// Compare the hashes
if (memcmp(sig_key, key_buf + 0x10, 0x10))
{
res = 0xDEADBEEF;
return res;
}

u32 blob1_size = *(u32 *)(key_buf + 0x74);

// Decrypt Stage2 blob if needed
if (!is_blob_dec)
{
// Read Stage2's size from key buffer
u32 blob2_size = *(u32 *)(key_buf + 0x78);

// Check stack bounds
if (*(u32 *)sp > blob2_size)
{
u32 boot_base_addr = 0;
u32 blob2_virt_addr = blob0_size + blob1_size;
u32 blob2_addr = blob2_virt_addr + 0x100;

// Read Stage2's encrypted blob
read_code(boot_base_addr, blob2_addr, blob2_size);

// Generate "CODE_ENC_01" key into c4 crypt register
keygen(0x01, 0x01);

u32 src_addr = boot_base_addr;
u32 src_size = blob2_size;
u32 iv_addr = key_buf + 0x40;
u32 dst_addr = boot_base_addr;
u32 mode = 0; // AES-128-CBC
u32 version = 0;

// Decrypt Stage2
do_crypto(src_addr, src_size, iv_addr, dst_addr, mode, version);

// Upload the next code segment into Falcon's CODE region
bool use_secret = true;
upload_code(blob2_virt_addr, boot_base_addr, blob2_size, blob2_virt_addr, use_secret);

// Clear out the decrypted blob
memset(boot_base_addr, 0, blob2_size);
}
}

// fuc5 crypt cxset instruction
// Set crypto transfer mode
*(u32 *)cx = 0x02;

u32 xfer_size_flag = 0x00060000;
u32 blob2_hash_addr = key_buf + 0x30;

// Transfer data from/to Falcon
xdst(0, (blob2_hash_addr | xfer_size_flag));

// Wait for all data loads/stores to finish
xdwait();

// Save previous cauth value
u32 cauth_old = *(u32 *)cauth;

// fuc5 crypt cauth instruction
// Set auth_addr to blob2_virt_addr and auth_size to blob2_size
*(u32 *)cauth = ((blob2_virt_addr >> 0x08) | (blob2_size << 0x10));

u32 hovi_key_addr = 0;

// Select next stage key
if (key_version == 0x01) // Use HOVI_EKS_01
hovi_key_addr = key_buf + 0x50;
else if (key_version == 0x02) // Use HOVI_COMMON_01
hovi_key_addr = key_buf + 0x60;
else if (key_version == 0x03) // Use device key
hovi_key_addr = key_buf + 0x00;
else
res = 0xD0D0D0D0;

// Jump to Stage2
if (hovi_key_addr)
res = exec_stage2(hovi_key_addr, key_version);

// Clear out key data
memset(key_buf, 0, 0x7C);

// Restore previous cauth value
*(u32 *)cauth = cauth_old;

return res;

== Stage 2 ==
This stage is decrypted by Stage 1 using a hardware secret. It is likely to be the main firmware code that stays running on the Falcon after the Switch's kernel is loaded (HOVI == Horizon VI?).

== Key data ==
Small buffer stored after Stage 0's code and used across all stages.

Navigation menu