Kernel Loader
KernelLdr_ApplyRelocations(&KernelLdr_Main, __dynamic_start);
KernelLdr_libc_init_array();
</pre>
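KernelLdr_ApplyRelocations walks the .dynamic section and applies relocations to the given base. A minimal sketch of the standard R_AARCH64_RELATIVE processing it presumably performs (the signature, names, and structure here are illustrative, not lifted from the binary):
<pre>
// Illustrative sketch only: standard .dynamic parsing + R_AARCH64_RELATIVE handling.
void ApplyRelocations(uintptr_t base, const Elf64_Dyn *dynamic) {
    const Elf64_Rela *rela_start = NULL;
    size_t rela_size = 0, rela_ent = sizeof(Elf64_Rela);
    // Locate the .rela table via the dynamic section.
    for (const Elf64_Dyn *dyn = dynamic; dyn->d_tag != DT_NULL; dyn++) {
        switch (dyn->d_tag) {
            case DT_RELA:    rela_start = (const Elf64_Rela *)(base + dyn->d_un.d_ptr); break;
            case DT_RELASZ:  rela_size  = dyn->d_un.d_val; break;
            case DT_RELAENT: rela_ent   = dyn->d_un.d_val; break;
        }
    }
    // Each R_AARCH64_RELATIVE entry stores base + addend at base + offset.
    for (size_t i = 0; i < rela_size / rela_ent; i++) {
        if (ELF64_R_TYPE(rela_start[i].r_info) == R_AARCH64_RELATIVE) {
            *(uint64_t *)(base + rela_start[i].r_offset) = base + rela_start[i].r_addend;
        }
    }
}
</pre>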
[9.0.0+] Then, it clears TPIDR_EL1 to 0 and sets VBAR_EL1 to KernelLdr_ExceptionTable.
<pre>
// 9.0.0+
TPIDR_EL1 = 0
VBAR_EL1 = KernelLdr_ExceptionTable
</pre>
Next, it initializes the MMU with a basic identity mapping for Kernel + KernelLdr.
<pre>
// Set page table region
page_table_region = ini1_end_address;
page_table_region_size = 0x200000;
g_InitialPageAllocator.Initialize(page_table_region);
// Initialize new page table, eventually ends up in TTBR1_EL1.
KInitialPageTable ttbr1_page_table(&g_InitialPageAllocator);
// Setup MMU with initial identity mapping.
KernelLdr_MapInitialIdentityMapping(&ttbr1_page_table, kernel_base, rw_end_offset, page_table_region, page_table_region_size, &g_InitialPageAllocator);
</pre>
Next, it generates a random KASLR slide for the Kernel.
<pre>
// Repeatedly try to generate a random slide
while (true) {
    // Get random value from secure monitor in range.
    // This is "probably" KSystemControl::GenerateRandomRange, as in the normal kernel.
    // However, it's unclear whether KSystemControl is actually included, or whether this is just copy/pasted.
    random_kaslr_slide = KernelLdr_GenerateRandomRange(0xFFFFFF8000000000, 0xFFFFFFFFFFDFFFFF);
    aligned_random_kaslr_slide = random_kaslr_slide & 0xFFFFFFFFFFE00000;
    // Calculate the end address for the kernel with this slide, rounding up to a 2MB boundary.
    random_kernel_end = (aligned_random_kaslr_slide + (kernel_base & 0x1FFFFF) + rw_end_offset + 0x1FFFFF) & 0xFFFFFFFFFFE00000;
    // Validate no overflow, and that the kernel will fit with the slide.
    if (aligned_random_kaslr_slide >= random_kernel_end || ((random_kernel_end - 1) > 0xFFFFFFFFFFDFFFFF)) {
        continue;
    }
    // Validate we can map this range without conflicts.
    // NOTE: This is inlined, but the code looks the same as in older kernel binaries.
    if (!ttbr1_page_table.IsFree(aligned_random_kaslr_slide, random_kernel_end - aligned_random_kaslr_slide)) {
        continue;
    }
    // Valid KASLR slide, so we're done.
    break;
}
final_virtual_kernel_base = aligned_random_kaslr_slide | (kernel_base & 0x1FFFFF);
</pre>
Then, it maps the kernel at the final virtual address.
<pre>
// Maps .text as R-X.
attribute = 0x40000000000788;
ttbr1_page_table.Map(final_virtual_kernel_base + text_offset, text_end_offset - text_offset, kernel_base + text_offset, &attribute, &g_InitialPageAllocator);
// Maps .rodata as R--.
attribute = 0x60000000000788;
// 9.0.0+
{
    // On 9.0.0+, .rodata is initially RW- to facilitate .rel.ro.
    attribute = 0x60000000000708;
}
ttbr1_page_table.Map(final_virtual_kernel_base + ro_offset, ro_end_offset - ro_offset, kernel_base + ro_offset, &attribute, &g_InitialPageAllocator);
// Maps .rwdata and .bss as RW-.
attribute = 0x60000000000708;
ttbr1_page_table.Map(final_virtual_kernel_base + rw_offset, rw_end_offset - rw_offset, kernel_base + rw_offset, &attribute, &g_InitialPageAllocator);
// Clears .bss.
memset(final_virtual_kernel_base + bss_offset, 0, rw_end_offset - bss_offset);
</pre>
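These attribute constants are standard aarch64 stage 1 block/page descriptor attribute bits. The decode below is from the ARMv8 VMSA, not taken from the binary; the descriptor valid/type bits (bits [1:0]) are presumably OR'd in by KInitialPageTable::Map:
<pre>
// 0x40000000000788: UXN | AF | SH=Inner Shareable | AP[2:1]=10 (EL1 read-only)  | AttrIndx=2 -> R-X (PXN clear)
// 0x60000000000788: UXN | PXN | AF | SH=Inner Shareable | AP[2:1]=10 (EL1 read-only)  | AttrIndx=2 -> R--
// 0x60000000000708: UXN | PXN | AF | SH=Inner Shareable | AP[2:1]=00 (EL1 read-write) | AttrIndx=2 -> RW-
</pre>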
Then, it applies the kernel's .dynamic relocations and calls the kernel's libc .init_array functions.
<pre>
// Applies all R_AARCH64_RELATIVE relocations.
KernelLdr_ApplyRelocations(final_virtual_kernel_base, final_virtual_kernel_base + dynamic_offset);
// 9.0.0+: Reprotects .rodata as R--.
ttbr1_page_table.ReprotectToReadOnly(final_virtual_kernel_base + ro_offset, ro_end_offset - ro_offset);
// This is standard libc init_array code, but called for the kernel's binary instead of kernelldr's.
for (uintptr_t cur_func = final_virtual_kernel_base + init_array_offset; cur_func < final_virtual_kernel_base + init_array_end_offset; cur_func += 8) {
    ((void (*)(void))(*(uint64_t *)cur_func))();
}
</pre>
<pre>
return final_virtual_kernel_base - original_kernel_base;
</pre>
== KernelLdr_MapInitialIdentityMapping == | |||
Signature is like | |||
<pre>
void KernelLdr_MapInitialIdentityMapping(KInitialPageTable *ttbr1_page_table, uintptr_t kernel_base, uintptr_t kernel_size,
                                         uintptr_t page_tables_base, uintptr_t page_tables_size, KInitialPageAllocator *allocator);
</pre>
First, this creates a new page table (eventually ends up in TTBR0_EL1), and adds identity mappings for Kernel, KernelLdr, and the Page Table region to it. | |||
<pre> | |||
// Create new KInitialPageTable | |||
KInitialPageTable ttbr0_page_table(allocator); | |||
// Maps kernel with RWX identity mapping. | |||
attribute = 0x40000000000708; | |||
ttbr0_page_table.Map(kernel_base, kernel_size, kernel_base, &attribute, allocator); | |||
// Maps kernel loader with RWX identity mapping. | |||
attribute = 0x40000000000708; | |||
ttbr0_page_table.Map(__start, __end - __start, __start, &attribute, allocator); | |||
// Maps page table region with RW- identity mapping. | |||
attribute = 0x60000000000708; | |||
ttbr0_page_table.Map(page_tables_base, page_tables_size, page_tables_base, &attribute, allocator); | |||
</pre> | |||
Next, this sets some system registers. | |||
<pre> | |||
// Set TTBR0/TTBR1 with initial page tables. | |||
TTBR0_EL1 = ttbr0_page_table.GetL1Table(); | |||
TTBR1_EL1 = ttbr1_page_table->GetL1Table(); | |||
// Configure MAIR, TCR.
MAIR_EL1 = 0x44FF0400; | |||
TCR_EL1 = 0x11B5193519; | |||
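// Note: the following decode is assumed from the ARMv8 VMSA, not taken from the binary.
// MAIR_EL1 = 0x44FF0400: Attr0=0x00 (Device-nGnRnE), Attr1=0x04 (Device-nGnRE),
//            Attr2=0xFF (Normal, Write-Back RW-Allocate), Attr3=0x44 (Normal, Non-Cacheable).
// TCR_EL1 = 0x11B5193519: 39-bit virtual addresses (T0SZ=T1SZ=25), 4KB granules for both TTBRs,
//           inner-shareable write-back table walks, 36-bit IPS, 16-bit ASIDs.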
// Check what CPU we're running on to configure CPUECTLR, CPUACTLR appropriately. | |||
manufacture_id = MIDR_EL1; | |||
implementer = (manufacture_id >> 24) & 0xFF;
// 9.0.0+: Save X19-X30 + SP, save context struct in TPIDR_EL1. | |||
KernelLdr_SaveRegistersToTpidrEl1(); | |||
if (implementer == 0x41) { | |||
// Implementer ID is 0x41 (ARM Limited). | |||
architecture = (manufacture_id >> 4) & 0x0FFF; | |||
hw_variant = (manufacture_id >> 20) & 0xF; | |||
hw_revision = (manufacture_id >> 0) & 0xF; | |||
if (architecture == 0xD07) { | |||
// Architecture is 0xD07 (Cortex-A57). | |||
cpuactlr_value = 0x1000000; // Non-cacheable load forwarding enabled | |||
cpuectlr_value = 0x1B00000040; // Enable the processor to receive instruction cache and TLB maintenance operations broadcast from other processors in the cluster; set the L2 load/store data prefetch distance to 8 requests; set the L2 instruction fetch prefetch distance to 3 requests. | |||
if (hw_variant == 0 || (hw_variant == 1 && hw_revision <= 1)) { | |||
// If supported, disable load-pass DMB. | |||
cpuactlr_value |= 0x800000000000000; | |||
} | |||
CPUACTLR_EL1 = cpuactlr_value; | |||
if (CPUECTLR_EL1 != cpuectlr_value) { | |||
CPUECTLR_EL1 = cpuectlr_value; | |||
} | |||
} else if (architecture == 0xD03) { // 9.0.0+ | |||
// Architecture is 0xD03 (Cortex-A53). | |||
cpuactlr_value = 0x90CA000; // Set L1 data prefetch control to allow 5 outstanding prefetches; enable device split throttle; set the number of independent data prefetch streams to 2; disable transient and no-read-allocate hints for loads; set write streaming no-allocate threshold so the 128th consecutive streaming cache line does not allocate in the L1 or L2 cache. | |||
cpuectlr_value = 0x40; // Enable hardware management of data coherency with other cores in the cluster. | |||
if (hw_variant != 0 || (hw_variant == 0 && hw_revision > 2)) { | |||
// If supported, enable data cache clean as data cache clean/invalidate. | |||
cpuactlr_value |= 0x100000000000; | |||
} | |||
CPUACTLR_EL1 = cpuactlr_value; | |||
if (CPUECTLR_EL1 != cpuectlr_value) { | |||
CPUECTLR_EL1 = cpuectlr_value; | |||
} | |||
} | |||
} | |||
// 9.0.0+: Verify that TPIDR_EL1 is still set. | |||
KernelLdr_VerifyTpidrEl1(); | |||
</pre> | |||
Next, the cache is flushed, to ensure that page tables will be successfully read once the MMU is enabled. | |||
<pre> | |||
KernelLdr_EnsureCacheFlushed(); | |||
</pre> | |||
Finally, SCTLR is written to, enabling the MMU. | |||
<pre> | |||
SCTLR_EL1 = 0x34D5D925; | |||
__dsb_sy(); | |||
__isb(); | |||
</pre> | </pre> | ||
Line 184: | Line 345: | ||
switch (memory_type) {
    case MemoryType_4GB: // 0
    default:
        dram_size_from_kernel_cfg = 0x100000000;
        break;
    case MemoryType_6GB: // 1
        dram_size_from_kernel_cfg = 0x180000000;
        break;
    case MemoryType_8GB: // 2
        dram_size_from_kernel_cfg = 0x200000000;
        break;
<pre>
return (smc_get_config(ConfigItem_KernelConfiguration) >> 3) & 1;
</pre>
== KernelLdr_GenerateRandomRange == | |||
This uses entropy from the secure monitor to generate a random value in a range (inclusive). | |||
<pre> | |||
range_size = (range_end + 1 - range_start); | |||
random_value = smc_generate_random_bytes(8); | |||
random_value -= random_value / range_size * range_size; | |||
return range_start + random_value; | |||
</pre> | |||
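The subtraction is just a modulo reduction (random_value %= range_size), so the result is very slightly biased toward low values unless range_size evenly divides 2^64.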
== KernelLdr_EnsureCacheFlushed == | |||
Note: this is inlined, but it uses instructions that no compiler has intrinsics for (and it looks like hand-written assembly), so it's presumably a standalone function.
<pre> | |||
// Invalidate Local Cache | |||
KernelLdr_InvalidateCacheLocal(); | |||
__dsb_sy(); | |||
// Invalidate Shared Cache
KernelLdr_InvalidateCacheShared(); | |||
__dsb_sy(); | |||
// Invalidate Local Cache again | |||
KernelLdr_InvalidateCacheLocal(); | |||
__dsb_sy(); | |||
// asm { tlbi vmalle1is; } | |||
__dsb_sy(); | |||
__isb(); | |||
</pre> | |||
== KernelLdr_InvalidateCacheLocal == | |||
Standard ARM cache clean code, uses LoUIS + LoC from CLIDR_EL1. | |||
== KernelLdr_InvalidateCacheShared == | |||
Standard ARM cache clean code, uses LoUIS from CLIDR_EL1. | |||
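Both presumably follow the standard ARM set/way maintenance sequence (the exact op, e.g. DC CISW vs DC ISW, differs between clean and clean+invalidate variants). A minimal sketch, with CCSIDR_EL1 field decodes per the architecture; clz32 and level_bound are illustrative names:
<pre>
// level_bound is LoC (local variant) or LoUIS (shared variant), read from CLIDR_EL1.
for (level = 0; level < level_bound; level++) {
    cache_type = (CLIDR_EL1 >> (3 * level)) & 7;
    if (cache_type < 2) {
        continue; // no data or unified cache at this level
    }
    CSSELR_EL1 = level << 1; // select the data/unified cache at this level
    __isb();
    ccsidr = CCSIDR_EL1;
    line_shift = (ccsidr & 0x7) + 4;          // log2(line size in bytes)
    num_ways   = ((ccsidr >> 3) & 0x3FF) + 1;
    num_sets   = ((ccsidr >> 13) & 0x7FFF) + 1;
    way_shift  = clz32(num_ways - 1);         // ways occupy the top bits of the operand
    for (set = 0; set < num_sets; set++) {
        for (way = 0; way < num_ways; way++) {
            operand = (way << way_shift) | (set << line_shift) | (level << 1);
            // asm { dc cisw, operand } (clean+invalidate by set/way)
        }
    }
}
__dsb_sy();
</pre>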
== KernelLdr_ExceptionTable == | |||
Standard aarch64 exception table; the only vector that doesn't infinitely loop is the one for a synchronous exception from the same EL (synch_spx_exception).
synch_spx_exception does the following (a sketch follows the list):
* Moves TPIDR_EL1 into X0 | |||
* Infinite loops if it is 0/NULL. | |||
* Restores X19-X30 + SP from the memory pointed to by TPIDR_EL1. | |||
* Returns to the saved LR stored in the context save struct. | |||
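A rough reconstruction of that flow, assuming the packed { X19..X30, SP } context layout described under KernelLdr_SaveRegistersToTpidrEl1 below:
<pre>
// Hypothetical reconstruction of synch_spx_exception.
context = TPIDR_EL1;
// Infinite loop if no context was saved.
if (context == 0) {
    while (1) { }
}
// Restore the callee-saved registers and stack pointer from the context struct.
// asm { ldp x19, x20, [context, #0x00] } ... { ldp x29, x30, [context, #0x50] }
// asm { ldr tmp, [context, #0x60] ; mov sp, tmp }
// Return to the saved LR (X30).
// asm { ret }
</pre>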
== KernelLdr_SaveRegistersToTpidrEl1 == | |||
This saves X19-X30 + SP to an input pointer, and moves the pointer into TPIDR_EL1. | |||
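The save/restore pair implies a context layout along these lines (offsets inferred, not confirmed):
<pre>
struct SavedRegisterContext {
    uint64_t x19_x30[12]; // 0x00-0x58: callee-saved GPRs X19-X30
    uint64_t sp;          // 0x60: stack pointer
};
</pre>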
== KernelLdr_VerifyTpidrEl1 == | |||
This just verifies that TPIDR_EL1 is equal to an input argument, and clears it. | |||
<pre> | |||
// 9.0.0+ | |||
if (TPIDR_EL1 != input_arg) { | |||
while (1) { /* Infinite loop panic */ } | |||
} | |||
TPIDR_EL1 = 0 | |||
</pre>
    // Does Nothing
}
</pre>
== KInitialPageTable::KInitialPageTable == | |||
NOTE: This constructor is inferred. | |||
<pre> | |||
KInitialPageTable::KInitialPageTable(KInitialPageAllocator *allocator) { | |||
this->l1_table_ptr = allocator->Allocate(); | |||
memset(this->l1_table_ptr, 0, 0x1000); | |||
this->num_l1_table_entries = 0x200; | |||
} | |||
</pre> | |||
== KInitialPageTable::Map == | |||
Signature is like | |||
<pre>
KInitialPageTable::Map(uintptr_t virtual_address, size_t size, uintptr_t physical_address, const uint64_t *attribute, KInitialPageAllocator *allocator);
</pre>
This is just standard aarch64 page table mapping code. New L2/L3 pages are allocated via allocator->Allocate() when needed. | |||
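A minimal sketch of what such code typically looks like under the translation regime configured above (4KB granule, 39-bit VAs). The descriptor constants and overall structure are illustrative, not lifted from the binary:
<pre>
// Illustrative sketch only.
void KInitialPageTable::Map(uintptr_t virt, size_t size, uintptr_t phys, const uint64_t *attribute, KInitialPageAllocator *allocator) {
    while (size > 0) {
        uint64_t *l1_entry = &this->l1_table_ptr[(virt >> 30) & 0x1FF];
        // Use a 1GB L1 block when alignment and remaining size allow.
        if (((virt | phys) & 0x3FFFFFFF) == 0 && size >= 0x40000000) {
            *l1_entry = phys | *attribute | 0x1; // block descriptor
            virt += 0x40000000; phys += 0x40000000; size -= 0x40000000;
            continue;
        }
        // Otherwise descend to L2, allocating a table if the entry is empty.
        if ((*l1_entry & 0x3) != 0x3) {
            uint64_t *l2_table = (uint64_t *)allocator->Allocate();
            memset(l2_table, 0, 0x1000);
            *l1_entry = (uint64_t)l2_table | 0x3; // table descriptor
        }
        uint64_t *l2_entry = &((uint64_t *)(*l1_entry & 0x0000FFFFFFFFF000))[(virt >> 21) & 0x1FF];
        // Use a 2MB L2 block when possible.
        if (((virt | phys) & 0x1FFFFF) == 0 && size >= 0x200000) {
            *l2_entry = phys | *attribute | 0x1;
            virt += 0x200000; phys += 0x200000; size -= 0x200000;
            continue;
        }
        // Otherwise map a single 4KB page at L3.
        if ((*l2_entry & 0x3) != 0x3) {
            uint64_t *l3_table = (uint64_t *)allocator->Allocate();
            memset(l3_table, 0, 0x1000);
            *l2_entry = (uint64_t)l3_table | 0x3;
        }
        uint64_t *l3_entry = &((uint64_t *)(*l2_entry & 0x0000FFFFFFFFF000))[(virt >> 12) & 0x1FF];
        *l3_entry = phys | *attribute | 0x3; // page descriptor
        virt += 0x1000; phys += 0x1000; size -= 0x1000;
    }
}
</pre>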
== KInitialPageTable::IsFree == | |||
This is just standard aarch64 page table code. It walks the page table, verifying that all entries needed to map the given range are free.
== KInitialPageTable::ReprotectToReadOnly == | |||
This is just standard aarch64 page table code. It walks the page table and reprotects the read-write pages in the specified region as read-only.
This is probably a compiler-optimized version of a function that does an arbitrary reprotection. | |||
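The leaf-descriptor rewrite presumably looks like the following (AP[2] is bit 7 of a stage 1 descriptor; whether kernelldr also performs TLB maintenance here is an assumption):
<pre>
// For each block/page descriptor covering the target region:
*entry |= 0x80; // set AP[2]: EL1 read-write -> EL1 read-only
// Stale TLB entries for the region must then be invalidated, e.g.:
// asm { tlbi vmalle1is } ; dsb ; isb
</pre>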
== KInitialPageTable::GetL1Table == | |||
This is an inferred getter for a (presumably) private member. | |||
<pre> | |||
void *KInitialPageTable::GetL1Table() const { | |||
return this->l1_table_ptr; | |||
} | |||
</pre> | </pre> | ||
| 8
| void (*Free)(KInitialPageAllocator *this, void *address);
|}
== KInitialPageTable == | |||
KInitialPageTable is a very stripped-down KPageTable.
Compared to the pre-KernelLoader KInitialPageTable, it has a slightly reduced memory footprint.
{| class="wikitable" border="1" | |||
|- | |||
! Offset | |||
! Size | |||
! Description | |||
|- | |||
| 0x0 | |||
| 8 | |||
| Pointer to L1 Table
|- | |||
| 0x8 | |||
| 8 | |||
| Number of L1 Table Entries (Normally 0x200)
|}
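In C terms, the table above corresponds to a layout like the following (member names inferred from the constructor above):
<pre>
struct KInitialPageTable {
    uint64_t *l1_table_ptr;          // 0x0: pointer to the L1 table
    size_t    num_l1_table_entries;  // 0x8: number of L1 table entries (normally 0x200)
};
</pre>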