Kernel Loader

    KernelLdr_ApplyRelocations(&KernelLdr_Main, __dynamic_start);
    KernelLdr_libc_init_array();
</pre>

[9.0.0+]
Then it clears TPIDR_EL1 to 0, and sets VBAR_EL1.
<pre>
    // 9.0.0+
    TPIDR_EL1 = 0
    VBAR_EL1 = KernelLdr_ExceptionTable
</pre>


Next, it generates a random KASLR slide for the Kernel.
<pre>
    // Repeatedly try to generate a random slide
    while (true) {
        // Get random value from secure monitor in range
        // This is "probably" KSystemControl::GenerateRandomRange, as in normal kernel
        // However, it's unclear whether KSystemControl is actually included, or whether this is just copy/pasted?
        random_kaslr_slide = KernelLdr_GenerateRandomRange(0xFFFFFF8000000000, 0xFFFFFFFFFFDFFFFF);
        aligned_random_kaslr_slide = random_kaslr_slide & 0xFFFFFFFFFFE00000;
       
        // Calculate end address for kernel with this slide, rounding up.
        random_kernel_end = (aligned_random_kaslr_slide + (kernel_base & 0x1FFFFF) + rw_end_offset + 0x1FFFFF) & 0xFFFFFFFFFFE00000;
     
        // Validate no overflow, and that the kernel will fit with the slide.
        if (aligned_random_kaslr_slide >= random_kernel_end || ((random_kernel_end - 1) > 0xFFFFFFFFFFDFFFFF)) {
            continue;
        }
 
        // Validate we can map this range without conflicts.
        // NOTE: This is inlined, but code looks same as in older kernel binaries.
        if (!ttbr1_page_table.IsFree(aligned_random_kaslr_slide, random_kernel_end - aligned_random_kaslr_slide)) {
            continue;
        }
 
        // Valid kaslr slide, so we're done.
        break;
    }
    final_virtual_kernel_base = aligned_random_kaslr_slide | (kernel_base & 0x1FFFFF);
</pre>
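To make the arithmetic concrete, here is a worked example with illustrative values (these are not taken from a real boot):
<pre>
    // Suppose kernel_base = 0x80060000 (physical) and the secure monitor returns
    // random_kaslr_slide = 0xFFFFFFF012345678.
    aligned_random_kaslr_slide = 0xFFFFFFF012345678 & 0xFFFFFFFFFFE00000; // = 0xFFFFFFF012200000
    final_virtual_kernel_base  = 0xFFFFFFF012200000 | (0x80060000 & 0x1FFFFF); // = 0xFFFFFFF012260000
    // The final OR keeps the kernel at the same offset within its 2MB page virtually
    // as it has physically, so physical and virtual addresses stay congruent modulo 2MB.
</pre>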


    // Maps .rodata as R--
    attribute = 0x60000000000788;
    // 9.0.0+
    {
        // On 9.0.0+, .rodata is initially RW- to facilitate .rel.ro.
        attribute = 0x60000000000708;
    }
    ttbr1_page_table.Map(final_virtual_kernel_base + ro_offset, ro_end_offset - ro_offset, kernel_base + ro_offset, &attribute, &g_InitialPageAllocator);


    // Applies all R_AARCH64_RELATIVE relocations.
    KernelLdr_ApplyRelocations(final_virtual_kernel_base, final_virtual_kernel_base + dynamic_offset);
    // 9.0.0+: Reprotects .rodata as R--.
    ttbr1_page_table.ReprotectToReadOnly(final_virtual_kernel_base + ro_offset, ro_end_offset - ro_offset);
      
      
    // This is standard libc init_array code, but called for the kernel's binary instead of kernelldr's.
</pre>

Next, it sets some system registers.
<pre>
    // Set TTBR0/TTBR1 with initial page tables.
    TTBR0_EL1 = ttbr0_page_table.GetL1Table();
    TTBR1_EL1 = ttbr1_page_table.GetL1Table();
   
    // Configure MAIR, TCR. TODO: Document here what bits these are.
    MAIR_EL1 = 0x44FF0400;
    TCR_EL1  = 0x11B5193519;
 
    // Check what CPU we're running on to configure CPUECTLR, CPUACTLR appropriately.
    manufacture_id = MIDR_EL1;
    implementer = (manufacture_id >> 24) & 0xFF;
   
    // 9.0.0+: Save X19-X30 + SP, save context struct in TPIDR_EL1.
    KernelLdr_SaveRegistersToTpidrEl1();
 
    if (implementer == 0x41) {
        // Implementer ID is 0x41 (ARM Limited).
        architecture = (manufacture_id >> 4)  & 0x0FFF;
        hw_variant  = (manufacture_id >> 20) & 0xF;
        hw_revision  = (manufacture_id >> 0)  & 0xF;
        if (architecture == 0xD07) {
            // Architecture is 0xD07 (Cortex-A57).
            cpuactlr_value = 0x1000000;    // Non-cacheable load forwarding enabled
            cpuectlr_value = 0x1B00000040; // Enable the processor to receive instruction cache and TLB maintenance operations broadcast from other processors in the cluster; set the L2 load/store data prefetch distance to 8 requests; set the L2 instruction fetch prefetch distance to 3 requests.
            if (hw_variant == 0 || (hw_variant == 1 && hw_revision <= 1)) {
                // If supported, disable load-pass DMB.
                cpuactlr_value |= 0x800000000000000;
            }
            CPUACTLR_EL1 = cpuactlr_value;
            if (CPUECTLR_EL1 != cpuectlr_value) {
                CPUECTLR_EL1 = cpuectlr_value;
            }
        } else if (architecture == 0xD03) { // 9.0.0+
            // Architecture is 0xD03 (Cortex-A53).
            cpuactlr_value = 0x90CA000; // Set L1 data prefetch control to allow 5 outstanding prefetches; enable device split throttle; set the number of independent data prefetch streams to 2; disable transient and no-read-allocate hints for loads; set write streaming no-allocate threshold so the 128th consecutive streaming cache line does not allocate in the L1 or L2 cache.
            cpuectlr_value = 0x40;      // Enable hardware management of data coherency with other cores in the cluster.
            if (hw_variant != 0 || (hw_variant == 0 && hw_revision > 2)) {
                // If supported, enable data cache clean as data cache clean/invalidate.
                cpuactlr_value |= 0x100000000000;
            }
            CPUACTLR_EL1 = cpuactlr_value;
            if (CPUECTLR_EL1 != cpuectlr_value) {
                CPUECTLR_EL1 = cpuectlr_value;
            }
        }
    }
 
 
    // 9.0.0+: Verify that TPIDR_EL1 is still set.
    KernelLdr_VerifyTpidrEl1();
</pre>
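The MAIR_EL1 value above decomposes into four standard ARMv8 memory attribute encodings; the decoding below is our reading of the constant, not code from the binary:
<pre>
    // MAIR_EL1 = 0x44FF0400, by attribute index (8 bits each):
    //   Attr0 = 0x00 : Device-nGnRnE memory
    //   Attr1 = 0x04 : Device-nGnRE memory
    //   Attr2 = 0xFF : Normal memory, Inner/Outer Write-Back, Read/Write-Allocate
    //   Attr3 = 0x44 : Normal memory, Inner/Outer Non-cacheable
    MAIR_EL1 = (0x00 << 0) | (0x04 << 8) | (0xFF << 16) | (0x44 << 24); // == 0x44FF0400
</pre>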
 
Next, the cache is flushed, to ensure that page tables will be successfully read once the MMU is enabled.
<pre>
    KernelLdr_EnsureCacheFlushed();
</pre>
 
Finally, SCTLR is written to, enabling the MMU.
<pre>
    SCTLR_EL1 = 0x34D5D925;
    __dsb_sy();
    __isb();
</pre>


== KernelLdr_RelocateKernelPhysically ==
    switch (memory_type) {
        case MemoryType_4GB: // 0
        default:
            dram_size_from_kernel_cfg = 0x100000000;
            break;
            break;
        case MemoryType_8GB: // 2
            dram_size_from_kernel_cfg = 0x200000000;
            break;
<pre>
    return (smc_get_config(ConfigItem_KernelConfiguration) >> 3) & 1;
</pre>
== KernelLdr_GenerateRandomRange ==
This uses entropy from the secure monitor to generate a random value in a range (inclusive).
<pre>
    range_size  = (range_end + 1 - range_start);
    random_value = smc_generate_random_bytes(8);
    random_value -= random_value / range_size * range_size;
    return range_start + random_value;
</pre>
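The divide/multiply/subtract sequence is just a remainder operation; an equivalent reformulation (ours, not from the binary) is:
<pre>
    // Reduce the raw 64-bit random value into the inclusive range.
    // Note this simple reduction has a small modulo bias when range_size does not
    // divide 2^64 evenly; for KASLR purposes that bias is negligible.
    range_size   = range_end + 1 - range_start;
    random_value = smc_generate_random_bytes(8) % range_size;
    return range_start + random_value;
</pre>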
== KernelLdr_EnsureCacheFlushed ==
Note: this is inlined, but it uses instructions for which no compiler has intrinsics (and it looks like hand-written assembly), so it is presumably its own function.
<pre>
    // Invalidate Local Cache
    KernelLdr_InvalidateCacheLocal();
    __dsb_sy();
    // Invalidate Shared Cache
    KernelLdr_InvalidateCacheShared();
    __dsb_sy();
    // Invalidate Local Cache again
    KernelLdr_InvalidateCacheLocal();
    __dsb_sy();
   
    // Invalidate entire EL1 TLB (inner shareable): asm { tlbi vmalle1is; }
    __dsb_sy();
    __isb();
</pre>
== KernelLdr_InvalidateCacheLocal ==
Standard ARM cache clean code, uses LoUIS + LoC from CLIDR_EL1.
== KernelLdr_InvalidateCacheShared ==
Standard ARM cache clean code, uses LoUIS from CLIDR_EL1.
== KernelLdr_ExceptionTable ==
Standard aarch64 exception table; the only handler that doesn't infinitely loop is the one for a synchronous exception from the same EL (synch_spx_exception).
synch_spx_exception does the following:
* Moves TPIDR_EL1 into X0
* Infinite loops if it is 0/NULL.
* Restores X19-X30 + SP from the memory pointed to by TPIDR_EL1.
* Returns to the saved LR stored in the context save struct.
== KernelLdr_SaveRegistersToTpidrEl1 ==
This saves X19-X30 + SP to an input pointer, and moves the pointer into TPIDR_EL1.
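A sketch of the saved-context layout this implies (field order inferred from the register list; the struct name is ours, not a symbol from the binary):
<pre>
    // 13 * 8 = 0x68 bytes, pointed to by TPIDR_EL1 while the kernel mapping work
    // is in progress, and consumed by synch_spx_exception to recover.
    struct KernelLdrSavedContext {
        uint64_t x19, x20, x21, x22, x23, x24;
        uint64_t x25, x26, x27, x28, x29, x30; // x30 = LR, used as the return target
        uint64_t sp;
    };
</pre>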
== KernelLdr_VerifyTpidrEl1 ==
This just verifies that TPIDR_EL1 is equal to an input argument, and clears it.
<pre>
    // 9.0.0+
    if (TPIDR_EL1 != input_arg) {
        while (1) { /* Infinite loop panic */ }
    }
    TPIDR_EL1 = 0
</pre>




This is just standard aarch64 page table mapping code. New L2/L3 pages are allocated via allocator->Allocate() when needed.
== KInitialPageTable::IsFree ==
This is just standard aarch64 page table code. Walks the page table, verifying that every entry it would map for the given address and size is free.
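A minimal sketch of the described walk, assuming a 4KB granule, the standard AArch64 descriptor layout (bit 0 = valid, bit 1 = table/page), and that next-level table addresses in bits [47:12] are directly dereferenceable; this is illustrative, not the actual kernelldr code:
<pre>
    bool IsFree(const uint64_t *l1_table, uint64_t virt, uint64_t size) {
        for (uint64_t cur = virt; cur < virt + size; cur += 0x1000) {
            const uint64_t l1 = l1_table[(cur >> 30) & 0x1FF];
            if (!(l1 & 1)) continue;      // invalid L1 entry: this 1GB region is unmapped
            if (!(l1 & 2)) return false;  // 1GB block already mapped
            const uint64_t *l2_table = (const uint64_t *)(l1 & 0xFFFFFFFFF000);
            const uint64_t l2 = l2_table[(cur >> 21) & 0x1FF];
            if (!(l2 & 1)) continue;      // invalid L2 entry: this 2MB region is unmapped
            if (!(l2 & 2)) return false;  // 2MB block already mapped
            const uint64_t *l3_table = (const uint64_t *)(l2 & 0xFFFFFFFFF000);
            const uint64_t l3 = l3_table[(cur >> 12) & 0x1FF];
            if (l3 & 1) return false;     // 4KB page already mapped
        }
        return true;
    }
</pre>
The real code presumably skips ahead by the block size when an upper-level entry is invalid rather than testing every 4KB page, but the check performed is the same.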
== KInitialPageTable::ReprotectToReadOnly ==
This is just standard aarch64 page table code. Walks the page table and reprotects the read-write pages in the specified region as read-only.
This is probably a compiler-optimized version of a function that does an arbitrary reprotection.
== KInitialPageTable::GetL1Table ==
This is an inferred getter for a (presumably) private member.
<pre>
    void *KInitialPageTable::GetL1Table() const {
        return this->l1_table_ptr;
    }
</pre>


= Structures =