My Project
Loading...
Searching...
No Matches
smp.c
Go to the documentation of this file.
1/*
2 * PROJECT: MatanelOS Kernel
3 * LICENSE: GPLv3
4 * PURPOSE: Symmetric MultiProcessing Functions And Implementation.
5 */
6
7#include "../../assert.h"
8#include "../../includes/mh.h"
9#include "../../includes/mm.h"
10#include "../../includes/me.h"
11#include <stdint.h>
12
15
19extern bool smpInitialized;
20
21static inline uint8_t my_lapic_id(void) {
22 uint32_t x = lapic_mmio_read(LAPIC_ID);
23 return (uint8_t)(x >> 24);
24}
25
26// Copy trampoline binary to low phys and map identity for this page.
/*
 * install_trampoline - prepare the AP trampoline page located at AP_TRAMP_PHYS.
 *
 * Visible behaviour: computes the virtual alias of the trampoline page
 * (AP_TRAMP_PHYS + PhysicalMemoryOffset), looks up its PTE, asserts that the
 * copy size fits within AP_TRAMP_SIZE, then flushes the page from the cache
 * line by line and invalidates its TLB entry.
 *
 * NOTE(review): this listing does not show the declaration of `sz`, nor any
 * statement for steps 2 (PTE write) and 3 (copy) -- only their comments are
 * present. Confirm those statements against the full source before editing.
 */
27static void install_trampoline(void) {
28 uintptr_t virt = AP_TRAMP_PHYS + PhysicalMemoryOffset;
29 PMMPTE pte = MiGetPtePointer(virt);
 /* NOTE(review): `sz` is not declared in the visible lines; presumably the
    size of the embedded trampoline binary -- confirm. */
32 assert((sz <= AP_TRAMP_SIZE), "Size of copy must not be larger than the binary itself");
33 /* 2) Map the physical page into our page tables (virt -> AP_TRAMP_PHYS) */
36
37 /* 3) Copy the trampoline into that mapped page */
39
40 /* 4) Make sure caches/TLB don't have stale data:
41 clflush the page (per 64-byte cacheline) and invlpg the page. */
 /* 4096 / 64 = 64 clflush operations, covering the 4096 bytes starting at virt. */
42 for (uintptr_t off = 0; off < 4096; off += 64) {
43 __asm__ volatile("clflush (%0)" :: "r"((char*)virt + off) : "memory");
44 }
 /* Drop any cached translation for the trampoline's virtual address. */
45 __asm__ volatile("invlpg (%0)" :: "r"(virt) : "memory");
46}
47
48#define CPU_STACK_SIZE (24*1024) // 24 KiB stack.
49
50extern PROCESSOR cpu0;
51
52// Allocate PER CPU stack and populare cpus[]
53static void prepare_percpu(uint8_t* apic_list, uint32_t cpu_count) {
54 uint8_t my_id = my_lapic_id();
55
56 for (uint32_t i = 0; i < cpu_count && i < MAX_CPUS; i++) {
57 uint8_t aid = apic_list[i];
58
59 if (aid == my_id) {
60 // BSP slot, since we want synchronization for all APs, we migrate cpu0 to this global variable of CPUs, and change gs once again.
61 // Debugging helped me solve this, I saw that [i].IpiSeq (i = bsp slot), was 3, but [i].self->IpiSeq is 0, which was the real one.
62 // So we infinite looped.
63
64 // Explicitly disable interrupts for synchronization.
65
66 bool Enabled = MeDisableInterrupts();
67
68 // Copy all of the cpu data to here.
69 kmemcpy(&cpus[i], &cpu0, sizeof(PROCESSOR));
70
71 // Set the new self ptr and other variables.
72 cpus[i].self = &cpus[i];
73 cpus[i].ID = i;
74 cpus[i].lapic_ID = aid;
75 cpus[i].flags = CPU_ONLINE;
76
77 // Set the GS to point to new cpus[i]
78 __writemsr(IA32_GS_BASE, (uint64_t)&cpus[i]);
79
80 // Re-Enable if enabled before.
81 MeEnableInterrupts(Enabled);
82
83 continue;
84 }
85
86 // Initialize basic values.
87 cpus[i].self = &cpus[i];
88 cpus[i].currentIrql = PASSIVE_LEVEL;
89 cpus[i].schedulerEnabled = false;
90 cpus[i].currentThread = NULL;
91 kmemset(&cpus[i].readyQueue, 0, sizeof(cpus[i].readyQueue));
92 cpus[i].ID = i;
93 cpus[i].lapic_ID = aid;
94
95 // Allocate stack -- aligned 16.
96 void* stack = MiCreateKernelStack(true);
97 cpus[i].VirtStackTop = stack;
98
99 // IST Stack setup & GDT & TSS have been moved to MeInitProcesor function.
100
101 // CPU Flags
102 cpus[i].flags |= CPU_UNAVAILABLE; // Start unavailable.
103 cpus[i].schedulePending = false;
104
105 // DPCs & Queue
106 kmemset(&cpus[i].CurrentDeferredRoutine, 0, sizeof(cpus[i].CurrentDeferredRoutine));
107
108 }
110}
111
112static void send_startup_ipis(uint8_t apic_id) {
113 // init
114 lapic_send_ipi(apic_id, 0, (0x5 << 8) | (1 << 14)); // init assert
115 pit_sleep_ms(10);
116
117 uint8_t vector = (uint8_t)(AP_TRAMP_PHYS >> 12);
118
119 // SIPI x2
120 lapic_send_ipi(apic_id, vector, (0x6 << 8));
121 pit_sleep_ms(1);
122 lapic_send_ipi(apic_id, vector, (0x6 << 8));
123 pit_sleep_ms(1);
124}
125
126// Globals for use of IPI & other functions.
128uint32_t g_cpuCount = 1; // Must be 1, to include the BSP.
130
131// BSP Entry: start all APs.
/*
 * MhInitializeSMP - BSP entry point that starts all application processors.
 *
 * apic_list:    LAPIC IDs of the CPUs; the entry matching the caller's own
 *               LAPIC ID is skipped when sending startup IPIs.
 * cpu_count:    number of entries in apic_list.
 * lapicAddress: stored into bootInfo.lapic_base.
 *
 * Sequence visible here: populate cpus[] (prepare_percpu), install the
 * trampoline, copy apic_list into g_apic_list, fill bootInfo, write the
 * address of APMain, the PML4 physical address and the address of cpus[]
 * through `virt`, send INIT/SIPI/SIPI to every non-BSP APIC ID, spin until
 * the waited-on CPUs have CPU_ONLINE set, then set smpInitialized.
 *
 * NOTE(review): `virt` is never declared or reassigned in the visible lines;
 * presumably it is pointed at the AP_TRAMP_APMAIN_OFFSET, AP_TRAMP_PML4_OFFSET
 * and AP_TRAMP_CPUS_OFFSET slots in turn -- confirm against the full source.
 */
132void MhInitializeSMP(uint8_t* apic_list, uint32_t cpu_count, uint32_t lapicAddress) {
133 // populate cpus and per cpu stacks.
134 prepare_percpu(apic_list, cpu_count);
135 // copy trampoline
136 install_trampoline();
137
138 // Fill in the globals.
 // NOTE(review): this copy is bounded by cpu_count only, whereas
 // prepare_percpu also clamps to MAX_CPUS. g_apic_list has MAX_CPUS entries,
 // so verify the caller guarantees cpu_count <= MAX_CPUS.
141 for (uint32_t i = 0; i < cpu_count; i++) {
142 g_apic_list[i] = apic_list[i];
143 }
144
 // Boot information block; its consumer is not visible in this listing.
145 bootInfo.magic = SMP_MAGIC;
146 bootInfo.kernel_pml4_phys = boot_info_local.Pml4Phys;
147 bootInfo.ap_entry_virt = (uint64_t)&APMain;
148 bootInfo.cpu_count = cpu_count;
149 bootInfo.lapic_base = lapicAddress;
150
151 // write address of ap main to the offset
 // NOTE(review): `pte` is fetched here and twice below but not used in the
 // visible lines -- the statements consuming it appear to be missing.
153 PMMPTE pte = MiGetPtePointer(virt);
157 uint64_t ap_main_addr = (uint64_t)&APMain;
158 kmemcpy((void*)virt, &ap_main_addr, sizeof(ap_main_addr));
159
 // Write the kernel PML4 physical address (the value the APs load as CR3,
 // judging by the local's name -- confirm in the trampoline source).
162 pte = MiGetPtePointer(virt);
166 uintptr_t cr3 = boot_info_local.Pml4Phys;
167 kmemcpy((void*)virt, &cr3, sizeof(cr3));
168
169 // write address of CPUs to the offset
171 pte = MiGetPtePointer(virt);
175 uintptr_t cpuAddress = (uintptr_t)cpus;
176 kmemcpy((void*)virt, &cpuAddress, sizeof(cpuAddress));
177
178 // send INIT/SIPI/SIPI to APs (skip BSP)
179 uint8_t my_id = my_lapic_id();
180 for (uint32_t i = 0; i < cpu_count; i++) {
181 uint8_t aid = apic_list[i];
182 if (aid == my_id) continue;
183 send_startup_ipis(aid);
184 }
185 // over - Application Processors (the other CPUs) should execute trampoline and call ap_main();
186 // now, we wait until all are online.
 // NOTE(review): the bound is g_cpuCount, which is initialised to 1 at its
 // definition; no update is visible in this listing. Confirm it is set to
 // cpu_count before this loop, otherwise only cpus[0] is waited on.
187 for (uint32_t i = 0; i < g_cpuCount; i++) {
188 while (!(cpus[i].flags & CPU_ONLINE)) {
189 __pause();
190 }
191 }
192 smpInitialized = true;
193}
194
197 uint8_t ProcessorNumber
198)
199
200{
 /*
  * MeGetProcessorBlock - look up the PROCESSOR block for a CPU.
  * (The signature's first line is not present in this listing.)
  *
  * ProcessorNumber: compared against each cpus[i].lapic_ID, i.e. it is
  *                  matched as a LAPIC ID rather than as an index into cpus[].
  *
  * Returns &cpu0 while SMP is not yet initialised (regardless of
  * ProcessorNumber), the matching &cpus[i] afterwards, or NULL if no entry
  * among the first MeGetActiveProcessorCount() entries matches.
  */
201 if (!smpInitialized) return &cpu0;
202
203 // SMP Is on, we iterate over the cpus list until we find the lapic for the processor.
204 for (uint8_t i = 0; i < MeGetActiveProcessorCount(); i++) {
205 if (cpus[i].lapic_ID == ProcessorNumber) return &cpus[i];
206 }
207
208 // The CPU isn't found, we return NULL (would bugcheck though).
209 return NULL;
210}
211
/*
 * Body of MhSendActionToCpusAndWait(action, parameter); the header line is
 * not present in this listing.
 *
 * Stores `action`, `parameter` and a fresh sequence number into the per-CPU
 * block of every other CPU that has CPU_ONLINE set, sends each of them an IPI
 * on VECTOR_IPI, then spins until each target's IpiSeq no longer equals this
 * call's sequence number. Returns immediately if g_cpuCount is zero or SMP is
 * not initialised.
 *
 * NOTE(review): the per-CPU IpiAction/IpiParameter/IpiSeq slots are written
 * without any visible lock; two concurrent callers targeting the same CPU
 * could overwrite each other's request -- confirm callers are serialised.
 */
213 if (!g_cpuCount || !smpInitialized) return;
214 uint8_t myid = my_lapic_id();
215
216 static uint64_t g_ipiSeq = 1; // Global sequence of IPIs made.
217 uint64_t seq = InterlockedIncrementU64(&g_ipiSeq);
218
219 __asm__ volatile("mfence" ::: "memory");
220
 // Phase 1: publish the request to each target and raise the IPI.
221 for (uint32_t i = 0; i < g_cpuCount; i++) {
222 if (cpus[i].lapic_ID == myid) continue;
223 if (!(cpus[i].flags & CPU_ONLINE)) continue;
224
225 cpus[i].IpiAction = action;
226 cpus[i].IpiParameter = parameter;
227
228 cpus[i].IpiSeq = seq; // assign sequence number
229 uint32_t LAPIC_ACTION_VECTOR = VECTOR_IPI;
230 lapic_send_ipi(cpus[i].lapic_ID, (uint8_t)LAPIC_ACTION_VECTOR, 0x0);
231 }
232
233 // wait for all CPUs to handle this exact IPI
 // Phase 2: same skip conditions as phase 1. The volatile read forces IpiSeq
 // to be re-loaded from memory on every iteration of the spin.
234 for (uint32_t i = 0; i < g_cpuCount; i++) {
235 if (cpus[i].lapic_ID == myid || !(cpus[i].flags & CPU_ONLINE))
236 continue;
237
238 while (*(volatile uint64_t*)&cpus[i].IpiSeq == seq) {
239 __pause(); // spin until they clear the seq
240 }
241 }
242}
BOOT_INFO boot_info_local
Definition kernel.c:27
void APMain(void)
---------------— FUNCTIONS ---------------—
Definition ap_main.c:35
SMP_BOOTINFO bootInfo
Definition smp.c:18
void lapic_send_ipi(uint8_t apic_id, uint8_t vector, uint32_t flags)
Definition apic.c:129
uint32_t lapic_mmio_read(uint32_t off)
Definition apic.c:36
#define assert(...)
Definition assert.h:57
FORCEINLINE uint64_t InterlockedIncrementU64(volatile uint64_t *target)
Definition atomic.h:119
bool smpInitialized
Definition kernel.c:146
struct _PROCESSOR PROCESSOR
Definition core.h:45
@ PASSIVE_LEVEL
Definition core.h:13
PROCESSOR * PPROCESSOR
Definition core.h:46
FORCEINLINE void __writemsr(uint32_t msr, uint64_t value)
Definition intrin.h:200
FORCEINLINE void __pause(void)
Definition intrin.h:224
#define IA32_GS_BASE
Definition intrin.h:20
bool MeDisableInterrupts(void)
Definition irql.c:186
void MeEnableInterrupts(IN bool EnabledBefore)
Definition irql.c:199
uint8_t apic_list[MAX_CPUS]
Definition kernel.c:143
uint32_t cpu_count
Definition kernel.c:144
PMMPTE MiGetPtePointer(IN uintptr_t va)
Definition map.c:76
FORCEINLINE uint8_t MeGetActiveProcessorCount(void)
Definition me.h:394
@ CPU_ONLINE
Definition me.h:219
@ CPU_UNAVAILABLE
Definition me.h:222
#define AP_TRAMP_PML4_OFFSET
Definition mh.h:406
struct _IPI_PARAMS IPI_PARAMS
#define AP_TRAMP_CPUS_OFFSET
Definition mh.h:407
#define AP_TRAMP_APMAIN_OFFSET
Definition mh.h:405
uint32_t flags
Definition mh.h:2
#define MAX_CPUS
Definition mh.h:408
struct _SMP_BOOTINFO SMP_BOOTINFO
#define SMP_MAGIC
Definition mh.h:410
#define VECTOR_IPI
Definition mh.h:49
#define AP_TRAMP_SIZE
Definition mh.h:404
#define LAPIC_ID
Definition mh.h:409
uint32_t lapicAddress
Definition mh.h:1
enum _CPU_ACTION CPU_ACTION
#define AP_TRAMP_PHYS
Definition mh.h:403
@ PAGE_RW
Definition mm.h:272
@ PAGE_PRESENT
Definition mm.h:268
@ PAGE_PCD
Definition mm.h:285
struct _MMPTE * PMMPTE
FORCEINLINE void * kmemcpy(void *dest, const void *src, size_t len)
Definition mm.h:554
FORCEINLINE void * kmemset(void *dest, int64_t val, uint64_t len)
Definition mm.h:540
#define PhysicalMemoryOffset
Definition mm.h:56
#define MI_WRITE_PTE(_PtePointer, _Va, _Pa, _Flags)
Definition mm.h:90
void * MiCreateKernelStack(IN bool LargeStack)
Definition mmproc.c:25
void pit_sleep_ms(uint32_t ms)
Definition pit.c:19
PROCESSOR cpus[]
Definition smp.c:16
uint32_t g_cpuCount
Definition smp.c:128
int smp_cpu_count
Definition smp.c:17
PROCESSOR cpu0
Definition kernel.c:21
uint32_t g_lapicAddress
Definition smp.c:129
PPROCESSOR MeGetProcessorBlock(uint8_t ProcessorNumber)
Definition smp.c:196
void MhInitializeSMP(uint8_t *apic_list, uint32_t cpu_count, uint32_t lapicAddress)
Definition smp.c:132
uint8_t g_apic_list[MAX_CPUS]
Definition smp.c:127
uint8_t _binary_build_ap_trampoline_bin_end[]
uint8_t _binary_build_ap_trampoline_bin_start[]
void MhSendActionToCpusAndWait(CPU_ACTION action, IPI_PARAMS parameter)
Definition smp.c:212