From e93bbe65348197350478c19f9abd55b44fc413d9 Mon Sep 17 00:00:00 2001 From: Matheus Pecoraro Date: Tue, 6 Aug 2024 17:42:11 -0300 Subject: [PATCH] x86_64/amd64: Add SMP support Adds SMP support for the x86_64 amd64 BSP. --- bsps/x86_64/amd64/clock/clock.c | 16 +- bsps/x86_64/amd64/include/apic.h | 105 ++++++++++- bsps/x86_64/amd64/include/gdt.h | 64 +++++++ bsps/x86_64/amd64/include/smp.h | 47 +++++ bsps/x86_64/amd64/interrupts/apic.c | 169 +++++++++++++----- bsps/x86_64/amd64/interrupts/idt.c | 43 ++++- bsps/x86_64/amd64/interrupts/isr_handler.S | 119 ++++++------ bsps/x86_64/amd64/start/ap_trampoline.S | 147 +++++++++++++++ bsps/x86_64/amd64/start/bspreset.c | 7 +- bsps/x86_64/amd64/start/bspsmp.c | 151 ++++++++++++++++ bsps/x86_64/amd64/start/bspstart.c | 5 +- bsps/x86_64/amd64/start/gdt.c | 47 +++++ bsps/x86_64/amd64/start/start.S | 28 ++- cpukit/score/cpu/x86_64/include/rtems/asm.h | 48 +++++ .../cpu/x86_64/include/rtems/score/cpu.h | 97 +++++++--- .../cpu/x86_64/include/rtems/score/cpu_asm.h | 5 + .../cpu/x86_64/include/rtems/score/idt.h | 8 +- .../score/cpu/x86_64/x86_64-context-switch.S | 89 ++++++--- spec/build/bsps/x86_64/amd64/grp.yml | 2 + spec/build/bsps/x86_64/amd64/obj.yml | 2 + spec/build/bsps/x86_64/amd64/objsmp.yml | 19 ++ spec/build/cpukit/optsmp.yml | 1 + 22 files changed, 1067 insertions(+), 152 deletions(-) create mode 100644 bsps/x86_64/amd64/include/gdt.h create mode 100644 bsps/x86_64/amd64/include/smp.h create mode 100644 bsps/x86_64/amd64/start/ap_trampoline.S create mode 100644 bsps/x86_64/amd64/start/bspsmp.c create mode 100644 bsps/x86_64/amd64/start/gdt.c create mode 100644 spec/build/bsps/x86_64/amd64/objsmp.yml diff --git a/bsps/x86_64/amd64/clock/clock.c b/bsps/x86_64/amd64/clock/clock.c index b7d34f05fe..543ad760d2 100644 --- a/bsps/x86_64/amd64/clock/clock.c +++ b/bsps/x86_64/amd64/clock/clock.c @@ -44,6 +44,7 @@ #include #include #include +#include static struct timecounter amd64_clock_tc; @@ -61,6 +62,14 @@ static void lapic_timer_isr(void *param) lapic_eoi(); } +#ifdef RTEMS_SMP +static void smp_lapic_timer_enable(void* arg) +{ + uint32_t* lapic_reload_value = (uint32_t*) arg; + lapic_timer_enable(*lapic_reload_value); +} +#endif + void lapic_timer_install_handler(void) { rtems_status_code sc = rtems_interrupt_handler_install( @@ -84,7 +93,12 @@ void amd64_clock_driver_initialize(void) ); /* Setup and initialize the Local APIC timer */ - lapic_timer_initialize(irq_ticks_per_sec); + uint32_t lapic_reload_value = lapic_timer_calc_ticks(irq_ticks_per_sec); +#ifdef RTEMS_SMP + _SMP_Broadcast_action(smp_lapic_timer_enable, &lapic_reload_value); +#else + lapic_timer_enable(lapic_reload_value); +#endif amd64_clock_tc.tc_get_timecount = amd64_clock_get_timecount; amd64_clock_tc.tc_counter_mask = 0xffffffff; diff --git a/bsps/x86_64/amd64/include/apic.h b/bsps/x86_64/amd64/include/apic.h index 07de962222..3289adcd08 100644 --- a/bsps/x86_64/amd64/include/apic.h +++ b/bsps/x86_64/amd64/include/apic.h @@ -47,15 +47,19 @@ extern "C" { /* Value to hardware-enable the APIC through the APIC_BASE_MSR */ #define APIC_BASE_MSR_ENABLE 0x800 +#define xAPIC_MAX_APIC_ID 0xFE + /* * Since the LAPIC registers are contained in an array of 32-bit elements * these byte-offsets need to be divided by 4 to index the array. 
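 *
 * For example, the EOI register at byte offset 0x0B0 is accessed as
 * amd64_lapic_base[LAPIC_OFFSET(0x0B0)], i.e. array index 0x2C.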
*/ #define LAPIC_OFFSET(val) (val >> 2) -#define LAPIC_REGISTER_APICID LAPIC_OFFSET(0x20) +#define LAPIC_REGISTER_ID LAPIC_OFFSET(0x20) #define LAPIC_REGISTER_EOI LAPIC_OFFSET(0x0B0) #define LAPIC_REGISTER_SPURIOUS LAPIC_OFFSET(0x0F0) +#define LAPIC_REGISTER_ICR_LOW LAPIC_OFFSET(0x300) +#define LAPIC_REGISTER_ICR_HIGH LAPIC_OFFSET(0x310) #define LAPIC_REGISTER_LVT_TIMER LAPIC_OFFSET(0x320) #define LAPIC_REGISTER_TIMER_INITCNT LAPIC_OFFSET(0x380) #define LAPIC_REGISTER_TIMER_CURRCNT LAPIC_OFFSET(0x390) @@ -63,6 +67,14 @@ extern "C" { #define LAPIC_LVT_MASK 0x10000 +#define LAPIC_ICR_HIGH_MASK 0x00FFFFFF +#define LAPIC_ICR_LOW_MASK 0xFFF32000 +#define LAPIC_ICR_DELIV_INIT 0x500 +#define LAPIC_ICR_DELIV_START 0x600 +#define LAPIC_ICR_DELIV_STAT_PEND 0x1000 +#define LAPIC_ICR_ASSERT 0x4000 +#define LAPIC_ICR_TRIG_LEVEL 0x8000 + #define LAPIC_EOI_ACK 0 #define LAPIC_SELECT_TMR_PERIODIC 0x20000 #define LAPIC_SPURIOUS_ENABLE 0x100 @@ -74,7 +86,7 @@ extern "C" { /* Value to set in register to pick the divide value above */ #define LAPIC_TIMER_SELECT_DIVIDER 3 -/* PIT defines used during LAPIC timer calibration */ +/* PIT defines and macros used when calibrating the LAPIC timer and starting APs */ #define PIT_FREQUENCY 1193180 /* * The PIT_FREQUENCY determines how many times the PIT counter is decremented @@ -119,7 +131,44 @@ RTEMS_STATIC_ASSERT( #define PIT_SELECT_ONE_SHOT_MODE 0b00000010 #define PIT_SELECT_BINARY_MODE 0 +#define PIT_CHAN2_ENABLE(chan2_value) \ + /* Enable the channel 2 timer gate and disable the speaker output */ \ + chan2_value = (inport_byte(PIT_PORT_CHAN2_GATE) | PIT_CHAN2_TIMER_BIT) \ + & ~PIT_CHAN2_SPEAKER_BIT; \ + outport_byte(PIT_PORT_CHAN2_GATE, chan2_value); \ + /* Initialize PIT in one-shot mode on Channel 2 */ \ + outport_byte( \ + PIT_PORT_MCR, \ + PIT_SELECT_CHAN2 | PIT_SELECT_ACCESS_LOHI | \ + PIT_SELECT_ONE_SHOT_MODE | PIT_SELECT_BINARY_MODE \ + ); \ + +#define PIT_CHAN2_WRITE_TICKS(pit_ticks) \ + /* Set PIT reload value */ \ + outport_byte(PIT_PORT_CHAN2, pit_ticks & 0xff); \ + stub_io_wait(); \ + outport_byte(PIT_PORT_CHAN2, (pit_ticks >> 8) & 0xff); \ + +#define PIT_CHAN2_START_DELAY(chan2_value) \ + /* Restart PIT by disabling the gated input and then re-enabling it */ \ + chan2_value &= ~PIT_CHAN2_TIMER_BIT; \ + outport_byte(PIT_PORT_CHAN2_GATE, chan2_value); \ + chan2_value |= PIT_CHAN2_TIMER_BIT; \ + outport_byte(PIT_PORT_CHAN2_GATE, chan2_value); \ + +#define PIT_CHAN2_WAIT_DELAY(pit_ticks) \ + do { \ + uint32_t curr_ticks = pit_ticks; \ + while ( curr_ticks <= pit_ticks ) { \ + /* Send latch command to read multi-byte value atomically */ \ + outport_byte(PIT_PORT_MCR, PIT_SELECT_CHAN2); \ + curr_ticks = inport_byte(PIT_PORT_CHAN2); \ + curr_ticks |= inport_byte(PIT_PORT_CHAN2) << 8; \ + } \ + } while(0); \ + extern volatile uint32_t* amd64_lapic_base; +extern uint8_t amd64_lapic_to_cpu_map[xAPIC_MAX_APIC_ID + 1]; /** * @brief Initializes the Local APIC by hardware and software enabling it. @@ -133,14 +182,56 @@ extern volatile uint32_t* amd64_lapic_base; bool lapic_initialize(void); /** - * @brief Initializes the Local APIC timer + * @brief Calculates the number of Local APIC timer ticks which can be used + * with lapic_timer_enable to set up a timer of given frequency. * - * Calibrates and initializes the Local APIC timer configuring it to - * periodically generate interrupts on vector BSP_VECTOR_APIC_TIMER + * @param desired_freq_hz The frequency in Hz. 
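+ *
+ * A typical use, mirroring clock.c above: compute the reload value once,
+ * then have each processor program its own timer with it, e.g.
+ *
+ *   uint32_t reload = lapic_timer_calc_ticks(irq_ticks_per_sec);
+ *   lapic_timer_enable(reload);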
 *
- * @param desired_freq_hz The desired frequency of the Local APIC timer
+ * @return The number of Local APIC timer ticks.
  */
-void lapic_timer_initialize(uint64_t desired_freq_hz);
+uint32_t lapic_timer_calc_ticks(uint64_t desired_freq_hz);
+
+/**
+ * @brief Enables the Local APIC timer.
+ *
+ * @param reload_value Number of ticks per interrupt.
+ */
+void lapic_timer_enable(uint32_t reload_value);
+
+#ifdef RTEMS_SMP
+/**
+ * @brief Retrieves the number of available processors in the system
+ *
+ * @return Number of available processors
+ */
+uint32_t lapic_get_num_of_processors(void);
+
+/**
+ * @brief Sends an interprocessor interrupt to a specified processor.
+ *
+ * @param target_cpu_index The processor index of the target processor.
+ * @param isr_vector The vector of the interrupt being sent.
+ */
+void lapic_send_ipi(uint32_t target_cpu_index, uint8_t isr_vector);
+
+/**
+ * @brief Starts the Application Processor that corresponds to cpu_index.
+ *
+ * @param cpu_index The processor to be started.
+ * @param page_vector The 4KB page below 1MB where the trampoline code is located.
+ */
+void lapic_start_ap(uint32_t cpu_index, uint8_t page_vector);
+#endif
+
+/**
+ * @brief Retrieves the Local APIC ID
+ * @return Local APIC ID
+ */
+static inline uint8_t lapic_get_id(void)
+{
+  /* ID stored in highest 8 bits */
+  return amd64_lapic_base[LAPIC_REGISTER_ID] >> 24;
+}
 
 /**
  * @brief Signals an end of interrupt to the Local APIC
diff --git a/bsps/x86_64/amd64/include/gdt.h b/bsps/x86_64/amd64/include/gdt.h
new file mode 100644
index 0000000000..12dfe0edfb
--- /dev/null
+++ b/bsps/x86_64/amd64/include/gdt.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: BSD-2-Clause */
+
+/**
+ * @file
+ *
+ * @ingroup RTEMSBSPsX8664AMD64
+ *
+ * @brief Global Descriptor Table
+ */
+
+/*
+ * Copyright (C) 2024 Matheus Pecoraro
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef GDT_H
+#define GDT_H
+
+#define GDT_CODE_SEG_OFFSET 0x8
+#define GDT_DATA_SEG_OFFSET 0x10
+
+#ifndef ASM
+#include
+
+#include
+
+typedef struct {
+  uint16_t limit_low;        /* Lower 16 bits of the limit */
+  uint16_t base_low;         /* Lower 16 bits of the base */
+  uint8_t base_middle;       /* Next 8 bits of the base */
+  uint8_t access;            /* Access flags */
+  uint8_t gran_limit_middle; /* Granularity flags and upper 4 bits of the limit */
+  uint8_t base_high;         /* Last 8 bits of the base */
+} RTEMS_PACKED gdt_entry;
+
+typedef struct {
+  uint16_t size;
+  uint64_t addr;
+} RTEMS_PACKED gdt_desc;
+
+extern const gdt_desc amd64_gdt_descriptor;
+#endif // ASM
+
+#endif // GDT_H
diff --git a/bsps/x86_64/amd64/include/smp.h b/bsps/x86_64/amd64/include/smp.h
new file mode 100644
index 0000000000..b9c2e4250e
--- /dev/null
+++ b/bsps/x86_64/amd64/include/smp.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: BSD-2-Clause */
+
+/**
+ * @file
+ *
+ * @ingroup RTEMSBSPsX8664AMD64
+ *
+ * @brief BSP SMP definitions
+ */
+
+/*
+ * Copyright (C) 2024 Matheus Pecoraro
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define TRAMPOLINE_ADDR 0x8000
+#define TRAMPOLINE_PAGE_VECTOR (TRAMPOLINE_ADDR>>12)
+#define TRAMPOLINE_AP_FLAG_OFFSET 0x10
+
+#define WAIT_FOR_AP_TIMEOUT_MS 10
+
+#ifndef ASM
+/**
+ * @brief This routine will be called by the trampoline code
+ */
+RTEMS_NO_RETURN void smp_init_ap(void);
+#endif
diff --git a/bsps/x86_64/amd64/interrupts/apic.c b/bsps/x86_64/amd64/interrupts/apic.c
index 5e34170d3a..97626f4973 100644
--- a/bsps/x86_64/amd64/interrupts/apic.c
+++ b/bsps/x86_64/amd64/interrupts/apic.c
@@ -44,6 +44,13 @@
 extern void apic_spurious_handler(void);
 
 volatile uint32_t* amd64_lapic_base;
 
+#ifdef RTEMS_SMP
+/* Maps a Local APIC ID to its corresponding processor index */
+uint8_t amd64_lapic_to_cpu_map[xAPIC_MAX_APIC_ID + 1];
+static uint8_t cpu_to_lapic_map[xAPIC_MAX_APIC_ID + 1];
+static uint32_t lapic_count = 0;
+#endif
+
 /**
  * @brief Returns whether the system contains a local APIC or not.
* @@ -68,6 +75,19 @@ static bool has_lapic_support(void) static void madt_subtables_handler(ACPI_SUBTABLE_HEADER* entry) { switch (entry->Type) { +#ifdef RTEMS_SMP + case ACPI_MADT_TYPE_LOCAL_APIC: + ACPI_MADT_LOCAL_APIC* lapic_entry = (ACPI_MADT_LOCAL_APIC*) entry; + + if (lapic_count >= xAPIC_MAX_APIC_ID + 1 || + lapic_get_id() == lapic_entry->Id) { + break; + } + + amd64_lapic_to_cpu_map[lapic_entry->Id] = (uint8_t) lapic_count; + cpu_to_lapic_map[lapic_count++] = lapic_entry->Id; + break; +#endif case ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE: ACPI_MADT_LOCAL_APIC_OVERRIDE* lapic_override = (ACPI_MADT_LOCAL_APIC_OVERRIDE*) entry; @@ -93,6 +113,13 @@ static bool parse_madt(void) return false; } +#ifdef RTEMS_SMP + /* Ensure the boot processor is cpu index 0 */ + uint8_t lapic_id = lapic_get_id(); + amd64_lapic_to_cpu_map[lapic_id] = (uint8_t) lapic_count; + cpu_to_lapic_map[lapic_count++] = lapic_id; +#endif + amd64_lapic_base = (uint32_t*) ((uintptr_t) madt->Address); acpi_walk_subtables( (ACPI_TABLE_HEADER*) madt, @@ -103,23 +130,19 @@ static bool parse_madt(void) return true; } -static uint32_t lapic_timer_calibrate(void) +/** + * @brief Calculates the amount of LAPIC timer ticks per second using the PIT. + * + * @return The amount of ticks per second calculated. + */ +static uint32_t lapic_timer_calc_ticks_per_sec(void) { /* Configure LAPIC timer in one-shot mode to prepare for calibration */ amd64_lapic_base[LAPIC_REGISTER_LVT_TIMER] = BSP_VECTOR_APIC_TIMER; amd64_lapic_base[LAPIC_REGISTER_TIMER_DIV] = LAPIC_TIMER_SELECT_DIVIDER; - /* Enable the channel 2 timer gate and disable the speaker output */ - uint8_t chan2_value = (inport_byte(PIT_PORT_CHAN2_GATE) | PIT_CHAN2_TIMER_BIT) - & ~PIT_CHAN2_SPEAKER_BIT; - outport_byte(PIT_PORT_CHAN2_GATE, chan2_value); - - /* Initialize PIT in one-shot mode on Channel 2 */ - outport_byte( - PIT_PORT_MCR, - PIT_SELECT_CHAN2 | PIT_SELECT_ACCESS_LOHI | - PIT_SELECT_ONE_SHOT_MODE | PIT_SELECT_BINARY_MODE - ); + uint8_t chan2_value; + PIT_CHAN2_ENABLE(chan2_value); /* * Make sure interrupts are disabled while we calibrate for 2 reasons: @@ -132,31 +155,16 @@ static uint32_t lapic_timer_calibrate(void) rtems_interrupt_level level; rtems_interrupt_local_disable(level); - /* Set PIT reload value */ uint32_t pit_ticks = PIT_CALIBRATE_TICKS; - uint8_t low_tick = pit_ticks & 0xff; - uint8_t high_tick = (pit_ticks >> 8) & 0xff; - - outport_byte(PIT_PORT_CHAN2, low_tick); - stub_io_wait(); - outport_byte(PIT_PORT_CHAN2, high_tick); + PIT_CHAN2_WRITE_TICKS(pit_ticks); /* Start LAPIC timer's countdown */ const uint32_t lapic_calibrate_init_count = 0xffffffff; - /* Restart PIT by disabling the gated input and then re-enabling it */ - chan2_value &= ~PIT_CHAN2_TIMER_BIT; - outport_byte(PIT_PORT_CHAN2_GATE, chan2_value); - chan2_value |= PIT_CHAN2_TIMER_BIT; - outport_byte(PIT_PORT_CHAN2_GATE, chan2_value); + PIT_CHAN2_START_DELAY(chan2_value); amd64_lapic_base[LAPIC_REGISTER_TIMER_INITCNT] = lapic_calibrate_init_count; - while ( pit_ticks <= PIT_CALIBRATE_TICKS ) { - /* Send latch command to read multi-byte value atomically */ - outport_byte(PIT_PORT_MCR, PIT_SELECT_CHAN2); - pit_ticks = inport_byte(PIT_PORT_CHAN2); - pit_ticks |= inport_byte(PIT_PORT_CHAN2) << 8; - } + PIT_CHAN2_WAIT_DELAY(pit_ticks); uint32_t lapic_currcnt = amd64_lapic_base[LAPIC_REGISTER_TIMER_CURRCNT]; DBG_PRINTF("PIT stopped at 0x%" PRIx32 "\n", pit_ticks); @@ -193,6 +201,30 @@ static uint32_t lapic_timer_calibrate(void) return lapic_ticks_per_sec; } +#ifdef RTEMS_SMP +/** + * @brief Sends an 
interprocessor interrupt
+ *
+ * @param dest_id Local APIC ID of the destination
+ * @param icr_low The flags to be written to the low half of the ICR
+ */
+static void send_ipi(uint8_t dest_id, uint32_t icr_low)
+{
+  amd64_lapic_base[LAPIC_REGISTER_ICR_HIGH] =
+    (amd64_lapic_base[LAPIC_REGISTER_ICR_HIGH] & LAPIC_ICR_HIGH_MASK) | (dest_id << 24);
+
+  amd64_lapic_base[LAPIC_REGISTER_ICR_LOW] =
+    (amd64_lapic_base[LAPIC_REGISTER_ICR_LOW] & LAPIC_ICR_LOW_MASK) | icr_low;
+}
+
+static void wait_ipi(void)
+{
+  while (amd64_lapic_base[LAPIC_REGISTER_ICR_LOW] & LAPIC_ICR_DELIV_STAT_PEND) {
+    amd64_spinwait();
+  }
+}
+#endif
+
 bool lapic_initialize(void)
 {
   if (has_lapic_support() == false || parse_madt() == false) {
@@ -210,8 +242,8 @@ bool lapic_initialize(void)
   DBG_PRINTF("APIC is at 0x%" PRIxPTR "\n", (uintptr_t) amd64_lapic_base);
   DBG_PRINTF(
     "APIC ID at *0x%" PRIxPTR "=0x%" PRIx32 "\n",
-    (uintptr_t) &amd64_lapic_base[LAPIC_REGISTER_APICID],
-    amd64_lapic_base[LAPIC_REGISTER_APICID]
+    (uintptr_t) &amd64_lapic_base[LAPIC_REGISTER_ID],
+    amd64_lapic_base[LAPIC_REGISTER_ID]
   );
 
   DBG_PRINTF(
@@ -248,14 +280,12 @@
   return true;
 }
 
-void lapic_timer_initialize(uint64_t desired_freq_hz)
+uint32_t lapic_timer_calc_ticks(uint64_t desired_freq_hz)
 {
   uint32_t lapic_ticks_per_sec = 0;
-  uint32_t lapic_tick_collections[LAPIC_TIMER_NUM_CALIBRATIONS] = {0};
   uint64_t lapic_tick_total = 0;
   for (uint32_t i = 0; i < LAPIC_TIMER_NUM_CALIBRATIONS; i++) {
-    lapic_tick_collections[i] = lapic_timer_calibrate();
-    lapic_tick_total += lapic_tick_collections[i];
+    lapic_tick_total += lapic_timer_calc_ticks_per_sec();
   }
   lapic_ticks_per_sec = lapic_tick_total / LAPIC_TIMER_NUM_CALIBRATIONS;
@@ -264,14 +294,73 @@
    * frequency (and we use a frequency divider).
    *
    * This means:
-   *   apic_ticks_per_sec = (cpu_bus_frequency / timer_divide_value)
+   *   lapic_ticks_per_sec = (cpu_bus_frequency / timer_divide_value)
    *
    * Therefore:
-   *   reload_value = apic_ticks_per_sec / desired_freq_hz
+   *   reload_value = lapic_ticks_per_sec / desired_freq_hz
    */
-  uint32_t lapic_timer_reload_value = lapic_ticks_per_sec / desired_freq_hz;
+  return lapic_ticks_per_sec / desired_freq_hz;
+}
 
+void lapic_timer_enable(uint32_t reload_value)
+{
   amd64_lapic_base[LAPIC_REGISTER_LVT_TIMER] = BSP_VECTOR_APIC_TIMER | LAPIC_SELECT_TMR_PERIODIC;
   amd64_lapic_base[LAPIC_REGISTER_TIMER_DIV] = LAPIC_TIMER_SELECT_DIVIDER;
-  amd64_lapic_base[LAPIC_REGISTER_TIMER_INITCNT] = lapic_timer_reload_value;
+  amd64_lapic_base[LAPIC_REGISTER_TIMER_INITCNT] = reload_value;
 }
+
+#ifdef RTEMS_SMP
+uint32_t lapic_get_num_of_processors(void)
+{
+  return lapic_count;
+}
+
+void lapic_send_ipi(uint32_t target_cpu_index, uint8_t isr_vector)
+{
+  uint8_t target_lapic_id = cpu_to_lapic_map[target_cpu_index];
+  send_ipi(target_lapic_id, isr_vector);
+  wait_ipi();
+}
+
+/**
+ * This routine attempts to follow the algorithm described in the
+ * Intel Multiprocessor Specification v1.4 in section B.4.
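+ *
+ * In short: assert the INIT IPI, deassert it, wait 10 ms, then send a
+ * STARTUP IPI pointing at the trampoline page, wait 200 us, and send the
+ * STARTUP IPI once more in case the first one was ignored.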
+ */ +void lapic_start_ap(uint32_t cpu_index, uint8_t page_vector) +{ + if (cpu_index >= lapic_count) { + return; + } + + uint8_t lapic_id = cpu_to_lapic_map[cpu_index]; + + uint8_t chan2_value; + PIT_CHAN2_ENABLE(chan2_value); + uint32_t pit_ticks = PIT_FREQUENCY/100; /* 10 miliseconds */ + PIT_CHAN2_WRITE_TICKS(pit_ticks); + + /* INIT IPI */ + send_ipi(lapic_id, LAPIC_ICR_DELIV_INIT | LAPIC_ICR_ASSERT | LAPIC_ICR_TRIG_LEVEL); + wait_ipi(); + /* Deassert INIT IPI */ + send_ipi(lapic_id, LAPIC_ICR_DELIV_INIT | LAPIC_ICR_TRIG_LEVEL); + /* Wait 10ms */ + PIT_CHAN2_START_DELAY(chan2_value); + PIT_CHAN2_WAIT_DELAY(pit_ticks); + + pit_ticks = PIT_FREQUENCY/5000; /* 200 microseconds */ + PIT_CHAN2_WRITE_TICKS(pit_ticks); + + /* STARTUP IPI */ + send_ipi(lapic_id, LAPIC_ICR_DELIV_START | page_vector); + wait_ipi(); + /* Wait 200us */ + PIT_CHAN2_START_DELAY(chan2_value); + PIT_CHAN2_WAIT_DELAY(pit_ticks); + /** + * It is possible that the first STARTUP IPI sent is ignored + * so we send it twice. + */ + send_ipi(lapic_id, LAPIC_ICR_DELIV_START | page_vector); +} +#endif diff --git a/bsps/x86_64/amd64/interrupts/idt.c b/bsps/x86_64/amd64/interrupts/idt.c index b9c45a1b22..d037045384 100644 --- a/bsps/x86_64/amd64/interrupts/idt.c +++ b/bsps/x86_64/amd64/interrupts/idt.c @@ -40,6 +40,7 @@ #include #include #include +#include #include @@ -49,7 +50,7 @@ */ interrupt_descriptor amd64_idt[IDT_SIZE] RTEMS_ALIGNED(8) = { { 0 } }; -struct idt_record idtr = { +struct idt_record amd64_idtr = { .limit = (IDT_SIZE * 16) - 1, .base = (uintptr_t) amd64_idt }; @@ -91,7 +92,8 @@ static uintptr_t rtemsIRQs[BSP_IRQ_VECTOR_NUMBER] = { (uintptr_t) rtems_irq_prologue_29, (uintptr_t) rtems_irq_prologue_30, (uintptr_t) rtems_irq_prologue_31, - (uintptr_t) rtems_irq_prologue_32 + (uintptr_t) rtems_irq_prologue_32, + (uintptr_t) rtems_irq_prologue_33 }; void lidt(struct idt_record *ptr) @@ -147,7 +149,7 @@ void bsp_interrupt_facility_initialize(void) amd64_install_raw_interrupt(i, rtemsIRQs[i], &old); } - lidt(&idtr); + lidt(&amd64_idtr); if (lapic_initialize() == false) { bsp_fatal(BSP_FATAL_INTERRUPT_INITIALIZATION); @@ -156,7 +158,7 @@ void bsp_interrupt_facility_initialize(void) rtems_status_code bsp_interrupt_vector_disable(rtems_vector_number vector) { - /* XXX */ + /* XXX: Should be implemented once I/O APIC support is added */ return RTEMS_SUCCESSFUL; } @@ -223,6 +225,37 @@ rtems_status_code bsp_interrupt_vector_is_enabled( rtems_status_code bsp_interrupt_vector_enable(rtems_vector_number vector) { - /* XXX */ + /* XXX: Should be implemented once I/O APIC support is added */ return RTEMS_SUCCESSFUL; } + +#ifdef RTEMS_SMP +rtems_status_code bsp_interrupt_get_affinity( + rtems_vector_number vector, + Processor_mask *affinity +) +{ + (void) vector; + _Processor_mask_From_index( affinity, 0 ); + return RTEMS_UNSATISFIED; +} + +rtems_status_code bsp_interrupt_set_affinity( + rtems_vector_number vector, + const Processor_mask *affinity +) +{ + (void) vector; + (void) affinity; + return RTEMS_UNSATISFIED; +} + +rtems_status_code bsp_interrupt_raise_on( + rtems_vector_number vector, + uint32_t cpu_index +) +{ + bsp_interrupt_assert(bsp_interrupt_is_valid_vector(vector)); + return RTEMS_UNSATISFIED; +} +#endif diff --git a/bsps/x86_64/amd64/interrupts/isr_handler.S b/bsps/x86_64/amd64/interrupts/isr_handler.S index fd8c77683c..36cf80a590 100644 --- a/bsps/x86_64/amd64/interrupts/isr_handler.S +++ b/bsps/x86_64/amd64/interrupts/isr_handler.S @@ -4,6 +4,7 @@ */ /* + * Copyright (C) 2024 Matheus Pecoraro * Copyright (c) 2018. 
* Amaan Cheval * @@ -48,8 +49,8 @@ SYM(apic_spurious_handler): * interrupts as persistent scratch registers (i.e. calls will not destroy * them), as long as we also save and restore it for the interrupted task. */ -.set SCRATCH_REG0, rbp -.set SCRATCH_REG1, rbx +.set SCRATCH_REG0, rbp +.set SCRATCH_REG1, rbx /* * We need to set a distinct handler for every interrupt vector so that @@ -98,6 +99,7 @@ DISTINCT_INTERRUPT_ENTRY(29) DISTINCT_INTERRUPT_ENTRY(30) DISTINCT_INTERRUPT_ENTRY(31) DISTINCT_INTERRUPT_ENTRY(32) +DISTINCT_INTERRUPT_ENTRY(33) SYM(_ISR_Handler): .save_cpu_interrupt_frame: @@ -105,36 +107,32 @@ SYM(_ISR_Handler): movq rsp, SAVED_RSP /* Make space for CPU_Interrupt_frame */ - subq $CPU_INTERRUPT_FRAME_SIZE, rsp + subq $CPU_INTERRUPT_FRAME_CALLER_SAVED_SIZE, rsp .set ALIGNMENT_MASK, ~(CPU_STACK_ALIGNMENT - 1) andq $ALIGNMENT_MASK, rsp // XXX: Save interrupt mask? - /* Save caller-saved registers to CPU_Interrupt_frame */ - movq rax, (8 * CPU_SIZEOF_POINTER)(rsp) - movq rcx, (7 * CPU_SIZEOF_POINTER)(rsp) - movq rdx, (6 * CPU_SIZEOF_POINTER)(rsp) - movq rsi, (5 * CPU_SIZEOF_POINTER)(rsp) - movq r8, (4 * CPU_SIZEOF_POINTER)(rsp) - movq r9, (3 * CPU_SIZEOF_POINTER)(rsp) - movq r10, (2 * CPU_SIZEOF_POINTER)(rsp) - movq r11, (1 * CPU_SIZEOF_POINTER)(rsp) - - /* Save the initial rsp */ - movq SAVED_RSP, (0 * CPU_SIZEOF_POINTER)(rsp) - /* Save x87 FPU, MMX and SSE state */ -.set FXSAVE_SIZE, 512 - /* Make space for FXSAVE */ - subq $FXSAVE_SIZE, rsp fwait - fxsave64 (rsp) + fxsave64 (CPU_INTERRUPT_FRAME_SSE_STATE)(rsp) /* Reset to a clean state */ fninit - subq $4, rsp - movl $0x1F80, (rsp) - ldmxcsr (rsp) - addq $4, rsp + /* Use CPU_INTERRUPT_FRAME_RAX as scratch space */ + movl $0x1F80, (CPU_INTERRUPT_FRAME_RAX)(rsp) + ldmxcsr (CPU_INTERRUPT_FRAME_RAX)(rsp) + + /* Save caller-saved registers to CPU_Interrupt_frame */ + movq rax, (CPU_INTERRUPT_FRAME_RAX)(rsp) + movq rcx, (CPU_INTERRUPT_FRAME_RCX)(rsp) + movq rdx, (CPU_INTERRUPT_FRAME_RDX)(rsp) + movq rsi, (CPU_INTERRUPT_FRAME_RSI)(rsp) + movq r8, (CPU_INTERRUPT_FRAME_R8)(rsp) + movq r9, (CPU_INTERRUPT_FRAME_R9)(rsp) + movq r10, (CPU_INTERRUPT_FRAME_R10)(rsp) + movq r11, (CPU_INTERRUPT_FRAME_R11)(rsp) + + /* Save the initial rsp */ + movq SAVED_RSP, (CPU_INTERRUPT_FRAME_RSP)(rsp) .switch_stack_if_needed: /* Save current aligned rsp so we can find CPU_Interrupt_frame again later */ @@ -145,18 +143,15 @@ SYM(_ISR_Handler): * outermost interrupt, which means we've been using the task's stack so far */ -#ifdef RTEMS_SMP - /* XXX: We should call _CPU_SMP_Get_current_processor here */ -#endif .set Per_CPU_Info, SCRATCH_REG1 - movq $SYM(_Per_CPU_Information), Per_CPU_Info - cmpq $0, PER_CPU_ISR_NEST_LEVEL(Per_CPU_Info) + GET_SELF_CPU_CONTROL_RBX /* SCRATCH_REG1 == rbx */ + cmpl $0, PER_CPU_ISR_NEST_LEVEL(Per_CPU_Info) jne .skip_switch .switch_stack: movq PER_CPU_INTERRUPT_STACK_HIGH(Per_CPU_Info), rsp .skip_switch: - incq PER_CPU_ISR_NEST_LEVEL(Per_CPU_Info) - incq PER_CPU_THREAD_DISPATCH_DISABLE_LEVEL(Per_CPU_Info) + incl PER_CPU_ISR_NEST_LEVEL(Per_CPU_Info) + incl PER_CPU_THREAD_DISPATCH_DISABLE_LEVEL(Per_CPU_Info) .call_isr_dispatch: /* REG_ARG0 already includes the vector number, so we can simply call */ @@ -166,37 +161,61 @@ SYM(_ISR_Handler): /* If this is the outermost stack, this restores the task stack */ movq SAVED_RSP, rsp - decq PER_CPU_ISR_NEST_LEVEL(Per_CPU_Info) - decq PER_CPU_THREAD_DISPATCH_DISABLE_LEVEL(Per_CPU_Info) - /* XXX: Bug in QEMU causing ZF to not be set by decq necessitating the cmpb */ - cmpb $0, 
PER_CPU_THREAD_DISPATCH_DISABLE_LEVEL(Per_CPU_Info)
-  /* If dispatch is non-zero, it is disabled, so skip scheduling it */
+  decl PER_CPU_ISR_NEST_LEVEL(Per_CPU_Info)
+  decl PER_CPU_THREAD_DISPATCH_DISABLE_LEVEL(Per_CPU_Info)
+  movl PER_CPU_THREAD_DISPATCH_DISABLE_LEVEL(Per_CPU_Info), %eax
+  orl PER_CPU_ISR_DISPATCH_DISABLE(Per_CPU_Info), %eax
+  /**
+   * If either the thread dispatch disable level or ISR dispatch disable
+   * is non-zero, skip scheduling the dispatch
+   */
+  cmpl $0, %eax
   jne .restore_cpu_interrupt_frame
+  /* Only call _Thread_Do_dispatch if dispatch needed is not 0 */
+  cmpb $0, PER_CPU_DISPATCH_NEEDED(Per_CPU_Info)
+  je .restore_cpu_interrupt_frame
 
 .schedule_dispatch:
+  /* Set ISR dispatch disable and thread dispatch disable level to one */
+  movl $1, PER_CPU_ISR_DISPATCH_DISABLE(Per_CPU_Info)
+  movl $1, PER_CPU_THREAD_DISPATCH_DISABLE_LEVEL(Per_CPU_Info)
+
+  /* Call _Thread_Do_dispatch(); this function will enable interrupts */
+  movq Per_CPU_Info, REG_ARG0
+  movq $CPU_ISR_LEVEL_ENABLED, REG_ARG1 /* Set interrupt flag manually */
+  call _Thread_Do_dispatch
+
+  /* Disable interrupts */
+  cli
+
+  /**
+   * It is possible that after returning from _Thread_Do_dispatch the
+   * Per_CPU_Info has changed
+   */
+  GET_SELF_CPU_CONTROL_RBX /* Per_CPU_Info == SCRATCH_REG1 == rbx */
   cmpb $0, PER_CPU_DISPATCH_NEEDED(Per_CPU_Info)
-  je .restore_cpu_interrupt_frame
-  call _Thread_Dispatch
+  jne .schedule_dispatch
+
+  /* Done with thread dispatching */
+  movl $0, PER_CPU_ISR_DISPATCH_DISABLE(Per_CPU_Info)
 
 .restore_cpu_interrupt_frame:
   /* Restore x87 FPU, MMX and SSE state */
   fwait
-  fxrstor64 (rsp)
-  /* Restore rsp to CPU_Interrupt_frame */
-  addq $FXSAVE_SIZE, rsp
+  fxrstor64 (CPU_INTERRUPT_FRAME_SSE_STATE)(rsp)
 
   /* Restore registers from CPU_Interrupt_frame */
-  movq (8 * CPU_SIZEOF_POINTER)(rsp), rax
-  movq (7 * CPU_SIZEOF_POINTER)(rsp), rcx
-  movq (6 * CPU_SIZEOF_POINTER)(rsp), rdx
-  movq (5 * CPU_SIZEOF_POINTER)(rsp), rsi
-  movq (4 * CPU_SIZEOF_POINTER)(rsp), r8
-  movq (3 * CPU_SIZEOF_POINTER)(rsp), r9
-  movq (2 * CPU_SIZEOF_POINTER)(rsp), r10
-  movq (1 * CPU_SIZEOF_POINTER)(rsp), r11
+  movq (CPU_INTERRUPT_FRAME_RAX)(rsp), rax
+  movq (CPU_INTERRUPT_FRAME_RCX)(rsp), rcx
+  movq (CPU_INTERRUPT_FRAME_RDX)(rsp), rdx
+  movq (CPU_INTERRUPT_FRAME_RSI)(rsp), rsi
+  movq (CPU_INTERRUPT_FRAME_R8)(rsp), r8
+  movq (CPU_INTERRUPT_FRAME_R9)(rsp), r9
+  movq (CPU_INTERRUPT_FRAME_R10)(rsp), r10
+  movq (CPU_INTERRUPT_FRAME_R11)(rsp), r11
 
   /* Restore the rsp value from just before _ISR_Handler was called */
-  movq (0 * CPU_SIZEOF_POINTER)(rsp), SAVED_RSP
+  movq (CPU_INTERRUPT_FRAME_RSP)(rsp), SAVED_RSP
   movq SAVED_RSP, rsp
 
   /* Restore args DISTINCT_INTERRUPT_ENTRY pushed to task stack */
diff --git a/bsps/x86_64/amd64/start/ap_trampoline.S b/bsps/x86_64/amd64/start/ap_trampoline.S
new file mode 100644
index 0000000000..5ae89d43dc
--- /dev/null
+++ b/bsps/x86_64/amd64/start/ap_trampoline.S
@@ -0,0 +1,147 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (C) 2024 Matheus Pecoraro
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * This file contains the trampoline code that will be executed by every + * Application Processor when first started. + */ + +#include +#include +#include +#include + +.set PM_GDT_CODE_OFFSET, 0x08 # Offset of code segment descriptor into GDT +.set PM_GDT_DATA_OFFSET, 0x10 # Offset of data segment descriptor into GDT +.set CR0_PE, 1 # Protected mode flag on CR0 register +.set CR0_PG, 0x80000000 # Paging flag on CR0 register +.set CR0_EM_BITMASK, (~0x4) # Bitmask for disabling x87 FPU Emulation bit +.set CR0_MP, 0x2 # Monitor co-processor flag +.set CR4_PAE, 0x20 # Physical Address Extension flag on CR4 register +.set CR4_OSFXSR, 0x200 # OS support for FXSAVE and FXRSTOR flag +.set CR4_OSXMMEXCPT, 0x400 # OS support for unmasked SIMD FP exceptions flag +.set CR4_SSEFLAGS, (CR4_OSFXSR | CR4_OSXMMEXCPT) +.set EFER_MSR, 0xC0000080 # EFER MSR number +.set EFER_MSR_LME, 0x100 # Long Mode Enable flag on the EFER MSR + +BEGIN_CODE + +.code16 +PUBLIC(_Trampoline_start) +SYM(_Trampoline_start): + cli + cld + jmp .real_mode + +.real_mode: + lgdt (gdt_desc - _Trampoline_start) + TRAMPOLINE_ADDR + + # Enter protected mode + movl %cr0, %eax + orl $CR0_PE, %eax + movl %eax, %cr0 + + # Jump to protected mode + ljmpl $PM_GDT_CODE_OFFSET, $((.protected_mode - _Trampoline_start) + TRAMPOLINE_ADDR) + +.code32 +.protected_mode: + # Load data segment registers + movw $PM_GDT_DATA_OFFSET, %ax + movw %ax, %ds + movw %ax, %es + movw %ax, %ss + + # Move PML4 table address to cr3 + movl $amd64_pml4, %eax + movl %eax, %cr3 + + # Flip PAE bit in cr4 + movl %cr4, %eax + orl $CR4_PAE, %eax + movl %eax, %cr4 + + # Set LME on the EFER MSR + movl $EFER_MSR, %ecx + rdmsr + orl $EFER_MSR_LME, %eax + wrmsr + + # Enable paging + movl %cr0, %eax + orl $CR0_PG, %eax + movl %eax, %cr0 + + # Update GDT for long mode + lgdt amd64_gdt_descriptor + + # Jump to long mode + ljmp $GDT_CODE_SEG_OFFSET, $((.long_mode - _Trampoline_start) + TRAMPOLINE_ADDR) + +.code64 +.long_mode: + # Load data segment registers + movw $GDT_DATA_SEG_OFFSET, %ax + movw %ax, %ds + movw %ax, %es + movw %ax, %ss + + # Acquire the processor's stack + GET_SELF_CPU_CONTROL_RAX + movq PER_CPU_INTERRUPT_STACK_HIGH(rax), rsp + + # Enable SSE + movq %cr0, rax + andq $CR0_EM_BITMASK, rax + orq $CR0_MP, rax + movq rax, %cr0 + movq %cr4, rax + orq $CR4_SSEFLAGS, rax + movq rax, %cr4 + + # Exit trampoline code + movabsq $smp_init_ap, rax + call *rax + +/* Temporary GDT used to get to long mode */ +gdt: + /* NULL segment */ + .quad 0 + /* Code segment */ + .word 0xFFFF, 0 + .byte 0, 0x9F, 0xCF, 0 + /* Data segment */ + .word 0xFFFF, 0 + .byte 0, 0x92, 0xCF, 0 +gdt_desc: + .word (gdt_desc - gdt) - 1 + .long (gdt - _Trampoline_start) + TRAMPOLINE_ADDR + +trampoline_end: 
+PUBLIC(_Trampoline_size)
+SYM(_Trampoline_size):
+.quad (trampoline_end - _Trampoline_start)
diff --git a/bsps/x86_64/amd64/start/bspreset.c b/bsps/x86_64/amd64/start/bspreset.c
index 587e1c72c4..7a8ff07858 100644
--- a/bsps/x86_64/amd64/start/bspreset.c
+++ b/bsps/x86_64/amd64/start/bspreset.c
@@ -46,10 +46,15 @@ void bsp_reset( rtems_fatal_source source, rtems_fatal_code code )
   (void) source;
   (void) code;
 
+  /**
+   * It is possible that AcpiEnterSleepStatePrep causes a thread dispatch,
+   * so we execute it with interrupts enabled
+   */
+  amd64_enable_interrupts();
   ACPI_STATUS status = AcpiEnterSleepStatePrep(ACPI_STATE_S5);
+  amd64_disable_interrupts();
 
   if (status == AE_OK) {
-    amd64_disable_interrupts();
     AcpiEnterSleepState(ACPI_STATE_S5);
   }
diff --git a/bsps/x86_64/amd64/start/bspsmp.c b/bsps/x86_64/amd64/start/bspsmp.c
new file mode 100644
index 0000000000..73a6898ab2
--- /dev/null
+++ b/bsps/x86_64/amd64/start/bspsmp.c
@@ -0,0 +1,151 @@
+/* SPDX-License-Identifier: BSD-2-Clause */
+
+/**
+ * @file
+ *
+ * @ingroup RTEMSBSPsX8664AMD64
+ *
+ * @brief BSP SMP implementation
+ */
+
+/*
+ * Copyright (C) 2024 Matheus Pecoraro
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+extern void _Trampoline_start(void);
+extern const uint64_t _Trampoline_size;
+
+static bool has_ap_started = false;
+
+/**
+ * @brief Copies the trampoline code to the address where APs will boot from.
+ *
+ * TODO: We should ideally parse the UEFI Memory Map to find a
+ * free page below 1MB
+ */
+static void copy_trampoline(void)
+{
+  /* Copy the trampoline code to its destination address */
+  void* trampoline_dest = (void*) TRAMPOLINE_ADDR;
+  memcpy(trampoline_dest, (void*) _Trampoline_start, _Trampoline_size);
+}
+
+/**
+ * @brief Waits for the Application Processor to set the flag.
+ *
+ * @param timeout_ms Timeout in milliseconds.
+ *
+ * @return true if successful.
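+ *
+ * _CPU_SMP_Start_processor() below uses this with WAIT_FOR_AP_TIMEOUT_MS
+ * (10 ms), polling in 1 ms PIT delays until the started AP runs
+ * smp_init_ap() and sets has_ap_started.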
+ */ +static bool wait_for_ap(uint32_t timeout_ms) +{ + uint8_t chan2_value; + uint32_t pit_ticks = PIT_FREQUENCY/1000; /* a milisecond */ + + PIT_CHAN2_ENABLE(chan2_value); + PIT_CHAN2_WRITE_TICKS(pit_ticks); + + for (int i = 0; has_ap_started == false && i < timeout_ms; i++) { + PIT_CHAN2_START_DELAY(chan2_value); + PIT_CHAN2_WAIT_DELAY(pit_ticks); + amd64_spinwait(); + } + + return has_ap_started; +} + +static void bsp_inter_processor_interrupt(void* arg) +{ + _SMP_Inter_processor_interrupt_handler(_Per_CPU_Get()); + lapic_eoi(); +} + +void _CPU_SMP_Prepare_start_multitasking(void) +{ + /* Do nothing */ +} + +bool _CPU_SMP_Start_processor(uint32_t cpu_index) +{ + has_ap_started = false; + lapic_start_ap(cpu_index, TRAMPOLINE_PAGE_VECTOR); + return wait_for_ap(WAIT_FOR_AP_TIMEOUT_MS); +} + +uint32_t _CPU_SMP_Get_current_processor(void) +{ + uint8_t lapic_id = lapic_get_id(); + return amd64_lapic_to_cpu_map[lapic_id]; +} + +uint32_t _CPU_SMP_Initialize(void) +{ + copy_trampoline(); + return lapic_get_num_of_procesors(); +} + +void _CPU_SMP_Finalize_initialization(uint32_t cpu_count) +{ + if (cpu_count > 0) { + rtems_status_code sc = rtems_interrupt_handler_install( + BSP_VECTOR_IPI, + "IPI", + RTEMS_INTERRUPT_UNIQUE, + bsp_inter_processor_interrupt, + NULL + ); + assert(sc == RTEMS_SUCCESSFUL); + } +} + +void _CPU_SMP_Send_interrupt(uint32_t target_processor_index) +{ + lapic_send_ipi(target_processor_index, BSP_VECTOR_IPI); +} + +void smp_init_ap(void) +{ + has_ap_started = true; + + Context_Control_fp* null_fp_context_p = &_CPU_Null_fp_context; + _CPU_Context_restore_fp(&null_fp_context_p); + + amd64_lapic_base[LAPIC_REGISTER_SPURIOUS] = + LAPIC_SPURIOUS_ENABLE | BSP_VECTOR_SPURIOUS; + + lidt(&amd64_idtr); + + _SMP_Start_multitasking_on_secondary_processor(_Per_CPU_Get()); +} diff --git a/bsps/x86_64/amd64/start/bspstart.c b/bsps/x86_64/amd64/start/bspstart.c index ef3e21da83..00a4e6d758 100644 --- a/bsps/x86_64/amd64/start/bspstart.c +++ b/bsps/x86_64/amd64/start/bspstart.c @@ -34,13 +34,13 @@ * SUCH DAMAGE. */ -#include #include #include #include #include #include #include +#include #if defined(BSP_USE_EFI_BOOT_SERVICES) && !defined(BSP_MULTIBOOT_SUPPORT) #error "RTEMS amd64efi BSP requires multiboot2 support!" @@ -53,7 +53,8 @@ void bsp_start(void) if (!uefi_bootservices_running()) { #endif paging_init(); - assert(acpi_tables_initialize()); + bool acpi_table_result = acpi_tables_initialize(); + _Assert(acpi_table_result); bsp_interrupt_initialize(); #ifdef BSP_MULTIBOOT_SUPPORT } diff --git a/bsps/x86_64/amd64/start/gdt.c b/bsps/x86_64/amd64/start/gdt.c new file mode 100644 index 0000000000..3eef3f8b91 --- /dev/null +++ b/bsps/x86_64/amd64/start/gdt.c @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: BSD-2-Clause */ + +/** + * @file + * + * @ingroup RTEMSBSPsX8664AMD64 + * + * @brief Global Descriptor Table + */ + +/* + * Copyright (C) 2024 Matheus Pecoraro + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +static const gdt_entry gdt_entries[] = { + {0, 0, 0, 0, 0, 0}, /* NULL segment */ + {0xFFFF, 0, 0, 0x9E, 0xAF, 0}, /* Code segment */ + {0xFFFF, 0, 0, 0x92, 0xCF, 0} /* Data segment */ +}; + +const gdt_desc amd64_gdt_descriptor = { + sizeof(gdt_entries) - 1, + (uint64_t) gdt_entries +}; diff --git a/bsps/x86_64/amd64/start/start.S b/bsps/x86_64/amd64/start/start.S index c6ad69f524..58f9181870 100644 --- a/bsps/x86_64/amd64/start/start.S +++ b/bsps/x86_64/amd64/start/start.S @@ -27,6 +27,7 @@ */ #include +#include #ifdef BSP_MULTIBOOT_SUPPORT #include @@ -41,12 +42,31 @@ _start: .cfi_startproc movq %rsp, %rbp /* - * _ISR_Stack_area_end is aligned to CPU_INTERRUPT_STACK_ALIGNMENT (64 bits) - * by compiler directive at /cpukit/include/rtems/confdefs/percpu.h + * _ISR_Stack_size is aligned to CPU_INTERRUPT_STACK_ALIGNMENT (64 bits) + * by compiler directive at /cpukit/include/rtems/confdefs/percpu.h. * No reference will occur beyond the stack region since the call - * instruction will decrement the %rsp and then save the value of %rip + * instruction will decrement the %rsp and then save the value of %rip. 
*/ - movabsq $_ISR_Stack_area_end, %rsp + movabsq $_ISR_Stack_area_begin, %rsp + addq $_ISR_Stack_size, %rsp + +#ifndef BSP_USE_EFI_BOOT_SERVICES + /* Use our own GDT instead of the one set up by UEFI */ + lgdt amd64_gdt_descriptor + + /* Load data segment registers */ + movw $GDT_DATA_SEG_OFFSET, %ax + movw %ax, %ds + movw %ax, %es + movw %ax, %ss + + /* Load code segment register */ + pushq $GDT_CODE_SEG_OFFSET + pushq $after_load_gdt + retfq + +after_load_gdt: +#endif #ifndef BSP_MULTIBOOT_SUPPORT /** diff --git a/cpukit/score/cpu/x86_64/include/rtems/asm.h b/cpukit/score/cpu/x86_64/include/rtems/asm.h index f60237b91a..0381388271 100644 --- a/cpukit/score/cpu/x86_64/include/rtems/asm.h +++ b/cpukit/score/cpu/x86_64/include/rtems/asm.h @@ -40,6 +40,7 @@ #endif #include #include +#include /** * @defgroup RTEMSScoreCPUx86-64ASM x86-64 Assembler Support @@ -151,6 +152,53 @@ */ #define EXTERN(sym) .globl SYM (sym) +#ifdef RTEMS_SMP +/* REG32 must be the lower 32 bits of REG */ +.macro GET_CPU_INDEX REG REG32 + .set LAPIC_ID, 0x20 + .set LAPIC_ID_SHIFT, 24 + movq amd64_lapic_base, \REG + movl LAPIC_ID(\REG), \REG32 + shrq $LAPIC_ID_SHIFT, \REG /* LAPIC_ID in REG */ + movzbq amd64_lapic_to_cpu_map(\REG), \REG /* CPU ID in REG */ +.endm + +/* REG32 must be the lower 32 bits of REG */ +.macro GET_SELF_CPU_CONTROL REG REG32 + GET_CPU_INDEX \REG \REG32 + shlq $PER_CPU_CONTROL_SIZE_LOG2, \REG /* Calculate offset for CPU structure */ + leaq _Per_CPU_Information(\REG), \REG /* Address of info for current CPU in REG */ +.endm +#else +.macro GET_CPU_INDEX REG REG32 + movq $0, \REG +.endm + +.macro GET_SELF_CPU_CONTROL REG REG32 + leaq _Per_CPU_Information, \REG +.endm #endif +/* Couldn't find a better way to do this under the GNU as macro limitations */ +.macro GET_SELF_CPU_CONTROL_RAX + GET_SELF_CPU_CONTROL rax,%eax +.endm +.macro GET_SELF_CPU_CONTROL_RBX + GET_SELF_CPU_CONTROL rbx,%ebx +.endm +.macro GET_SELF_CPU_CONTROL_RCX + GET_SELF_CPU_CONTROL rcx,%ecx +.endm +.macro GET_SELF_CPU_CONTROL_RDX + GET_SELF_CPU_CONTROL rdx,%edx +.endm +.macro GET_SELF_CPU_CONTROL_RDI + GET_SELF_CPU_CONTROL rdi,%edi +.endm +.macro GET_SELF_CPU_CONTROL_RSI + GET_SELF_CPU_CONTROL rsi,%esi +.endm + +#endif // _RTEMS_ASM_H + /** @} */ diff --git a/cpukit/score/cpu/x86_64/include/rtems/score/cpu.h b/cpukit/score/cpu/x86_64/include/rtems/score/cpu.h index ef757a0618..e9ce707090 100644 --- a/cpukit/score/cpu/x86_64/include/rtems/score/cpu.h +++ b/cpukit/score/cpu/x86_64/include/rtems/score/cpu.h @@ -7,6 +7,7 @@ */ /* + * Copyright (C) 2024 Matheus Pecoraro * Copyright (c) 2018. 
 * Amaan Cheval
 *
@@ -61,6 +62,20 @@ extern "C" {
 #define CPU_EFLAGS_INTERRUPTS_ON 0x00003202
 #define CPU_EFLAGS_INTERRUPTS_OFF 0x00003002
 
+#define CPU_CONTEXT_CONTROL_EFLAGS 0
+
+#define CPU_CONTEXT_CONTROL_RBX CPU_CONTEXT_CONTROL_EFLAGS + 8
+#define CPU_CONTEXT_CONTROL_RSP CPU_CONTEXT_CONTROL_RBX + 8
+#define CPU_CONTEXT_CONTROL_RBP CPU_CONTEXT_CONTROL_RSP + 8
+#define CPU_CONTEXT_CONTROL_R12 CPU_CONTEXT_CONTROL_RBP + 8
+#define CPU_CONTEXT_CONTROL_R13 CPU_CONTEXT_CONTROL_R12 + 8
+#define CPU_CONTEXT_CONTROL_R14 CPU_CONTEXT_CONTROL_R13 + 8
+#define CPU_CONTEXT_CONTROL_R15 CPU_CONTEXT_CONTROL_R14 + 8
+
+#define CPU_CONTEXT_CONTROL_ISR_DISPATCH_DISABLE CPU_CONTEXT_CONTROL_R15 + 8
+
+#define CPU_CONTEXT_CONTROL_IS_EXECUTING CPU_CONTEXT_CONTROL_ISR_DISPATCH_DISABLE + 4
+
 #ifndef ASM
 
 typedef struct {
@@ -78,10 +93,12 @@ typedef struct {
   uint64_t r14;
   uint64_t r15;
 
+  uint32_t isr_dispatch_disable;
+
   // XXX: FS segment descriptor for TLS
 
 #ifdef RTEMS_SMP
-  volatile bool is_executing;
+  volatile uint16_t is_executing;
 #endif
 } Context_Control;
 
@@ -97,17 +114,41 @@ typedef struct {
 #define _CPU_Context_Get_SP( _context ) \
   (_context)->rsp
 
+#endif /* !ASM */
+
+#define CPU_INTERRUPT_FRAME_SSE_STATE 0
+
+#define CPU_INTERRUPT_FRAME_RAX CPU_INTERRUPT_FRAME_SSE_STATE + 512
+#define CPU_INTERRUPT_FRAME_RCX CPU_INTERRUPT_FRAME_RAX + 8
+#define CPU_INTERRUPT_FRAME_RDX CPU_INTERRUPT_FRAME_RCX + 8
+#define CPU_INTERRUPT_FRAME_RSI CPU_INTERRUPT_FRAME_RDX + 8
+#define CPU_INTERRUPT_FRAME_R8 CPU_INTERRUPT_FRAME_RSI + 8
+#define CPU_INTERRUPT_FRAME_R9 CPU_INTERRUPT_FRAME_R8 + 8
+#define CPU_INTERRUPT_FRAME_R10 CPU_INTERRUPT_FRAME_R9 + 8
+#define CPU_INTERRUPT_FRAME_R11 CPU_INTERRUPT_FRAME_R10 + 8
+#define CPU_INTERRUPT_FRAME_RSP CPU_INTERRUPT_FRAME_R11 + 8
+
+#ifndef ASM
+
 /*
  * Caller-saved registers for interrupt frames
  */
 typedef struct {
-  /**
-   * @note: rdi is a caller-saved register too, but it's used in function calls
-   * and is hence saved separately on the stack;
-   *
-   * @see DISTINCT_INTERRUPT_ENTRY
-   * @see _ISR_Handler
-   */
+  /* Registers saved by CPU */
+  uint64_t error_code; /* only in some exceptions */
+  uint64_t rip;
+  uint64_t cs;
+  uint64_t rflags;
+  uint64_t rsp;
+  uint64_t ss;
+
+  /* Saved in rtems_irq_prologue_* */
+  uint64_t rdi;
+  uint64_t rbp;
+  uint64_t rbx;
+
+  /* SSE state saved by the FXSAVE instruction */
+  uint8_t sse_state[512];
 
   uint64_t rax;
   uint64_t rcx;
@@ -126,12 +167,12 @@ typedef struct {
    */
   uint64_t saved_rsp;
 
-  /* XXX:
-   * - FS segment selector for TLS
-   * - x87 status word?
-   * - MMX?
-   * - XMM?
+  /**
+   * The caller-saved registers need to start in a 16-byte aligned position
+   * on the stack for the FXSAVE instruction. Therefore, we have 8 extra bytes
+   * in case the interrupt handler needs to align it.
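+   *
+   * Concretely, _ISR_Handler reserves CPU_INTERRUPT_FRAME_CALLER_SAVED_SIZE
+   * (512 + 72 = 584) bytes and then rounds rsp down to CPU_STACK_ALIGNMENT;
+   * these spare bytes absorb that downward adjustment.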
   */
+  uint8_t padding[8];
 } CPU_Interrupt_frame;
 
 extern Context_Control_fp _CPU_Null_fp_context;
@@ -140,7 +181,16 @@
 
 #endif /* !ASM */
 
-#define CPU_INTERRUPT_FRAME_SIZE 72
+#define CPU_INTERRUPT_FRAME_X86_64_SIZE 48
+#define CPU_INTERRUPT_FRAME_PROLOGUE_SIZE 24
+#define CPU_INTERRUPT_FRAME_CALLER_SAVED_SIZE (512 + 72)
+#define CPU_INTERRUPT_FRAME_PADDING_SIZE 8
+
+#define CPU_INTERRUPT_FRAME_SIZE \
+  (CPU_INTERRUPT_FRAME_X86_64_SIZE + \
+   CPU_INTERRUPT_FRAME_PROLOGUE_SIZE + \
+   CPU_INTERRUPT_FRAME_CALLER_SAVED_SIZE + \
+   CPU_INTERRUPT_FRAME_PADDING_SIZE)
 
 /*
  * When SMP is enabled, percpuasm.c has a similar assert, but since we use the
@@ -155,7 +205,7 @@
 
 #define CPU_MPCI_RECEIVE_SERVER_EXTRA_STACK 0
 #define CPU_PROVIDES_ISR_IS_IN_PROGRESS FALSE
-#define CPU_STACK_MINIMUM_SIZE (1024*4)
+#define CPU_STACK_MINIMUM_SIZE (1024*8)
 #define CPU_SIZEOF_POINTER 8
 #define CPU_ALIGNMENT 16
 #define CPU_HEAP_ALIGNMENT CPU_ALIGNMENT
@@ -166,6 +216,9 @@
  * ISR handler macros
  */
 
+/* ISR_Level is a uint32_t meaning we can't use RFLAGS to represent it */
+#define CPU_ISR_LEVEL_ENABLED 0
+
 #ifndef ASM
 
 #define _CPU_ISR_Enable(_level) \
@@ -282,6 +335,11 @@ void _CPU_Context_switch(
   Context_Control *heir
 );
 
+RTEMS_NO_RETURN void _CPU_Context_switch_no_return(
+  Context_Control *executing,
+  Context_Control *heir
+);
+
 RTEMS_NO_RETURN void _CPU_Context_restore( Context_Control *new_context );
 
 typedef struct {
@@ -349,7 +407,7 @@
 uint32_t _CPU_Counter_frequency( void );
 
 CPU_Counter_ticks _CPU_Counter_read( void );
 
 #ifdef RTEMS_SMP
-  *
+  uint32_t _CPU_SMP_Initialize( void );
 
   bool _CPU_SMP_Start_processor( uint32_t cpu_index );
@@ -358,16 +416,14 @@ CPU_Counter_ticks _CPU_Counter_read( void );
 
   void _CPU_SMP_Prepare_start_multitasking( void );
 
-  static inline uint32_t _CPU_SMP_Get_current_processor( void )
-  {
-    return 123;
-  }
+  uint32_t _CPU_SMP_Get_current_processor( void );
 
   void _CPU_SMP_Send_interrupt( uint32_t target_processor_index );
 
   static inline bool _CPU_Context_Get_is_executing(
     const Context_Control *context
   )
+
   {
     return context->is_executing;
   }
@@ -376,6 +432,7 @@
     bool is_executing
   )
   {
+    context->is_executing = is_executing;
   }
 
 #endif /* RTEMS_SMP */
diff --git a/cpukit/score/cpu/x86_64/include/rtems/score/cpu_asm.h b/cpukit/score/cpu/x86_64/include/rtems/score/cpu_asm.h
index 896a258493..bb23ef8faa 100644
--- a/cpukit/score/cpu/x86_64/include/rtems/score/cpu_asm.h
+++ b/cpukit/score/cpu/x86_64/include/rtems/score/cpu_asm.h
@@ -127,6 +127,11 @@ static inline void stub_io_wait(void)
    */
 }
 
+static inline void amd64_spinwait(void)
+{
+  __asm__ volatile("pause" : : : "memory");
+}
+
 #endif /* !ASM */
 
 #endif
diff --git a/cpukit/score/cpu/x86_64/include/rtems/score/idt.h b/cpukit/score/cpu/x86_64/include/rtems/score/idt.h
index e1f69b1409..4e88c3fd3f 100644
--- a/cpukit/score/cpu/x86_64/include/rtems/score/idt.h
+++ b/cpukit/score/cpu/x86_64/include/rtems/score/idt.h
@@ -47,6 +47,8 @@ extern "C" {
 #define BSP_VECTOR_SPURIOUS 0xFF
 /* Target vector number for the APIC timer */
 #define BSP_VECTOR_APIC_TIMER 32
+/* Target vector number for inter-processor interrupts */
+#define BSP_VECTOR_IPI 33
 
 typedef struct _interrupt_descriptor {
   uint16_t offset_0; // bits 0-15
@@ -59,13 +61,14 @@ typedef struct _interrupt_descriptor {
   uint32_t reserved_zero;
 } interrupt_descriptor;
 
-extern interrupt_descriptor amd64_idt[IDT_SIZE];
-
 struct idt_record {
uint16_t limit; /* Size of IDT array - 1 */ uintptr_t base; /* Pointer to IDT array */ } RTEMS_PACKED; +extern interrupt_descriptor amd64_idt[IDT_SIZE]; +extern struct idt_record amd64_idtr; + RTEMS_STATIC_ASSERT( sizeof(struct idt_record) == 10, "IDT pointer must be exactly 10 bytes" @@ -123,6 +126,7 @@ extern void rtems_irq_prologue_29(void); extern void rtems_irq_prologue_30(void); extern void rtems_irq_prologue_31(void); extern void rtems_irq_prologue_32(void); +extern void rtems_irq_prologue_33(void); #ifdef __cplusplus } diff --git a/cpukit/score/cpu/x86_64/x86_64-context-switch.S b/cpukit/score/cpu/x86_64/x86_64-context-switch.S index 1e98882048..7a3b7d325a 100644 --- a/cpukit/score/cpu/x86_64/x86_64-context-switch.S +++ b/cpukit/score/cpu/x86_64/x86_64-context-switch.S @@ -1,4 +1,5 @@ /* + * Copyright (C) 2024 Matheus Pecoraro * Copyright (c) 2018. * Amaan Cheval * @@ -45,6 +46,7 @@ BEGIN_CODE .p2align 1 PUBLIC(_CPU_Context_switch) +PUBLIC(_CPU_Context_switch_no_return) /* save context argument */ .set RUNCONTEXT_ARG, REG_ARG0 @@ -52,37 +54,83 @@ PUBLIC(_CPU_Context_switch) .set HEIRCONTEXT_ARG, REG_ARG1 SYM(_CPU_Context_switch): - movq RUNCONTEXT_ARG, rax /* rax = running threads context */ +SYM(_CPU_Context_switch_no_return): + movq RUNCONTEXT_ARG, rax /* rax = running threads context */ + GET_SELF_CPU_CONTROL_RCX /* rcx = per CPU information */ /* Fill up Context_Control struct */ pushf - popq (0 * CPU_SIZEOF_POINTER)(rax) /* pop rflags into context */ - movq rbx, (1 * CPU_SIZEOF_POINTER)(rax) - movq rsp, (2 * CPU_SIZEOF_POINTER)(rax) - movq rbp, (3 * CPU_SIZEOF_POINTER)(rax) - movq r12, (4 * CPU_SIZEOF_POINTER)(rax) - movq r13, (5 * CPU_SIZEOF_POINTER)(rax) - movq r14, (6 * CPU_SIZEOF_POINTER)(rax) - movq r15, (7 * CPU_SIZEOF_POINTER)(rax) + popq CPU_CONTEXT_CONTROL_EFLAGS(rax) /* pop rflags into context */ + movq rbx, CPU_CONTEXT_CONTROL_RBX(rax) + movq rsp, CPU_CONTEXT_CONTROL_RSP(rax) + movq rbp, CPU_CONTEXT_CONTROL_RBP(rax) + movq r12, CPU_CONTEXT_CONTROL_R12(rax) + movq r13, CPU_CONTEXT_CONTROL_R13(rax) + movq r14, CPU_CONTEXT_CONTROL_R14(rax) + movq r15, CPU_CONTEXT_CONTROL_R15(rax) + movl PER_CPU_ISR_DISPATCH_DISABLE(rcx), %edx + movl %edx, CPU_CONTEXT_CONTROL_ISR_DISPATCH_DISABLE(rax) + + movq rax, r8 /* r8 = running threads context */ movq HEIRCONTEXT_ARG, rax /* rax = heir threads context */ -restore: - movq (1 * CPU_SIZEOF_POINTER)(rax), rbx - movq (2 * CPU_SIZEOF_POINTER)(rax), rsp +#ifdef RTEMS_SMP + /* + * The executing thread no longer executes on this processor. Switch + * the stack to the temporary interrupt stack of this processor. Mark + * the context of the executing thread as not executing. + */ + leaq PER_CPU_INTERRUPT_FRAME_AREA + CPU_INTERRUPT_FRAME_SIZE(rcx), rsp + movw $0, CPU_CONTEXT_CONTROL_IS_EXECUTING(r8) + +.check_is_executing: + lock btsw $0, CPU_CONTEXT_CONTROL_IS_EXECUTING(rax) /* Indicator in carry flag */ + jnc .restore + +.get_potential_new_heir: + /* We may have a new heir */ + + /* Read the executing and heir */ + movq PER_CPU_OFFSET_EXECUTING(rcx), r9 + movq PER_CPU_OFFSET_HEIR(rcx), r10 + + /* + * Update the executing only if necessary to avoid cache line + * monopolization. 
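+   *
+   * The addq/subq pair below computes the heir's context address as
+   * rax + (r10 - r9): the offset of the context within the thread
+   * control block is the same for the executing and the heir thread.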
+ */ + cmpq r9, r10 + je .check_is_executing + + /* Calculate the heir context pointer */ + addq r10, rax + subq r9, rax + + /* Update the executing */ + movq r10, PER_CPU_OFFSET_EXECUTING(rcx) + + jmp .check_is_executing +#endif + +.restore: + movq CPU_CONTEXT_CONTROL_RBX(rax), rbx + movq CPU_CONTEXT_CONTROL_RSP(rax), rsp /* * We need to load rflags after rsp to avoid an interrupt while the ISR stack * is still being used during the initialization process */ - pushq (0 * CPU_SIZEOF_POINTER)(rax) /* push rflags */ - popf /* restore rflags */ + pushq CPU_CONTEXT_CONTROL_EFLAGS(rax) /* push rflags */ + popf /* restore rflags */ - movq (3 * CPU_SIZEOF_POINTER)(rax), rbp - movq (4 * CPU_SIZEOF_POINTER)(rax), r12 - movq (5 * CPU_SIZEOF_POINTER)(rax), r13 - movq (6 * CPU_SIZEOF_POINTER)(rax), r14 - movq (7 * CPU_SIZEOF_POINTER)(rax), r15 + movq CPU_CONTEXT_CONTROL_RBP(rax), rbp + movq CPU_CONTEXT_CONTROL_R12(rax), r12 + movq CPU_CONTEXT_CONTROL_R13(rax), r13 + movq CPU_CONTEXT_CONTROL_R14(rax), r14 + movq CPU_CONTEXT_CONTROL_R15(rax), r15 + + movl CPU_CONTEXT_CONTROL_ISR_DISPATCH_DISABLE(rax), %edx + movl %edx, PER_CPU_ISR_DISPATCH_DISABLE(rcx) /* XXX: TLS - load GDT and refresh FS segment selector */ @@ -100,7 +148,8 @@ PUBLIC(_CPU_Context_restore) SYM(_CPU_Context_restore): movq NEWCONTEXT_ARG, rax /* rax = running threads context */ - jmp restore + GET_SELF_CPU_CONTROL_RCX /* rcx = per CPU information */ + jmp .restore END_CODE END diff --git a/spec/build/bsps/x86_64/amd64/grp.yml b/spec/build/bsps/x86_64/amd64/grp.yml index 0cab2c2526..9dfa64b16d 100644 --- a/spec/build/bsps/x86_64/amd64/grp.yml +++ b/spec/build/bsps/x86_64/amd64/grp.yml @@ -16,6 +16,8 @@ links: uid: abi - role: build-dependency uid: obj +- role: build-dependency + uid: objsmp - role: build-dependency uid: start - role: build-dependency diff --git a/spec/build/bsps/x86_64/amd64/obj.yml b/spec/build/bsps/x86_64/amd64/obj.yml index 4ed08e50b8..4a56081c60 100644 --- a/spec/build/bsps/x86_64/amd64/obj.yml +++ b/spec/build/bsps/x86_64/amd64/obj.yml @@ -14,6 +14,7 @@ install: - bsps/x86_64/amd64/include/bsp.h - bsps/x86_64/amd64/include/clock.h - bsps/x86_64/amd64/include/freebsd_loader.h + - bsps/x86_64/amd64/include/gdt.h - bsps/x86_64/amd64/include/pic.h - bsps/x86_64/amd64/include/start.h - destination: ${BSP_INCLUDEDIR}/acpi @@ -47,5 +48,6 @@ source: - bsps/x86_64/amd64/interrupts/pic.c - bsps/x86_64/amd64/start/bspstart.c - bsps/x86_64/amd64/start/bspreset.c +- bsps/x86_64/amd64/start/gdt.c - bsps/x86_64/amd64/start/page.c type: build diff --git a/spec/build/bsps/x86_64/amd64/objsmp.yml b/spec/build/bsps/x86_64/amd64/objsmp.yml new file mode 100644 index 0000000000..661cc137a5 --- /dev/null +++ b/spec/build/bsps/x86_64/amd64/objsmp.yml @@ -0,0 +1,19 @@ +SPDX-License-Identifier: CC-BY-SA-4.0 OR BSD-2-Clause +build-type: objects +cflags: [] +copyrights: +- Copyright (C) 2024 Matheus Pecoraro +cppflags: [] +cxxflags: [] +enabled-by: +- RTEMS_SMP +includes: [] +install: +- destination: ${BSP_INCLUDEDIR} + source: + - bsps/x86_64/amd64/include/smp.h +links: [] +source: +- bsps/x86_64/amd64/start/ap_trampoline.S +- bsps/x86_64/amd64/start/bspsmp.c +type: build diff --git a/spec/build/cpukit/optsmp.yml b/spec/build/cpukit/optsmp.yml index 8d060df3fa..b965cdf804 100644 --- a/spec/build/cpukit/optsmp.yml +++ b/spec/build/cpukit/optsmp.yml @@ -52,6 +52,7 @@ enabled-by: - sparc/gr712rc - sparc/gr740 - sparc/leon3 +- x86_64/amd64 links: [] name: RTEMS_SMP type: build