x86_64/amd64: Add SMP support

Adds SMP support for the x86_64/amd64 BSP.
Matheus Pecoraro
2024-08-06 17:42:11 -03:00
committed by Joel Sherrill
parent 43a581c712
commit e93bbe6534
22 changed files with 1067 additions and 152 deletions

View File

@@ -44,6 +44,7 @@
#include <rtems/score/cpuimpl.h>
#include <rtems/score/x86_64.h>
#include <bsp/irq-generic.h>
#include <rtems/score/smpimpl.h>
static struct timecounter amd64_clock_tc;
@@ -61,6 +62,14 @@ static void lapic_timer_isr(void *param)
lapic_eoi();
}
#ifdef RTEMS_SMP
static void smp_lapic_timer_enable(void* arg)
{
uint32_t* lapic_reload_value = (uint32_t*) arg;
lapic_timer_enable(*lapic_reload_value);
}
#endif
void lapic_timer_install_handler(void)
{
rtems_status_code sc = rtems_interrupt_handler_install(
@@ -84,7 +93,12 @@ void amd64_clock_driver_initialize(void)
);
/* Setup and initialize the Local APIC timer */
lapic_timer_initialize(irq_ticks_per_sec);
uint32_t lapic_reload_value = lapic_timer_calc_ticks(irq_ticks_per_sec);
#ifdef RTEMS_SMP
_SMP_Broadcast_action(smp_lapic_timer_enable, &lapic_reload_value);
#else
lapic_timer_enable(lapic_reload_value);
#endif
amd64_clock_tc.tc_get_timecount = amd64_clock_get_timecount;
amd64_clock_tc.tc_counter_mask = 0xffffffff;
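
The Local APIC timer is a per-core device: writing its registers only affects the processor that executes the write, which is why the reload value computed once on the boot processor is fanned out with _SMP_Broadcast_action. A minimal sketch of that pattern, assuming the broadcast executes the handler on every online processor (the handler and wrapper names are illustrative):

#include <rtems/score/smpimpl.h>

/* Runs on each processor; programs that core's private LAPIC timer */
static void enable_timer_on_this_cpu(void *arg)
{
  uint32_t reload_value = *(uint32_t *) arg;
  lapic_timer_enable(reload_value);
}

/* Called once, on the boot processor, during clock driver setup */
static void enable_timer_everywhere(uint32_t reload_value)
{
  _SMP_Broadcast_action(enable_timer_on_this_cpu, &reload_value);
}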

View File

@@ -47,15 +47,19 @@ extern "C" {
/* Value to hardware-enable the APIC through the APIC_BASE_MSR */
#define APIC_BASE_MSR_ENABLE 0x800
#define xAPIC_MAX_APIC_ID 0xFE
/*
* Since the LAPIC registers are contained in an array of 32-bit elements
* these byte-offsets need to be divided by 4 to index the array.
*/
#define LAPIC_OFFSET(val) ((val) >> 2)
#define LAPIC_REGISTER_APICID LAPIC_OFFSET(0x20)
#define LAPIC_REGISTER_ID LAPIC_OFFSET(0x20)
#define LAPIC_REGISTER_EOI LAPIC_OFFSET(0x0B0)
#define LAPIC_REGISTER_SPURIOUS LAPIC_OFFSET(0x0F0)
#define LAPIC_REGISTER_ICR_LOW LAPIC_OFFSET(0x300)
#define LAPIC_REGISTER_ICR_HIGH LAPIC_OFFSET(0x310)
#define LAPIC_REGISTER_LVT_TIMER LAPIC_OFFSET(0x320)
#define LAPIC_REGISTER_TIMER_INITCNT LAPIC_OFFSET(0x380)
#define LAPIC_REGISTER_TIMER_CURRCNT LAPIC_OFFSET(0x390)
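
Because amd64_lapic_base points to 32-bit elements, the byte offsets from the manual are scaled down by four before indexing. A short illustration of the resulting accesses, using constants from this header:

/* EOI register: byte offset 0xB0 -> array index 0xB0 >> 2 == 0x2C */
amd64_lapic_base[LAPIC_REGISTER_EOI] = LAPIC_EOI_ACK;

/* ID register: byte offset 0x20 -> index 0x8; the ID sits in bits 31:24 */
uint8_t lapic_id = amd64_lapic_base[LAPIC_REGISTER_ID] >> 24;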
@@ -63,6 +67,14 @@ extern "C" {
#define LAPIC_LVT_MASK 0x10000
#define LAPIC_ICR_HIGH_MASK 0x00FFFFFF
#define LAPIC_ICR_LOW_MASK 0xFFF32000
#define LAPIC_ICR_DELIV_INIT 0x500
#define LAPIC_ICR_DELIV_START 0x600
#define LAPIC_ICR_DELIV_STAT_PEND 0x1000
#define LAPIC_ICR_ASSERT 0x4000
#define LAPIC_ICR_TRIG_LEVEL 0x8000
#define LAPIC_EOI_ACK 0
#define LAPIC_SELECT_TMR_PERIODIC 0x20000
#define LAPIC_SPURIOUS_ENABLE 0x100
@@ -74,7 +86,7 @@ extern "C" {
/* Value to set in register to pick the divide value above */
#define LAPIC_TIMER_SELECT_DIVIDER 3
/* PIT defines used during LAPIC timer calibration */
/* PIT defines and macros used when calibrating the LAPIC timer and starting APs */
#define PIT_FREQUENCY 1193180
/*
* The PIT_FREQUENCY determines how many times the PIT counter is decremented
@@ -119,7 +131,44 @@ RTEMS_STATIC_ASSERT(
#define PIT_SELECT_ONE_SHOT_MODE 0b00000010
#define PIT_SELECT_BINARY_MODE 0
#define PIT_CHAN2_ENABLE(chan2_value) \
/* Enable the channel 2 timer gate and disable the speaker output */ \
chan2_value = (inport_byte(PIT_PORT_CHAN2_GATE) | PIT_CHAN2_TIMER_BIT) \
& ~PIT_CHAN2_SPEAKER_BIT; \
outport_byte(PIT_PORT_CHAN2_GATE, chan2_value); \
/* Initialize PIT in one-shot mode on Channel 2 */ \
outport_byte( \
PIT_PORT_MCR, \
PIT_SELECT_CHAN2 | PIT_SELECT_ACCESS_LOHI | \
PIT_SELECT_ONE_SHOT_MODE | PIT_SELECT_BINARY_MODE \
); \
#define PIT_CHAN2_WRITE_TICKS(pit_ticks) \
/* Set PIT reload value */ \
outport_byte(PIT_PORT_CHAN2, pit_ticks & 0xff); \
stub_io_wait(); \
outport_byte(PIT_PORT_CHAN2, (pit_ticks >> 8) & 0xff); \
#define PIT_CHAN2_START_DELAY(chan2_value) \
/* Restart PIT by disabling the gated input and then re-enabling it */ \
chan2_value &= ~PIT_CHAN2_TIMER_BIT; \
outport_byte(PIT_PORT_CHAN2_GATE, chan2_value); \
chan2_value |= PIT_CHAN2_TIMER_BIT; \
outport_byte(PIT_PORT_CHAN2_GATE, chan2_value); \
#define PIT_CHAN2_WAIT_DELAY(pit_ticks) \
do { \
uint32_t curr_ticks = pit_ticks; \
while ( curr_ticks <= pit_ticks ) { \
/* Send latch command to read multi-byte value atomically */ \
outport_byte(PIT_PORT_MCR, PIT_SELECT_CHAN2); \
curr_ticks = inport_byte(PIT_PORT_CHAN2); \
curr_ticks |= inport_byte(PIT_PORT_CHAN2) << 8; \
} \
} while(0); \
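
Together these four macros implement a polled delay of a chosen length on PIT channel 2. A worked sketch of the pattern (the same one lapic_start_ap and wait_for_ap use later in this commit): a 10 ms delay at PIT_FREQUENCY = 1193180 Hz comes out to 1193180 / 100 = 11931 ticks in integer division.

uint8_t chan2_value;
uint32_t pit_ticks = PIT_FREQUENCY / 100;  /* ~10 ms worth of PIT ticks */

PIT_CHAN2_ENABLE(chan2_value);       /* gate channel 2, mute the speaker */
PIT_CHAN2_WRITE_TICKS(pit_ticks);    /* program the one-shot count */
PIT_CHAN2_START_DELAY(chan2_value);  /* pulse the gate to start the countdown */
PIT_CHAN2_WAIT_DELAY(pit_ticks);     /* spin until the counter wraps, i.e. the delay expired */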
extern volatile uint32_t* amd64_lapic_base;
extern uint8_t amd64_lapic_to_cpu_map[xAPIC_MAX_APIC_ID + 1];
/**
* @brief Initializes the Local APIC by hardware and software enabling it.
@@ -133,14 +182,56 @@ extern volatile uint32_t* amd64_lapic_base;
bool lapic_initialize(void);
/**
* @brief Initializes the Local APIC timer
* @brief Calculates the number of Local APIC timer ticks which can be used
* with lapic_timer_enable to set up a timer of a given frequency.
*
* Calibrates and initializes the Local APIC timer configuring it to
* periodically generate interrupts on vector BSP_VECTOR_APIC_TIMER
* @param desired_freq_hz The frequency in Hz.
*
* @param desired_freq_hz The desired frequency of the Local APIC timer
* @return The number of Local APIC timer ticks.
*/
void lapic_timer_initialize(uint64_t desired_freq_hz);
uint32_t lapic_timer_calc_ticks(uint64_t desired_freq_hz);
/**
* @brief Enables the Local APIC timer.
*
* @param reload_value Number of ticks per interrupt.
*/
void lapic_timer_enable(uint32_t reload_value);
#ifdef RTEMS_SMP
/**
* @brief Retrieves the number of available processors in the system
*
* @return Number of available processors
*/
uint32_t lapic_get_num_of_processors(void);
/**
* @brief Sends an interprocessor interrupt to a specified processor.
*
* @param target_cpu_index The processor index of the target processor.
* @param isr_vector The vector of the interrupt being sent.
*/
void lapic_send_ipi(uint32_t target_cpu_index, uint8_t isr_vector);
/**
* @brief Starts the Application Processor that corresponds to cpu_index.
*
* @param cpu_index The processor to be started.
* @param page_vector The 4KB page below 1MB where the trampoline code is located.
*/
void lapic_start_ap(uint32_t cpu_index, uint8_t page_vector);
#endif
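
A sketch of how the BSP layer added in this commit uses these two primitives (the processor index is illustrative; BSP_VECTOR_IPI is defined later in this commit):

/* Interrupt processor 1 with the new IPI vector (0x21) */
lapic_send_ipi(1, BSP_VECTOR_IPI);

/* Boot processor 1 from the trampoline copied to physical 0x8000; the
 * STARTUP IPI carries a page number, so the vector is 0x8000 >> 12 == 8 */
lapic_start_ap(1, 0x8000 >> 12);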
/**
* @brief Retrieves the Local APIC ID
* @return Local APIC ID
*/
static inline uint8_t lapic_get_id(void)
{
/* The Local APIC ID is stored in the highest 8 bits of the register */
return amd64_lapic_base[LAPIC_REGISTER_ID] >> 24;
}
/**
* @brief Signals an end of interrupt to the Local APIC

View File

@@ -0,0 +1,64 @@
/* SPDX-License-Identifier: BSD-2-Clause */
/**
* @file
*
* @ingroup RTEMSBSPsX8664AMD64
*
* @brief Global Descriptor Table
*/
/*
* Copyright (C) 2024 Matheus Pecoraro
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef GDT_H
#define GDT_H
#define GDT_CODE_SEG_OFFSET 0x8
#define GDT_DATA_SEG_OFFSET 0x10
#ifndef ASM
#include <rtems/score/basedefs.h>
#include <stdint.h>
typedef struct {
uint16_t limit_low; /* Lower 16 bits of the limit */
uint16_t base_low; /* Lower 16 bits of the base */
uint8_t base_middle; /* Next 8 bits of the base */
uint8_t access; /* Access flags */
uint8_t gran_limit_middle; /* Granularity flags and upper 4 bits of the limit */
uint8_t base_high; /* Last 8 bits of the base */
} RTEMS_PACKED gdt_entry;
typedef struct {
uint16_t size;
uint64_t addr;
} RTEMS_PACKED gdt_desc;
extern const gdt_desc amd64_gdt_descriptor;
#endif // ASM
#endif // GDT_H
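
As a cross-check of the field layout, here is how the long-mode code segment installed by gdt.c later in this commit decodes (bit meanings follow the standard x86 descriptor format; the annotation is illustrative):

static const gdt_entry long_mode_code = {
  .limit_low         = 0xFFFF, /* limit bits 15:0 */
  .base_low          = 0,
  .base_middle       = 0,
  .access            = 0x9E,   /* present, ring 0, conforming, readable code */
  .gran_limit_middle = 0xAF,   /* G=1 (4 KiB), L=1 (64-bit), limit bits 19:16 */
  .base_high         = 0
};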

View File

@@ -0,0 +1,47 @@
/* SPDX-License-Identifier: BSD-2-Clause */
/**
* @file
*
* @ingroup RTEMSBSPsX8664AMD64
*
* @brief BSP SMP definitions
*/
/*
* Copyright (C) 2024 Matheus Pecoraro
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#define TRAMPOLINE_ADDR 0x8000
#define TRAMPOLINE_PAGE_VECTOR (TRAMPOLINE_ADDR>>12)
#define TRAMPOLINE_AP_FLAG_OFFSET 0x10
#define WAIT_FOR_AP_TIMEOUT_MS 10
#ifndef ASM
/**
* @brief This routine will be called by the trampoline code
*/
RTEMS_NO_RETURN void smp_init_ap(void);
#endif
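
The page-vector encoding also explains why the trampoline must live in low memory: a STARTUP IPI carries only an 8-bit page number, so an AP can start no higher than 0xFF << 12 = 0xFF000, i.e. within the first megabyte. For the address chosen here:

/* TRAMPOLINE_ADDR >> 12 == 0x8000 >> 12 == 8, so the AP begins
 * executing at page 8, physical address 8 * 4096 == 0x8000 */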

View File

@@ -44,6 +44,13 @@ extern void apic_spurious_handler(void);
volatile uint32_t* amd64_lapic_base;
#ifdef RTEMS_SMP
/* Maps the processor index to the Local APIC ID */
uint8_t amd64_lapic_to_cpu_map[xAPIC_MAX_APIC_ID + 1];
static uint8_t cpu_to_lapic_map[xAPIC_MAX_APIC_ID + 1];
static uint32_t lapic_count = 0;
#endif
/**
* @brief Returns whether the system contains a Local APIC or not.
*
@@ -68,6 +75,19 @@ static bool has_lapic_support(void)
static void madt_subtables_handler(ACPI_SUBTABLE_HEADER* entry)
{
switch (entry->Type) {
#ifdef RTEMS_SMP
case ACPI_MADT_TYPE_LOCAL_APIC:
ACPI_MADT_LOCAL_APIC* lapic_entry = (ACPI_MADT_LOCAL_APIC*) entry;
if (lapic_count >= xAPIC_MAX_APIC_ID + 1 ||
lapic_get_id() == lapic_entry->Id) {
break;
}
amd64_lapic_to_cpu_map[lapic_entry->Id] = (uint8_t) lapic_count;
cpu_to_lapic_map[lapic_count++] = lapic_entry->Id;
break;
#endif
case ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE:
ACPI_MADT_LOCAL_APIC_OVERRIDE* lapic_override =
(ACPI_MADT_LOCAL_APIC_OVERRIDE*) entry;
@@ -93,6 +113,13 @@ static bool parse_madt(void)
return false;
}
#ifdef RTEMS_SMP
/* Ensure the boot processor is cpu index 0 */
uint8_t lapic_id = lapic_get_id();
amd64_lapic_to_cpu_map[lapic_id] = (uint8_t) lapic_count;
cpu_to_lapic_map[lapic_count++] = lapic_id;
#endif
amd64_lapic_base = (uint32_t*) ((uintptr_t) madt->Address);
acpi_walk_subtables(
(ACPI_TABLE_HEADER*) madt,
@@ -103,23 +130,19 @@ static bool parse_madt(void)
return true;
}
static uint32_t lapic_timer_calibrate(void)
/**
* @brief Calculates the number of LAPIC timer ticks per second using the PIT.
*
* @return The calculated number of ticks per second.
*/
static uint32_t lapic_timer_calc_ticks_per_sec(void)
{
/* Configure LAPIC timer in one-shot mode to prepare for calibration */
amd64_lapic_base[LAPIC_REGISTER_LVT_TIMER] = BSP_VECTOR_APIC_TIMER;
amd64_lapic_base[LAPIC_REGISTER_TIMER_DIV] = LAPIC_TIMER_SELECT_DIVIDER;
/* Enable the channel 2 timer gate and disable the speaker output */
uint8_t chan2_value = (inport_byte(PIT_PORT_CHAN2_GATE) | PIT_CHAN2_TIMER_BIT)
& ~PIT_CHAN2_SPEAKER_BIT;
outport_byte(PIT_PORT_CHAN2_GATE, chan2_value);
/* Initialize PIT in one-shot mode on Channel 2 */
outport_byte(
PIT_PORT_MCR,
PIT_SELECT_CHAN2 | PIT_SELECT_ACCESS_LOHI |
PIT_SELECT_ONE_SHOT_MODE | PIT_SELECT_BINARY_MODE
);
uint8_t chan2_value;
PIT_CHAN2_ENABLE(chan2_value);
/*
* Make sure interrupts are disabled while we calibrate for 2 reasons:
@@ -132,31 +155,16 @@ static uint32_t lapic_timer_calibrate(void)
rtems_interrupt_level level;
rtems_interrupt_local_disable(level);
/* Set PIT reload value */
uint32_t pit_ticks = PIT_CALIBRATE_TICKS;
uint8_t low_tick = pit_ticks & 0xff;
uint8_t high_tick = (pit_ticks >> 8) & 0xff;
outport_byte(PIT_PORT_CHAN2, low_tick);
stub_io_wait();
outport_byte(PIT_PORT_CHAN2, high_tick);
PIT_CHAN2_WRITE_TICKS(pit_ticks);
/* Start LAPIC timer's countdown */
const uint32_t lapic_calibrate_init_count = 0xffffffff;
/* Restart PIT by disabling the gated input and then re-enabling it */
chan2_value &= ~PIT_CHAN2_TIMER_BIT;
outport_byte(PIT_PORT_CHAN2_GATE, chan2_value);
chan2_value |= PIT_CHAN2_TIMER_BIT;
outport_byte(PIT_PORT_CHAN2_GATE, chan2_value);
PIT_CHAN2_START_DELAY(chan2_value);
amd64_lapic_base[LAPIC_REGISTER_TIMER_INITCNT] = lapic_calibrate_init_count;
while ( pit_ticks <= PIT_CALIBRATE_TICKS ) {
/* Send latch command to read multi-byte value atomically */
outport_byte(PIT_PORT_MCR, PIT_SELECT_CHAN2);
pit_ticks = inport_byte(PIT_PORT_CHAN2);
pit_ticks |= inport_byte(PIT_PORT_CHAN2) << 8;
}
PIT_CHAN2_WAIT_DELAY(pit_ticks);
uint32_t lapic_currcnt = amd64_lapic_base[LAPIC_REGISTER_TIMER_CURRCNT];
DBG_PRINTF("PIT stopped at 0x%" PRIx32 "\n", pit_ticks);
@@ -193,6 +201,30 @@ static uint32_t lapic_timer_calibrate(void)
return lapic_ticks_per_sec;
}
#ifdef RTEMS_SMP
/**
* @brief Sends an interprocessor interrupt
*
* @param dest_id Local APIC ID of the destination
* @param icr_low The flags to be written to the low value of the ICR
*/
static void send_ipi(uint8_t dest_id, uint32_t icr_low)
{
amd64_lapic_base[LAPIC_REGISTER_ICR_HIGH] =
(amd64_lapic_base[LAPIC_REGISTER_ICR_HIGH] & LAPIC_ICR_HIGH_MASK) | (dest_id << 24);
amd64_lapic_base[LAPIC_REGISTER_ICR_LOW] =
(amd64_lapic_base[LAPIC_REGISTER_ICR_LOW] & LAPIC_ICR_LOW_MASK) | icr_low;
}
static void wait_ipi(void)
{
while (amd64_lapic_base[LAPIC_REGISTER_ICR_LOW] & LAPIC_ICR_DELIV_STAT_PEND) {
amd64_spinwait();
}
}
#endif
bool lapic_initialize(void)
{
if (has_lapic_support() == false || parse_madt() == false) {
@@ -210,8 +242,8 @@ bool lapic_initialize(void)
DBG_PRINTF("APIC is at 0x%" PRIxPTR "\n", (uintptr_t) amd64_lapic_base);
DBG_PRINTF(
"APIC ID at *0x%" PRIxPTR "=0x%" PRIx32 "\n",
(uintptr_t) &amd64_lapic_base[LAPIC_REGISTER_APICID],
amd64_lapic_base[LAPIC_REGISTER_APICID]
(uintptr_t) &amd64_lapic_base[LAPIC_REGISTER_ID],
amd64_lapic_base[LAPIC_REGISTER_ID]
);
DBG_PRINTF(
@@ -248,14 +280,12 @@ bool lapic_initialize(void)
return true;
}
void lapic_timer_initialize(uint64_t desired_freq_hz)
uint32_t lapic_timer_calc_ticks(uint64_t desired_freq_hz)
{
uint32_t lapic_ticks_per_sec = 0;
uint32_t lapic_tick_collections[LAPIC_TIMER_NUM_CALIBRATIONS] = {0};
uint64_t lapic_tick_total = 0;
for (uint32_t i = 0; i < LAPIC_TIMER_NUM_CALIBRATIONS; i++) {
lapic_tick_collections[i] = lapic_timer_calibrate();
lapic_tick_total += lapic_tick_collections[i];
lapic_tick_total += lapic_timer_calc_ticks_per_sec();
}
lapic_ticks_per_sec = lapic_tick_total / LAPIC_TIMER_NUM_CALIBRATIONS;
@@ -264,14 +294,73 @@ void lapic_timer_initialize(uint64_t desired_freq_hz)
* frequency (and we use a frequency divider).
*
* This means:
* apic_ticks_per_sec = (cpu_bus_frequency / timer_divide_value)
* lapic_ticks_per_sec = (cpu_bus_frequency / timer_divide_value)
*
* Therefore:
* reload_value = apic_ticks_per_sec / desired_freq_hz
* reload_value = lapic_ticks_per_sec / desired_freq_hz
*/
uint32_t lapic_timer_reload_value = lapic_ticks_per_sec / desired_freq_hz;
return lapic_ticks_per_sec / desired_freq_hz;
}
void lapic_timer_enable(uint32_t reload_value)
{
amd64_lapic_base[LAPIC_REGISTER_LVT_TIMER] = BSP_VECTOR_APIC_TIMER | LAPIC_SELECT_TMR_PERIODIC;
amd64_lapic_base[LAPIC_REGISTER_TIMER_DIV] = LAPIC_TIMER_SELECT_DIVIDER;
amd64_lapic_base[LAPIC_REGISTER_TIMER_INITCNT] = lapic_timer_reload_value;
amd64_lapic_base[LAPIC_REGISTER_TIMER_INITCNT] = reload_value;
}
#ifdef RTEMS_SMP
uint32_t lapic_get_num_of_processors(void)
{
return lapic_count;
}
void lapic_send_ipi(uint32_t target_cpu_index, uint8_t isr_vector)
{
uint8_t target_lapic_id = cpu_to_lapic_map[target_cpu_index];
send_ipi(target_lapic_id, isr_vector);
wait_ipi();
}
/**
* This routine attempts to follow the algorithm described in the
* Intel Multiprocessor Specification v1.4 in section B.4.
*/
void lapic_start_ap(uint32_t cpu_index, uint8_t page_vector)
{
if (cpu_index >= lapic_count) {
return;
}
uint8_t lapic_id = cpu_to_lapic_map[cpu_index];
uint8_t chan2_value;
PIT_CHAN2_ENABLE(chan2_value);
uint32_t pit_ticks = PIT_FREQUENCY/100; /* 10 milliseconds */
PIT_CHAN2_WRITE_TICKS(pit_ticks);
/* INIT IPI */
send_ipi(lapic_id, LAPIC_ICR_DELIV_INIT | LAPIC_ICR_ASSERT | LAPIC_ICR_TRIG_LEVEL);
wait_ipi();
/* Deassert INIT IPI */
send_ipi(lapic_id, LAPIC_ICR_DELIV_INIT | LAPIC_ICR_TRIG_LEVEL);
/* Wait 10ms */
PIT_CHAN2_START_DELAY(chan2_value);
PIT_CHAN2_WAIT_DELAY(pit_ticks);
pit_ticks = PIT_FREQUENCY/5000; /* 200 microseconds */
PIT_CHAN2_WRITE_TICKS(pit_ticks);
/* STARTUP IPI */
send_ipi(lapic_id, LAPIC_ICR_DELIV_START | page_vector);
wait_ipi();
/* Wait 200us */
PIT_CHAN2_START_DELAY(chan2_value);
PIT_CHAN2_WAIT_DELAY(pit_ticks);
/**
* It is possible that the first STARTUP IPI sent is ignored,
* so we send it twice.
*/
send_ipi(lapic_id, LAPIC_ICR_DELIV_START | page_vector);
}
#endif

View File

@@ -40,6 +40,7 @@
#include <rtems/score/basedefs.h>
#include <rtems/score/x86_64.h>
#include <rtems/score/cpuimpl.h>
#include <rtems/score/processormaskimpl.h>
#include <stdint.h>
@@ -49,7 +50,7 @@
*/
interrupt_descriptor amd64_idt[IDT_SIZE] RTEMS_ALIGNED(8) = { { 0 } };
struct idt_record idtr = {
struct idt_record amd64_idtr = {
.limit = (IDT_SIZE * 16) - 1,
.base = (uintptr_t) amd64_idt
};
@@ -91,7 +92,8 @@ static uintptr_t rtemsIRQs[BSP_IRQ_VECTOR_NUMBER] = {
(uintptr_t) rtems_irq_prologue_29,
(uintptr_t) rtems_irq_prologue_30,
(uintptr_t) rtems_irq_prologue_31,
(uintptr_t) rtems_irq_prologue_32
(uintptr_t) rtems_irq_prologue_32,
(uintptr_t) rtems_irq_prologue_33
};
void lidt(struct idt_record *ptr)
@@ -147,7 +149,7 @@ void bsp_interrupt_facility_initialize(void)
amd64_install_raw_interrupt(i, rtemsIRQs[i], &old);
}
lidt(&idtr);
lidt(&amd64_idtr);
if (lapic_initialize() == false) {
bsp_fatal(BSP_FATAL_INTERRUPT_INITIALIZATION);
@@ -156,7 +158,7 @@ void bsp_interrupt_facility_initialize(void)
rtems_status_code bsp_interrupt_vector_disable(rtems_vector_number vector)
{
/* XXX */
/* XXX: Should be implemented once I/O APIC support is added */
return RTEMS_SUCCESSFUL;
}
@@ -223,6 +225,37 @@ rtems_status_code bsp_interrupt_vector_is_enabled(
rtems_status_code bsp_interrupt_vector_enable(rtems_vector_number vector)
{
/* XXX */
/* XXX: Should be implemented once I/O APIC support is added */
return RTEMS_SUCCESSFUL;
}
#ifdef RTEMS_SMP
rtems_status_code bsp_interrupt_get_affinity(
rtems_vector_number vector,
Processor_mask *affinity
)
{
(void) vector;
_Processor_mask_From_index( affinity, 0 );
return RTEMS_UNSATISFIED;
}
rtems_status_code bsp_interrupt_set_affinity(
rtems_vector_number vector,
const Processor_mask *affinity
)
{
(void) vector;
(void) affinity;
return RTEMS_UNSATISFIED;
}
rtems_status_code bsp_interrupt_raise_on(
rtems_vector_number vector,
uint32_t cpu_index
)
{
bsp_interrupt_assert(bsp_interrupt_is_valid_vector(vector));
return RTEMS_UNSATISFIED;
}
#endif

View File

@@ -4,6 +4,7 @@
*/
/*
* Copyright (C) 2024 Matheus Pecoraro
* Copyright (c) 2018.
* Amaan Cheval <amaan.cheval@gmail.com>
*
@@ -48,8 +49,8 @@ SYM(apic_spurious_handler):
* interrupts as persistent scratch registers (i.e. calls will not destroy
* them), as long as we also save and restore it for the interrupted task.
*/
.set SCRATCH_REG0, rbp
.set SCRATCH_REG1, rbx
.set SCRATCH_REG0, rbp
.set SCRATCH_REG1, rbx
/*
* We need to set a distinct handler for every interrupt vector so that
@@ -98,6 +99,7 @@ DISTINCT_INTERRUPT_ENTRY(29)
DISTINCT_INTERRUPT_ENTRY(30)
DISTINCT_INTERRUPT_ENTRY(31)
DISTINCT_INTERRUPT_ENTRY(32)
DISTINCT_INTERRUPT_ENTRY(33)
SYM(_ISR_Handler):
.save_cpu_interrupt_frame:
@@ -105,36 +107,32 @@ SYM(_ISR_Handler):
movq rsp, SAVED_RSP
/* Make space for CPU_Interrupt_frame */
subq $CPU_INTERRUPT_FRAME_SIZE, rsp
subq $CPU_INTERRUPT_FRAME_CALLER_SAVED_SIZE, rsp
.set ALIGNMENT_MASK, ~(CPU_STACK_ALIGNMENT - 1)
andq $ALIGNMENT_MASK, rsp
// XXX: Save interrupt mask?
/* Save caller-saved registers to CPU_Interrupt_frame */
movq rax, (8 * CPU_SIZEOF_POINTER)(rsp)
movq rcx, (7 * CPU_SIZEOF_POINTER)(rsp)
movq rdx, (6 * CPU_SIZEOF_POINTER)(rsp)
movq rsi, (5 * CPU_SIZEOF_POINTER)(rsp)
movq r8, (4 * CPU_SIZEOF_POINTER)(rsp)
movq r9, (3 * CPU_SIZEOF_POINTER)(rsp)
movq r10, (2 * CPU_SIZEOF_POINTER)(rsp)
movq r11, (1 * CPU_SIZEOF_POINTER)(rsp)
/* Save the initial rsp */
movq SAVED_RSP, (0 * CPU_SIZEOF_POINTER)(rsp)
/* Save x87 FPU, MMX and SSE state */
.set FXSAVE_SIZE, 512
/* Make space for FXSAVE */
subq $FXSAVE_SIZE, rsp
fwait
fxsave64 (rsp)
fxsave64 (CPU_INTERRUPT_FRAME_SSE_STATE)(rsp)
/* Reset to a clean state */
fninit
subq $4, rsp
movl $0x1F80, (rsp)
ldmxcsr (rsp)
addq $4, rsp
/* Use CPU_INTERRUPT_FRAME_RAX as scratch space */
movl $0x1F80, (CPU_INTERRUPT_FRAME_RAX)(rsp)
ldmxcsr (CPU_INTERRUPT_FRAME_RAX)(rsp)
/* Save caller-saved registers to CPU_Interrupt_frame */
movq rax, (CPU_INTERRUPT_FRAME_RAX)(rsp)
movq rcx, (CPU_INTERRUPT_FRAME_RCX)(rsp)
movq rdx, (CPU_INTERRUPT_FRAME_RDX)(rsp)
movq rsi, (CPU_INTERRUPT_FRAME_RSI)(rsp)
movq r8, (CPU_INTERRUPT_FRAME_R8)(rsp)
movq r9, (CPU_INTERRUPT_FRAME_R9)(rsp)
movq r10, (CPU_INTERRUPT_FRAME_R10)(rsp)
movq r11, (CPU_INTERRUPT_FRAME_R11)(rsp)
/* Save the initial rsp */
movq SAVED_RSP, (CPU_INTERRUPT_FRAME_RSP)(rsp)
.switch_stack_if_needed:
/* Save current aligned rsp so we can find CPU_Interrupt_frame again later */
@@ -145,18 +143,15 @@ SYM(_ISR_Handler):
* outermost interrupt, which means we've been using the task's stack so far
*/
#ifdef RTEMS_SMP
/* XXX: We should call _CPU_SMP_Get_current_processor here */
#endif
.set Per_CPU_Info, SCRATCH_REG1
movq $SYM(_Per_CPU_Information), Per_CPU_Info
cmpq $0, PER_CPU_ISR_NEST_LEVEL(Per_CPU_Info)
GET_SELF_CPU_CONTROL_RBX /* SCRATCH_REG1 == rbx */
cmpl $0, PER_CPU_ISR_NEST_LEVEL(Per_CPU_Info)
jne .skip_switch
.switch_stack:
movq PER_CPU_INTERRUPT_STACK_HIGH(Per_CPU_Info), rsp
.skip_switch:
incq PER_CPU_ISR_NEST_LEVEL(Per_CPU_Info)
incq PER_CPU_THREAD_DISPATCH_DISABLE_LEVEL(Per_CPU_Info)
incl PER_CPU_ISR_NEST_LEVEL(Per_CPU_Info)
incl PER_CPU_THREAD_DISPATCH_DISABLE_LEVEL(Per_CPU_Info)
.call_isr_dispatch:
/* REG_ARG0 already includes the vector number, so we can simply call */
@@ -166,37 +161,61 @@ SYM(_ISR_Handler):
/* If this is the outermost stack, this restores the task stack */
movq SAVED_RSP, rsp
decq PER_CPU_ISR_NEST_LEVEL(Per_CPU_Info)
decq PER_CPU_THREAD_DISPATCH_DISABLE_LEVEL(Per_CPU_Info)
/* XXX: Bug in QEMU causing ZF to not be set by decq necessitating the cmpb */
cmpb $0, PER_CPU_THREAD_DISPATCH_DISABLE_LEVEL(Per_CPU_Info)
/* If dispatch is non-zero, it is disabled, so skip scheduling it */
decl PER_CPU_ISR_NEST_LEVEL(Per_CPU_Info)
decl PER_CPU_THREAD_DISPATCH_DISABLE_LEVEL(Per_CPU_Info)
movl PER_CPU_THREAD_DISPATCH_DISABLE_LEVEL(Per_CPU_Info), %eax
orl PER_CPU_ISR_DISPATCH_DISABLE(Per_CPU_Info), %eax
/**
* If either the thread dispatch disable level or the ISR dispatch disable
* flag is non-zero, skip scheduling the dispatch
*/
cmpl $0, %eax
jne .restore_cpu_interrupt_frame
/* Only call _Thread_Do_dispatch if dispatch needed is not 0 */
cmpb $0, PER_CPU_DISPATCH_NEEDED(Per_CPU_Info)
je .restore_cpu_interrupt_frame
.schedule_dispatch:
/* Set ISR dispatch disable and thread dispatch disable level to one */
movl $1, PER_CPU_ISR_DISPATCH_DISABLE(Per_CPU_Info)
movl $1, PER_CPU_THREAD_DISPATCH_DISABLE_LEVEL(Per_CPU_Info)
/* Call Thread_Do_dispatch(), this function will enable interrupts */
movq Per_CPU_Info, REG_ARG0
movq $CPU_ISR_LEVEL_ENABLED, REG_ARG1 /* Set interrupt flag manually */
call _Thread_Do_dispatch
/* Disable interrupts */
cli
/**
* It is possible that after returning from _Thread_Do_dispatch the
* Per_CPU_Info has changed
*/
GET_SELF_CPU_CONTROL_RBX /* Per_CPU_Info == SCRATCH_REG1 == rbx */
cmpb $0, PER_CPU_DISPATCH_NEEDED(Per_CPU_Info)
je .restore_cpu_interrupt_frame
call _Thread_Dispatch
jne .schedule_dispatch
/* Done with thread dispatching */
movl $0, PER_CPU_ISR_DISPATCH_DISABLE(Per_CPU_Info)
.restore_cpu_interrupt_frame:
/* Restore x87 FPU, MMX and SSE state */
fwait
fxrstor64 (rsp)
/* Restore rsp to CPU_Interrupt_frame */
addq $FXSAVE_SIZE, rsp
fxrstor64 (CPU_INTERRUPT_FRAME_SSE_STATE)(rsp)
/* Restore registers from CPU_Interrupt_frame */
movq (8 * CPU_SIZEOF_POINTER)(rsp), rax
movq (7 * CPU_SIZEOF_POINTER)(rsp), rcx
movq (6 * CPU_SIZEOF_POINTER)(rsp), rdx
movq (5 * CPU_SIZEOF_POINTER)(rsp), rsi
movq (4 * CPU_SIZEOF_POINTER)(rsp), r8
movq (3 * CPU_SIZEOF_POINTER)(rsp), r9
movq (2 * CPU_SIZEOF_POINTER)(rsp), r10
movq (1 * CPU_SIZEOF_POINTER)(rsp), r11
movq (CPU_INTERRUPT_FRAME_RAX)(rsp), rax
movq (CPU_INTERRUPT_FRAME_RCX)(rsp), rcx
movq (CPU_INTERRUPT_FRAME_RDX)(rsp), rdx
movq (CPU_INTERRUPT_FRAME_RSI)(rsp), rsi
movq (CPU_INTERRUPT_FRAME_R8)(rsp), r8
movq (CPU_INTERRUPT_FRAME_R9)(rsp), r9
movq (CPU_INTERRUPT_FRAME_R10)(rsp), r10
movq (CPU_INTERRUPT_FRAME_R11)(rsp), r11
/* Restore the rsp value from just before _ISR_Handler was called */
movq (0 * CPU_SIZEOF_POINTER)(rsp), SAVED_RSP
movq (CPU_INTERRUPT_FRAME_RSP)(rsp), SAVED_RSP
movq SAVED_RSP, rsp
/* Restore args DISTINCT_INTERRUPT_ENTRY pushed to task stack */

View File

@@ -0,0 +1,147 @@
/*
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (C) 2024 Matheus Pecoraro
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/**
* This file contains the trampoline code that will be executed by every
* Application Processor when first started.
*/
#include <gdt.h>
#include <smp.h>
#include <rtems/asm.h>
#include <rtems/score/percpu.h>
.set PM_GDT_CODE_OFFSET, 0x08 # Offset of code segment descriptor into GDT
.set PM_GDT_DATA_OFFSET, 0x10 # Offset of data segment descriptor into GDT
.set CR0_PE, 1 # Protected mode flag on CR0 register
.set CR0_PG, 0x80000000 # Paging flag on CR0 register
.set CR0_EM_BITMASK, (~0x4) # Bitmask for disabling x87 FPU Emulation bit
.set CR0_MP, 0x2 # Monitor co-processor flag
.set CR4_PAE, 0x20 # Physical Address Extension flag on CR4 register
.set CR4_OSFXSR, 0x200 # OS support for FXSAVE and FXRSTOR flag
.set CR4_OSXMMEXCPT, 0x400 # OS support for unmasked SIMD FP exceptions flag
.set CR4_SSEFLAGS, (CR4_OSFXSR | CR4_OSXMMEXCPT)
.set EFER_MSR, 0xC0000080 # EFER MSR number
.set EFER_MSR_LME, 0x100 # Long Mode Enable flag on the EFER MSR
BEGIN_CODE
.code16
PUBLIC(_Trampoline_start)
SYM(_Trampoline_start):
cli
cld
jmp .real_mode
.real_mode:
lgdt (gdt_desc - _Trampoline_start) + TRAMPOLINE_ADDR
# Enter protected mode
movl %cr0, %eax
orl $CR0_PE, %eax
movl %eax, %cr0
# Jump to protected mode
ljmpl $PM_GDT_CODE_OFFSET, $((.protected_mode - _Trampoline_start) + TRAMPOLINE_ADDR)
.code32
.protected_mode:
# Load data segment registers
movw $PM_GDT_DATA_OFFSET, %ax
movw %ax, %ds
movw %ax, %es
movw %ax, %ss
# Move PML4 table address to cr3
movl $amd64_pml4, %eax
movl %eax, %cr3
# Flip PAE bit in cr4
movl %cr4, %eax
orl $CR4_PAE, %eax
movl %eax, %cr4
# Set LME on the EFER MSR
movl $EFER_MSR, %ecx
rdmsr
orl $EFER_MSR_LME, %eax
wrmsr
# Enable paging
movl %cr0, %eax
orl $CR0_PG, %eax
movl %eax, %cr0
# Update GDT for long mode
lgdt amd64_gdt_descriptor
# Jump to long mode
ljmp $GDT_CODE_SEG_OFFSET, $((.long_mode - _Trampoline_start) + TRAMPOLINE_ADDR)
.code64
.long_mode:
# Load data segment registers
movw $GDT_DATA_SEG_OFFSET, %ax
movw %ax, %ds
movw %ax, %es
movw %ax, %ss
# Acquire the processor's stack
GET_SELF_CPU_CONTROL_RAX
movq PER_CPU_INTERRUPT_STACK_HIGH(rax), rsp
# Enable SSE
movq %cr0, rax
andq $CR0_EM_BITMASK, rax
orq $CR0_MP, rax
movq rax, %cr0
movq %cr4, rax
orq $CR4_SSEFLAGS, rax
movq rax, %cr4
# Exit trampoline code
movabsq $smp_init_ap, rax
call *rax
/* Temporary GDT used to get to long mode */
gdt:
/* NULL segment */
.quad 0
/* Code segment */
.word 0xFFFF, 0
.byte 0, 0x9F, 0xCF, 0
/* Data segment */
.word 0xFFFF, 0
.byte 0, 0x92, 0xCF, 0
gdt_desc:
.word (gdt_desc - gdt) - 1
.long (gdt - _Trampoline_start) + TRAMPOLINE_ADDR
trampoline_end:
PUBLIC(_Trampoline_size)
SYM(_Trampoline_size):
.quad (trampoline_end - _Trampoline_start)
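
Since this code is assembled at one address but executed from the copy at TRAMPOLINE_ADDR, every reference made before the jump to long mode is rebased by hand with the pattern (symbol - _Trampoline_start) + TRAMPOLINE_ADDR. The same fix-up expressed as a C sketch (names from this file):

/* Run-time address of a trampoline symbol after the copy to 0x8000 */
static inline uintptr_t trampoline_reloc(uintptr_t symbol)
{
  return (symbol - (uintptr_t) _Trampoline_start) + TRAMPOLINE_ADDR;
}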

View File

@@ -46,10 +46,15 @@ void bsp_reset( rtems_fatal_source source, rtems_fatal_code code )
(void) source;
(void) code;
/**
* It is possible that AcpiEnterSleepStatePrep causes a thread dispatch,
* so we execute it with interrupts enabled
*/
amd64_enable_interrupts();
ACPI_STATUS status = AcpiEnterSleepStatePrep(ACPI_STATE_S5);
amd64_disable_interrupts();
if (status == AE_OK) {
amd64_disable_interrupts();
AcpiEnterSleepState(ACPI_STATE_S5);
}

View File

@@ -0,0 +1,151 @@
/* SPDX-License-Identifier: BSD-2-Clause */
/**
* @file
*
* @ingroup RTEMSBSPsX8664AMD64
*
* @brief BSP SMP implementation
*/
/*
* Copyright (C) 2024 Matheus Pecoraro
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <rtems/score/smpimpl.h>
#include <apic.h>
#include <assert.h>
#include <bsp.h>
#include <smp.h>
#include <libcpu/page.h>
#include <rtems/score/idt.h>
#include <string.h>
extern void _Trampoline_start(void);
extern const uint64_t _Trampoline_size;
static bool has_ap_started = false;
/**
* @brief Copies the trampoline code to the address where APs will boot from.
*
* TODO: We should ideally parse through the UEFI Memory Map to find a
* free page under 1MB
*/
static void copy_trampoline(void)
{
/* Copy the trampoline code to its destination address */
void* trampoline_dest = (void*) TRAMPOLINE_ADDR;
memcpy(trampoline_dest, (void*) _Trampoline_start, _Trampoline_size);
}
/**
* @brief Waits for the Application Processor to set the flag.
*
* @param timeout_ms Timeout in milliseconds.
*
* @return true if successful.
*/
static bool wait_for_ap(uint32_t timeout_ms)
{
uint8_t chan2_value;
uint32_t pit_ticks = PIT_FREQUENCY/1000; /* one millisecond */
PIT_CHAN2_ENABLE(chan2_value);
PIT_CHAN2_WRITE_TICKS(pit_ticks);
for (int i = 0; has_ap_started == false && i < timeout_ms; i++) {
PIT_CHAN2_START_DELAY(chan2_value);
PIT_CHAN2_WAIT_DELAY(pit_ticks);
amd64_spinwait();
}
return has_ap_started;
}
static void bsp_inter_processor_interrupt(void* arg)
{
_SMP_Inter_processor_interrupt_handler(_Per_CPU_Get());
lapic_eoi();
}
void _CPU_SMP_Prepare_start_multitasking(void)
{
/* Do nothing */
}
bool _CPU_SMP_Start_processor(uint32_t cpu_index)
{
has_ap_started = false;
lapic_start_ap(cpu_index, TRAMPOLINE_PAGE_VECTOR);
return wait_for_ap(WAIT_FOR_AP_TIMEOUT_MS);
}
uint32_t _CPU_SMP_Get_current_processor(void)
{
uint8_t lapic_id = lapic_get_id();
return amd64_lapic_to_cpu_map[lapic_id];
}
uint32_t _CPU_SMP_Initialize(void)
{
copy_trampoline();
return lapic_get_num_of_processors();
}
void _CPU_SMP_Finalize_initialization(uint32_t cpu_count)
{
if (cpu_count > 0) {
rtems_status_code sc = rtems_interrupt_handler_install(
BSP_VECTOR_IPI,
"IPI",
RTEMS_INTERRUPT_UNIQUE,
bsp_inter_processor_interrupt,
NULL
);
assert(sc == RTEMS_SUCCESSFUL);
}
}
void _CPU_SMP_Send_interrupt(uint32_t target_processor_index)
{
lapic_send_ipi(target_processor_index, BSP_VECTOR_IPI);
}
void smp_init_ap(void)
{
has_ap_started = true;
Context_Control_fp* null_fp_context_p = &_CPU_Null_fp_context;
_CPU_Context_restore_fp(&null_fp_context_p);
amd64_lapic_base[LAPIC_REGISTER_SPURIOUS] =
LAPIC_SPURIOUS_ENABLE | BSP_VECTOR_SPURIOUS;
lidt(&amd64_idtr);
_SMP_Start_multitasking_on_secondary_processor(_Per_CPU_Get());
}

View File

@@ -34,13 +34,13 @@
* SUCH DAMAGE.
*/
#include <assert.h>
#include <acpi/acpi.h>
#include <bsp.h>
#include <bsp/bootcard.h>
#include <libcpu/page.h>
#include <bsp/irq-generic.h>
#include <multiboot2impl.h>
#include <rtems/score/assert.h>
#if defined(BSP_USE_EFI_BOOT_SERVICES) && !defined(BSP_MULTIBOOT_SUPPORT)
#error "RTEMS amd64efi BSP requires multiboot2 support!"
@@ -53,7 +53,8 @@ void bsp_start(void)
if (!uefi_bootservices_running()) {
#endif
paging_init();
assert(acpi_tables_initialize());
bool acpi_table_result = acpi_tables_initialize();
_Assert(acpi_table_result);
bsp_interrupt_initialize();
#ifdef BSP_MULTIBOOT_SUPPORT
}

View File

@@ -0,0 +1,47 @@
/* SPDX-License-Identifier: BSD-2-Clause */
/**
* @file
*
* @ingroup RTEMSBSPsX8664AMD64
*
* @brief Global Descriptor Table
*/
/*
* Copyright (C) 2024 Matheus Pecoraro
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <gdt.h>
static const gdt_entry gdt_entries[] = {
{0, 0, 0, 0, 0, 0}, /* NULL segment */
{0xFFFF, 0, 0, 0x9E, 0xAF, 0}, /* Code segment */
{0xFFFF, 0, 0, 0x92, 0xCF, 0} /* Data segment */
};
const gdt_desc amd64_gdt_descriptor = {
sizeof(gdt_entries) - 1,
(uint64_t) gdt_entries
};

View File

@@ -27,6 +27,7 @@
*/
#include <bspopts.h>
#include <gdt.h>
#ifdef BSP_MULTIBOOT_SUPPORT
#include <multiboot2.h>
@@ -41,12 +42,31 @@ _start:
.cfi_startproc
movq %rsp, %rbp
/*
* _ISR_Stack_area_end is aligned to CPU_INTERRUPT_STACK_ALIGNMENT (64 bits)
* by compiler directive at /cpukit/include/rtems/confdefs/percpu.h
* _ISR_Stack_size is aligned to CPU_INTERRUPT_STACK_ALIGNMENT (64 bits)
* by compiler directive at /cpukit/include/rtems/confdefs/percpu.h.
* No reference will occur beyond the stack region since the call
* instruction will decrement the %rsp and then save the value of %rip
* instruction will decrement the %rsp and then save the value of %rip.
*/
movabsq $_ISR_Stack_area_end, %rsp
movabsq $_ISR_Stack_area_begin, %rsp
addq $_ISR_Stack_size, %rsp
#ifndef BSP_USE_EFI_BOOT_SERVICES
/* Use our own GDT instead of the one set up by UEFI */
lgdt amd64_gdt_descriptor
/* Load data segment registers */
movw $GDT_DATA_SEG_OFFSET, %ax
movw %ax, %ds
movw %ax, %es
movw %ax, %ss
/* Load code segment register */
pushq $GDT_CODE_SEG_OFFSET
pushq $after_load_gdt
retfq
after_load_gdt:
#endif
#ifndef BSP_MULTIBOOT_SUPPORT
/**

View File

@@ -40,6 +40,7 @@
#endif
#include <rtems/score/cpuopts.h>
#include <rtems/score/x86_64.h>
#include <rtems/score/percpu.h>
/**
* @defgroup RTEMSScoreCPUx86-64ASM x86-64 Assembler Support
@@ -151,6 +152,53 @@
*/
#define EXTERN(sym) .globl SYM (sym)
#ifdef RTEMS_SMP
/* REG32 must be the lower 32 bits of REG */
.macro GET_CPU_INDEX REG REG32
.set LAPIC_ID, 0x20
.set LAPIC_ID_SHIFT, 24
movq amd64_lapic_base, \REG
movl LAPIC_ID(\REG), \REG32
shrq $LAPIC_ID_SHIFT, \REG /* LAPIC_ID in REG */
movzbq amd64_lapic_to_cpu_map(\REG), \REG /* CPU ID in REG */
.endm
/* REG32 must be the lower 32 bits of REG */
.macro GET_SELF_CPU_CONTROL REG REG32
GET_CPU_INDEX \REG \REG32
shlq $PER_CPU_CONTROL_SIZE_LOG2, \REG /* Calculate offset for CPU structure */
leaq _Per_CPU_Information(\REG), \REG /* Address of info for current CPU in REG */
.endm
#else
.macro GET_CPU_INDEX REG REG32
movq $0, \REG
.endm
.macro GET_SELF_CPU_CONTROL REG REG32
leaq _Per_CPU_Information, \REG
.endm
#endif
/* Couldn't find a better way to do this under the GNU as macro limitations */
.macro GET_SELF_CPU_CONTROL_RAX
GET_SELF_CPU_CONTROL rax,%eax
.endm
.macro GET_SELF_CPU_CONTROL_RBX
GET_SELF_CPU_CONTROL rbx,%ebx
.endm
.macro GET_SELF_CPU_CONTROL_RCX
GET_SELF_CPU_CONTROL rcx,%ecx
.endm
.macro GET_SELF_CPU_CONTROL_RDX
GET_SELF_CPU_CONTROL rdx,%edx
.endm
.macro GET_SELF_CPU_CONTROL_RDI
GET_SELF_CPU_CONTROL rdi,%edi
.endm
.macro GET_SELF_CPU_CONTROL_RSI
GET_SELF_CPU_CONTROL rsi,%esi
.endm
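
Expressed in C, the SMP variants of these macros compute the following (a sketch; the map and table names are the ones used above, and PER_CPU_CONTROL_SIZE_LOG2 comes from percpu.h):

uint8_t   lapic_id  = amd64_lapic_base[0x20 >> 2] >> 24;  /* GET_CPU_INDEX */
uint32_t  cpu_index = amd64_lapic_to_cpu_map[lapic_id];
uintptr_t self_cpu  =                                     /* GET_SELF_CPU_CONTROL */
  (uintptr_t) _Per_CPU_Information
  + ((uintptr_t) cpu_index << PER_CPU_CONTROL_SIZE_LOG2);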
#endif // _RTEMS_ASM_H
/** @} */

View File

@@ -7,6 +7,7 @@
*/
/*
* Copyright (C) 2024 Matheus Pecoraro
* Copyright (c) 2018.
* Amaan Cheval <amaan.cheval@gmail.com>
*
@@ -61,6 +62,20 @@ extern "C" {
#define CPU_EFLAGS_INTERRUPTS_ON 0x00003202
#define CPU_EFLAGS_INTERRUPTS_OFF 0x00003002
#define CPU_CONTEXT_CONTROL_EFLAGS 0
#define CPU_CONTEXT_CONTROL_RBX CPU_CONTEXT_CONTROL_EFLAGS + 8
#define CPU_CONTEXT_CONTROL_RSP CPU_CONTEXT_CONTROL_RBX + 8
#define CPU_CONTEXT_CONTROL_RBP CPU_CONTEXT_CONTROL_RSP + 8
#define CPU_CONTEXT_CONTROL_R12 CPU_CONTEXT_CONTROL_RBP + 8
#define CPU_CONTEXT_CONTROL_R13 CPU_CONTEXT_CONTROL_R12 + 8
#define CPU_CONTEXT_CONTROL_R14 CPU_CONTEXT_CONTROL_R13 + 8
#define CPU_CONTEXT_CONTROL_R15 CPU_CONTEXT_CONTROL_R14 + 8
#define CPU_CONTEXT_CONTROL_ISR_DISPATCH_DISABLE CPU_CONTEXT_CONTROL_R15 + 8
#define CPU_CONTEXT_CONTROL_IS_EXECUTING CPU_CONTEXT_CONTROL_ISR_DISPATCH_DISABLE + 4
#ifndef ASM
typedef struct {
@@ -78,10 +93,12 @@ typedef struct {
uint64_t r14;
uint64_t r15;
uint32_t isr_dispatch_disable;
// XXX: FS segment descriptor for TLS
#ifdef RTEMS_SMP
volatile bool is_executing;
volatile uint16_t is_executing;
#endif
} Context_Control;
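
The CPU_CONTEXT_CONTROL_* byte offsets defined above must stay in lockstep with this struct. Note also that is_executing is widened from bool to uint16_t so the SMP context switch in cpu_asm.S can operate on it with word-sized instructions (movw $0 and lock btsw). A quick cross-check of the layout:

/* Offset cross-check against the defines (bytes):
 *   rflags 0, rbx 8, rsp 16, rbp 24, r12 32, r13 40, r14 48, r15 56,
 *   isr_dispatch_disable 64 (uint32_t), is_executing 68 (uint16_t)
 */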
@@ -97,17 +114,41 @@ typedef struct {
#define _CPU_Context_Get_SP( _context ) \
(_context)->rsp
#endif /* !ASM */
#define CPU_INTERRUPT_FRAME_SSE_STATE 0
#define CPU_INTERRUPT_FRAME_RAX CPU_INTERRUPT_FRAME_SSE_STATE + 512
#define CPU_INTERRUPT_FRAME_RCX CPU_INTERRUPT_FRAME_RAX + 8
#define CPU_INTERRUPT_FRAME_RDX CPU_INTERRUPT_FRAME_RCX + 8
#define CPU_INTERRUPT_FRAME_RSI CPU_INTERRUPT_FRAME_RDX + 8
#define CPU_INTERRUPT_FRAME_R8 CPU_INTERRUPT_FRAME_RSI + 8
#define CPU_INTERRUPT_FRAME_R9 CPU_INTERRUPT_FRAME_R8 + 8
#define CPU_INTERRUPT_FRAME_R10 CPU_INTERRUPT_FRAME_R9 + 8
#define CPU_INTERRUPT_FRAME_R11 CPU_INTERRUPT_FRAME_R10 + 8
#define CPU_INTERRUPT_FRAME_RSP CPU_INTERRUPT_FRAME_R11 + 8
#ifndef ASM
/*
* Caller-saved registers for interrupt frames
*/
typedef struct {
/**
* @note: rdi is a caller-saved register too, but it's used in function calls
* and is hence saved separately on the stack;
*
* @see DISTINCT_INTERRUPT_ENTRY
* @see _ISR_Handler
*/
/* Registers saved by CPU */
uint64_t error_code; /* only in some exceptions */
uint64_t rip;
uint64_t cs;
uint64_t rflags;
uint64_t rsp;
uint64_t ss;
/* Saved in rtems_irq_prologue_* */
uint64_t rdi;
uint64_t rbp;
uint64_t rbx;
/* SSE state saved by the FXSAVE instruction */
uint8_t sse_state[512];
uint64_t rax;
uint64_t rcx;
@@ -126,12 +167,12 @@ typedef struct {
*/
uint64_t saved_rsp;
/* XXX:
* - FS segment selector for TLS
* - x87 status word?
* - MMX?
* - XMM?
/**
* The caller-saved register area needs to start at a 16-byte aligned position
* on the stack for the FXSAVE instruction. Therefore, we have 8 extra bytes
* in case the interrupt handler needs to align it.
*/
uint8_t padding[8];
} CPU_Interrupt_frame;
extern Context_Control_fp _CPU_Null_fp_context;
@@ -140,7 +181,16 @@ extern Context_Control_fp _CPU_Null_fp_context;
#endif /* !ASM */
#define CPU_INTERRUPT_FRAME_SIZE 72
#define CPU_INTERRUPT_FRAME_X86_64_SIZE 48
#define CPU_INTERRUPT_FRAME_PROLOGUE_SIZE 24
#define CPU_INTERRUPT_FRAME_CALLER_SAVED_SIZE (512 + 72)
#define CPU_INTERRUPT_FRAME_PADDING_SIZE 8
#define CPU_INTERRUPT_FRAME_SIZE \
(CPU_INTERRUPT_FRAME_X86_64_SIZE + \
CPU_INTERRUPT_FRAME_PROLOGUE_SIZE + \
CPU_INTERRUPT_FRAME_CALLER_SAVED_SIZE + \
CPU_INTERRUPT_FRAME_PADDING_SIZE)
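
A worked check of these numbers against the CPU_Interrupt_frame struct above:

/* CPU_INTERRUPT_FRAME_X86_64_SIZE        48 = 6 qwords pushed by the CPU
 *                                             (error code, rip, cs, rflags, rsp, ss)
 * CPU_INTERRUPT_FRAME_PROLOGUE_SIZE      24 = 3 qwords saved by rtems_irq_prologue_*
 *                                             (rdi, rbp, rbx)
 * CPU_INTERRUPT_FRAME_CALLER_SAVED_SIZE 584 = 512 (FXSAVE area)
 *                                             + 9 * 8 (rax..r11 plus saved_rsp)
 * CPU_INTERRUPT_FRAME_PADDING_SIZE        8 = slack to 16-byte-align the FXSAVE area
 * CPU_INTERRUPT_FRAME_SIZE               48 + 24 + 584 + 8 = 664 bytes
 */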
/*
* When SMP is enabled, percpuasm.c has a similar assert, but since we use the
@@ -155,7 +205,7 @@ extern Context_Control_fp _CPU_Null_fp_context;
#define CPU_MPCI_RECEIVE_SERVER_EXTRA_STACK 0
#define CPU_PROVIDES_ISR_IS_IN_PROGRESS FALSE
#define CPU_STACK_MINIMUM_SIZE (1024*4)
#define CPU_STACK_MINIMUM_SIZE (1024*8)
#define CPU_SIZEOF_POINTER 8
#define CPU_ALIGNMENT 16
#define CPU_HEAP_ALIGNMENT CPU_ALIGNMENT
@@ -166,6 +216,9 @@ extern Context_Control_fp _CPU_Null_fp_context;
* ISR handler macros
*/
/* ISR_Level is a uint32_t meaning we can't use RFLAGS to represent it */
#define CPU_ISR_LEVEL_ENABLED 0
#ifndef ASM
#define _CPU_ISR_Enable(_level) \
@@ -282,6 +335,11 @@ void _CPU_Context_switch(
Context_Control *heir
);
RTEMS_NO_RETURN void _CPU_Context_switch_no_return(
Context_Control *executing,
Context_Control *heir
);
RTEMS_NO_RETURN void _CPU_Context_restore( Context_Control *new_context );
typedef struct {
@@ -349,7 +407,7 @@ uint32_t _CPU_Counter_frequency( void );
CPU_Counter_ticks _CPU_Counter_read( void );
#ifdef RTEMS_SMP
*
uint32_t _CPU_SMP_Initialize( void );
bool _CPU_SMP_Start_processor( uint32_t cpu_index );
@@ -358,16 +416,14 @@ CPU_Counter_ticks _CPU_Counter_read( void );
void _CPU_SMP_Prepare_start_multitasking( void );
static inline uint32_t _CPU_SMP_Get_current_processor( void )
{
return 123;
}
uint32_t _CPU_SMP_Get_current_processor( void );
void _CPU_SMP_Send_interrupt( uint32_t target_processor_index );
static inline bool _CPU_Context_Get_is_executing(
const Context_Control *context
)
{
return context->is_executing;
}
@@ -376,6 +432,7 @@ CPU_Counter_ticks _CPU_Counter_read( void );
bool is_executing
)
{
context->is_executing = is_executing;
}
#endif /* RTEMS_SMP */

View File

@@ -127,6 +127,11 @@ static inline void stub_io_wait(void)
*/
}
static inline void amd64_spinwait(void)
{
__asm__ volatile("pause" : : : "memory");
}
#endif /* !ASM */
#endif

View File

@@ -47,6 +47,8 @@ extern "C" {
#define BSP_VECTOR_SPURIOUS 0xFF
/* Target vector number for the APIC timer */
#define BSP_VECTOR_APIC_TIMER 32
/* Target vector number for inter-processor interrupts */
#define BSP_VECTOR_IPI 33
typedef struct _interrupt_descriptor {
uint16_t offset_0; // bits 0-15
@@ -59,13 +61,14 @@ typedef struct _interrupt_descriptor {
uint32_t reserved_zero;
} interrupt_descriptor;
extern interrupt_descriptor amd64_idt[IDT_SIZE];
struct idt_record {
uint16_t limit; /* Size of IDT array - 1 */
uintptr_t base; /* Pointer to IDT array */
} RTEMS_PACKED;
extern interrupt_descriptor amd64_idt[IDT_SIZE];
extern struct idt_record amd64_idtr;
RTEMS_STATIC_ASSERT(
sizeof(struct idt_record) == 10,
"IDT pointer must be exactly 10 bytes"
@@ -123,6 +126,7 @@ extern void rtems_irq_prologue_29(void);
extern void rtems_irq_prologue_30(void);
extern void rtems_irq_prologue_31(void);
extern void rtems_irq_prologue_32(void);
extern void rtems_irq_prologue_33(void);
#ifdef __cplusplus
}

View File

@@ -1,4 +1,5 @@
/*
* Copyright (C) 2024 Matheus Pecoraro
* Copyright (c) 2018.
* Amaan Cheval <amaan.cheval@gmail.com>
*
@@ -45,6 +46,7 @@ BEGIN_CODE
.p2align 1
PUBLIC(_CPU_Context_switch)
PUBLIC(_CPU_Context_switch_no_return)
/* save context argument */
.set RUNCONTEXT_ARG, REG_ARG0
@@ -52,37 +54,83 @@ PUBLIC(_CPU_Context_switch)
.set HEIRCONTEXT_ARG, REG_ARG1
SYM(_CPU_Context_switch):
movq RUNCONTEXT_ARG, rax /* rax = running threads context */
SYM(_CPU_Context_switch_no_return):
movq RUNCONTEXT_ARG, rax /* rax = running threads context */
GET_SELF_CPU_CONTROL_RCX /* rcx = per CPU information */
/* Fill up Context_Control struct */
pushf
popq (0 * CPU_SIZEOF_POINTER)(rax) /* pop rflags into context */
movq rbx, (1 * CPU_SIZEOF_POINTER)(rax)
movq rsp, (2 * CPU_SIZEOF_POINTER)(rax)
movq rbp, (3 * CPU_SIZEOF_POINTER)(rax)
movq r12, (4 * CPU_SIZEOF_POINTER)(rax)
movq r13, (5 * CPU_SIZEOF_POINTER)(rax)
movq r14, (6 * CPU_SIZEOF_POINTER)(rax)
movq r15, (7 * CPU_SIZEOF_POINTER)(rax)
popq CPU_CONTEXT_CONTROL_EFLAGS(rax) /* pop rflags into context */
movq rbx, CPU_CONTEXT_CONTROL_RBX(rax)
movq rsp, CPU_CONTEXT_CONTROL_RSP(rax)
movq rbp, CPU_CONTEXT_CONTROL_RBP(rax)
movq r12, CPU_CONTEXT_CONTROL_R12(rax)
movq r13, CPU_CONTEXT_CONTROL_R13(rax)
movq r14, CPU_CONTEXT_CONTROL_R14(rax)
movq r15, CPU_CONTEXT_CONTROL_R15(rax)
movl PER_CPU_ISR_DISPATCH_DISABLE(rcx), %edx
movl %edx, CPU_CONTEXT_CONTROL_ISR_DISPATCH_DISABLE(rax)
movq rax, r8 /* r8 = running threads context */
movq HEIRCONTEXT_ARG, rax /* rax = heir threads context */
restore:
movq (1 * CPU_SIZEOF_POINTER)(rax), rbx
movq (2 * CPU_SIZEOF_POINTER)(rax), rsp
#ifdef RTEMS_SMP
/*
* The executing thread no longer executes on this processor. Switch
* the stack to the temporary interrupt stack of this processor. Mark
* the context of the executing thread as not executing.
*/
leaq PER_CPU_INTERRUPT_FRAME_AREA + CPU_INTERRUPT_FRAME_SIZE(rcx), rsp
movw $0, CPU_CONTEXT_CONTROL_IS_EXECUTING(r8)
.check_is_executing:
lock btsw $0, CPU_CONTEXT_CONTROL_IS_EXECUTING(rax) /* Indicator in carry flag */
jnc .restore
.get_potential_new_heir:
/* We may have a new heir */
/* Read the executing and heir */
movq PER_CPU_OFFSET_EXECUTING(rcx), r9
movq PER_CPU_OFFSET_HEIR(rcx), r10
/*
* Update the executing only if necessary to avoid cache line
* monopolization.
*/
cmpq r9, r10
je .check_is_executing
/* Calculate the heir context pointer */
addq r10, rax
subq r9, rax
/* Update the executing */
movq r10, PER_CPU_OFFSET_EXECUTING(rcx)
jmp .check_is_executing
#endif
.restore:
movq CPU_CONTEXT_CONTROL_RBX(rax), rbx
movq CPU_CONTEXT_CONTROL_RSP(rax), rsp
/*
* We need to load rflags after rsp to avoid an interrupt while the ISR stack
* is still being used during the initialization process
*/
pushq (0 * CPU_SIZEOF_POINTER)(rax) /* push rflags */
popf /* restore rflags */
pushq CPU_CONTEXT_CONTROL_EFLAGS(rax) /* push rflags */
popf /* restore rflags */
movq (3 * CPU_SIZEOF_POINTER)(rax), rbp
movq (4 * CPU_SIZEOF_POINTER)(rax), r12
movq (5 * CPU_SIZEOF_POINTER)(rax), r13
movq (6 * CPU_SIZEOF_POINTER)(rax), r14
movq (7 * CPU_SIZEOF_POINTER)(rax), r15
movq CPU_CONTEXT_CONTROL_RBP(rax), rbp
movq CPU_CONTEXT_CONTROL_R12(rax), r12
movq CPU_CONTEXT_CONTROL_R13(rax), r13
movq CPU_CONTEXT_CONTROL_R14(rax), r14
movq CPU_CONTEXT_CONTROL_R15(rax), r15
movl CPU_CONTEXT_CONTROL_ISR_DISPATCH_DISABLE(rax), %edx
movl %edx, PER_CPU_ISR_DISPATCH_DISABLE(rcx)
/* XXX: TLS - load GDT and refresh FS segment selector */
@@ -100,7 +148,8 @@ PUBLIC(_CPU_Context_restore)
SYM(_CPU_Context_restore):
movq NEWCONTEXT_ARG, rax /* rax = running threads context */
jmp restore
GET_SELF_CPU_CONTROL_RCX /* rcx = per CPU information */
jmp .restore
END_CODE
END

View File

@@ -16,6 +16,8 @@ links:
uid: abi
- role: build-dependency
uid: obj
- role: build-dependency
uid: objsmp
- role: build-dependency
uid: start
- role: build-dependency

View File

@@ -14,6 +14,7 @@ install:
- bsps/x86_64/amd64/include/bsp.h
- bsps/x86_64/amd64/include/clock.h
- bsps/x86_64/amd64/include/freebsd_loader.h
- bsps/x86_64/amd64/include/gdt.h
- bsps/x86_64/amd64/include/pic.h
- bsps/x86_64/amd64/include/start.h
- destination: ${BSP_INCLUDEDIR}/acpi
@@ -47,5 +48,6 @@ source:
- bsps/x86_64/amd64/interrupts/pic.c
- bsps/x86_64/amd64/start/bspstart.c
- bsps/x86_64/amd64/start/bspreset.c
- bsps/x86_64/amd64/start/gdt.c
- bsps/x86_64/amd64/start/page.c
type: build

View File

@@ -0,0 +1,19 @@
SPDX-License-Identifier: CC-BY-SA-4.0 OR BSD-2-Clause
build-type: objects
cflags: []
copyrights:
- Copyright (C) 2024 Matheus Pecoraro
cppflags: []
cxxflags: []
enabled-by:
- RTEMS_SMP
includes: []
install:
- destination: ${BSP_INCLUDEDIR}
source:
- bsps/x86_64/amd64/include/smp.h
links: []
source:
- bsps/x86_64/amd64/start/ap_trampoline.S
- bsps/x86_64/amd64/start/bspsmp.c
type: build

View File

@@ -52,6 +52,7 @@ enabled-by:
- sparc/gr712rc
- sparc/gr740
- sparc/leon3
- x86_64/amd64
links: []
name: RTEMS_SMP
type: build