From 8eba47816f94d422a1eb1bdeb2c1ae22ad5f0a9d Mon Sep 17 00:00:00 2001 From: Philipp Oppermann Date: Wed, 9 Nov 2016 00:21:56 +0100 Subject: [PATCH 01/31] Begin new post about double faults --- blog/post/double-faults.md | 153 +++++++++++++++++++++++++++++++++++++ 1 file changed, 153 insertions(+) create mode 100644 blog/post/double-faults.md diff --git a/blog/post/double-faults.md b/blog/post/double-faults.md new file mode 100644 index 00000000..e794e27b --- /dev/null +++ b/blog/post/double-faults.md @@ -0,0 +1,153 @@ ++++ +title = "Double Faults" +date = "2016-11-08" ++++ + +In this post we will make our kernel completely exception-proof by catching double faults on a separate kernel stack. + + + +## Triggering a Double Fault +A double fault occurs whenever the CPU fails to call the handler function for an exception. On a high level it's like a catch-all handler, similar to `catch(...)` in C++ or `catch(Exception e)` in Java or C#. + +The most common case is that there isn't a handler defined in the IDT. However, a double fault also occurs if the exception handler lies on a unaccessible page of if the CPU fails to push the exception stack frame. + +Let's provoke a double fault by triggering an exception for that we didn't define a handler function yet: + +{{< highlight rust "hl_lines=10" >}} +// in src/lib.rs + +#[no_mangle] +pub extern "C" fn rust_main(multiboot_information_address: usize) { + ... + // initialize our IDT + interrupts::init(); + + // trigger a debug exception + unsafe { int!(1) }; + + println!("It did not crash!"); + loop {} +} +{{< / highlight >}} + +We use the [int! macro] of the [x86 crate] to trigger the exception with vector number `1`. The exception with that vector number is the [debug exception]. Like the [breakpoint exception], it is mainly used for debuggers. We haven't registered a handler function in our [IDT], so this line should cause a double fault in the CPU. + +[int! 
macro]: https://docs.rs/x86/0.8.0/x86/macro.int!.html +[x86 crate]: https://github.com/gz/rust-x86 +[debug exception]: http://wiki.osdev.org/Exceptions#Debug +[breakpoint exception]: http://wiki.osdev.org/Exceptions#Breakpoint + +[IDT]: https://en.wikipedia.org/wiki/Interrupt_descriptor_table + +When we start our kernel now, we see that it enters an endless loop: + +![boot loop](images/boot-loop.gif) + +The reason for the boot loop is the following: + +1. The CPU executes the `int 1` instruction macro, which causes a software-invoked `Debug` exception. +2. The CPU looks at the corresponding entry in the IDT and sees that the present bit isn't set. Thus, it can't call the debug exception handler and a double fault occurs. +3. The CPU looks at the IDT entry of the double fault handler, but this entry is also non-present. Thus, a _triple_ fault occurs. +4. A triple fault is fatal. QEMU reacts to it like most real hardware and issues a system reset. + +So in order to prevent this triple fault, we need to either provide a handler function for `Debug` exceptions or a double fault handler. We will do the latter, since this post is all about the double fault. + +## A Double Fault Handler +A double fault is a normal exception with an error code, so we can use our `handler_with_error_code` macro to create a wrapper function: + +{{< highlight rust "hl_lines=10 17 18 19 20 21 22" >}} +// in src/interrupts/mod.rs + +lazy_static! 
{ + static ref IDT: idt::Idt = { + let mut idt = idt::Idt::new(); + + idt.set_handler(0, handler!(divide_by_zero_handler)); + idt.set_handler(3, handler!(breakpoint_handler)); + idt.set_handler(6, handler!(invalid_opcode_handler)); + idt.set_handler(8, handler_with_error_code!(double_fault_handler)); + idt.set_handler(14, handler_with_error_code!(page_fault_handler)); + + idt + }; +} + +extern "C" fn double_fault_handler(stack_frame: &ExceptionStackFrame, + _error_code: u64) +{ + println!("\nEXCEPTION: DOUBLE FAULT\n{:#?}", stack_frame); + loop {} +} +{{< / highlight >}} + +The error code of the double fault handler is always zero, so we don't print it. + +When we start our kernel now, we should see that the double fault handler is invoked: + +![QEMU printing `EXCEPTION: DOUBLE FAULT` and the exception stack frame](images/qemu-catch-double-fault.png) + +It worked! Here is what happens this time: + +1. The CPU executes the `int 1` instruction macro, which causes a software-invoked `Debug` exception. +2. The CPU looks at the corresponding entry in the IDT and sees that the present bit isn't set. Thus, it can't call the debug exception handler and a double fault occurs. +3. The CPU jumps to the – now present – double fault handler. + +The triple fault (and the boot-loop) no longer occurs, since the CPU can now call the double fault handler. + +That was pretty straightforward! So why do we need a whole post for this topic? Well, we're now able to catch _most_ double faults, but there are some edge cases where our current approach doesn't suffice. + +## Stack Overflows +An example for such an edge case is a kernel stack a kernel stack overflow. We can easily provoke one through a function with endless recursion: + +{{< highlight rust "hl_lines=9 10 11 14" >}} +// in src/lib.rs + +#[no_mangle] +pub extern "C" fn rust_main(multiboot_information_address: usize) { + ... 
+ // initialize our IDT + interrupts::init(); + + fn stack_overflow() { + stack_overflow(); + } + + // trigger a stack overflow + stack_overflow(); + + println!("It did not crash!"); + loop {} +} +{{< / highlight >}} + +When we try this code in QEMU, we see that the system enters a boot-loop again. Here is what happens: When the `stack_overflow` function is called, the whole stack gets filled with return addresses. At some point, we overflow the stack and hit the guard page, which we [set up][set up guard page] for exactly this case. Thus, a _page fault_ occurs. + +Now the CPU pushes the exception stack frame and the registers and invokes the page fault handler… wait… this can't work. We overflowed our stack, so the stack pointer points to the guard page. And now the CPU tries to push to it, which causes another page fault. At this point, a double fault occurs, since an exception occurred while calling an exception handler. + +So the CPU tries to invoke the double fault handler now. But first, it tries to push the exception stack frame, since exceptions on x86 work that way. Of course, this is still not possible (the stack pointer still points to the guard page), so another page fault occurs while calling the double fault handler. Thus, a triple fault occurs and QEMU issues a system reset. + +So how can we avoid this problem? We can't omit the pushing of the exception stack frame, since it's the CPU itself that does it. So we need to ensure somehow that the stack is always valid when a double fault exception occurs. Fortunately, the x86_64 architecture has a trick for this problem. + +## Switching Stacks +The x86_64 architecture is able to switch to a predefined stack when an exception occurs. However, it is a bit cumbersome to setup this mechanism. + +The mechanism consists of two main components: An _Interrupt Stack Table_ and a _Task State Segment_. 
+ + +Switching stacks +The Interrupt Stack Table +The Task State Segment +The Global Descriptor Table (again) +Putting it together +What’s next? + +In the previous post, we learned how to return from exceptions correctly. In this post, we will explore a special type of exception: the double fault. The double fault occurs whenever the invokation of an excpption handler fails. For example, if we didn't declare any exception hanlder in the IDT. + +Let's start by creating a handler function for double faults: + +```rust + +``` + +Next, we need to register the double fault handler in our IDT: From 0cd2577abeb03590f6b8c041d7a17c071187244d Mon Sep 17 00:00:00 2001 From: Philipp Oppermann Date: Wed, 9 Nov 2016 00:22:13 +0100 Subject: [PATCH 02/31] Some code for double faults post --- src/interrupts/mod.rs | 6 ++++++ src/lib.rs | 19 +++++++++++++++++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/src/interrupts/mod.rs b/src/interrupts/mod.rs index 6556236e..c0232760 100644 --- a/src/interrupts/mod.rs +++ b/src/interrupts/mod.rs @@ -93,6 +93,7 @@ lazy_static! 
{ idt.set_handler(0, handler!(divide_by_zero_handler)); idt.set_handler(3, handler!(breakpoint_handler)); idt.set_handler(6, handler!(invalid_opcode_handler)); + idt.set_handler(8, handler_with_error_code!(double_fault_handler)); idt.set_handler(14, handler_with_error_code!(page_fault_handler)); idt @@ -150,3 +151,8 @@ extern "C" fn page_fault_handler(stack_frame: &ExceptionStackFrame, error_code: stack_frame); loop {} } + +extern "C" fn double_fault_handler(stack_frame: &ExceptionStackFrame, _error_code: u64) { + println!("\nEXCEPTION: DOUBLE FAULT\n{:#?}", stack_frame); + loop {} +} diff --git a/src/lib.rs b/src/lib.rs index 441b2e5f..87c17367 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -56,8 +56,23 @@ pub extern "C" fn rust_main(multiboot_information_address: usize) { // initialize our IDT interrupts::init(); - // trigger a breakpoint exception - unsafe { int!(3) }; + stack_overflow(); + // trigger a debug exception + unsafe { int!(1) }; + + fn divide_by_zero() { + unsafe { asm!("mov dx, 0; div dx" ::: "ax", "dx" : "volatile", "intel") } + } + + fn int_overflow() { + unsafe { asm!("mov al, 0xf0; add al, 0x10; into" ::: "ax", "dx" : "volatile", "intel") } + } + + fn stack_overflow() { + let _large_array = [1; 100000]; + } + + int_overflow(); println!("It did not crash!"); loop {} From b4bc47d5d99c1ca5868598d8ab4856ce222d1329 Mon Sep 17 00:00:00 2001 From: Philipp Oppermann Date: Fri, 25 Nov 2016 18:29:29 +0100 Subject: [PATCH 03/31] Add a new section about the causes of double faults (+ many other improvements) --- blog/post/double-faults.md | 93 +++++++++++++++++++++++++++++++------- 1 file changed, 77 insertions(+), 16 deletions(-) diff --git a/blog/post/double-faults.md b/blog/post/double-faults.md index e794e27b..96937fe4 100644 --- a/blog/post/double-faults.md +++ b/blog/post/double-faults.md @@ -5,13 +5,16 @@ date = "2016-11-08" In this post we will make our kernel completely exception-proof by catching double faults on a separate kernel stack. 
- + -## Triggering a Double Fault -A double fault occurs whenever the CPU fails to call the handler function for an exception. On a high level it's like a catch-all handler, similar to `catch(...)` in C++ or `catch(Exception e)` in Java or C#. +## What is a Double Fault? +In simplified terms, a double fault is a special exception that occurs when the CPU can't invoke an exception handler. For example, it occurs when a page fault is triggered but there is no page fault handler registered in the [IDT]. So it's kind of similar to catch-all blocks in programming languages with exceptions, e.g. `catch(...)` in C++ or `catch(Exception e)` in Java or C#. -The most common case is that there isn't a handler defined in the IDT. However, a double fault also occurs if the exception handler lies on a unaccessible page of if the CPU fails to push the exception stack frame. +[IDT]: {{% relref "09-catching-exceptions.md#the-interrupt-descriptor-table" %}} +A double fault behaves like a normal exception. It has the vector number `8` and we can define a normal handler function for it in the IDT. It is really important to provide a double fault handler, because if a double faults is unhandled a fatal _triple fault_ occurs. Triple faults can't be caught and most hardware reacts with a system reset. + +### Triggering a Double Fault Let's provoke a double fault by triggering an exception for that we didn't define a handler function yet: {{< highlight rust "hl_lines=10" >}} @@ -31,32 +34,36 @@ pub extern "C" fn rust_main(multiboot_information_address: usize) { } {{< / highlight >}} -We use the [int! macro] of the [x86 crate] to trigger the exception with vector number `1`. The exception with that vector number is the [debug exception]. Like the [breakpoint exception], it is mainly used for debuggers. We haven't registered a handler function in our [IDT], so this line should cause a double fault in the CPU. +We use the [int! 
macro] of the [x86 crate] to trigger the exception with vector number `1`, which is the [debug exception]. The debug exception occurs for example when a breakpoint defined in the [debug registers] is hit. Like the [breakpoint exception], it is mainly used for [implementing debuggers]. [int! macro]: https://docs.rs/x86/0.8.0/x86/macro.int!.html [x86 crate]: https://github.com/gz/rust-x86 [debug exception]: http://wiki.osdev.org/Exceptions#Debug +[debug registers]: https://en.wikipedia.org/wiki/X86_debug_register [breakpoint exception]: http://wiki.osdev.org/Exceptions#Breakpoint +[implementing debuggers]: http://www.ksyash.com/2011/01/210/ -[IDT]: https://en.wikipedia.org/wiki/Interrupt_descriptor_table +We haven't registered a handler function for the debug exception in our [IDT], so the `int!(1)` line should cause a double fault in the CPU. -When we start our kernel now, we see that it enters an endless loop: +When we start our kernel now, we see that it enters an endless boot loop: ![boot loop](images/boot-loop.gif) The reason for the boot loop is the following: -1. The CPU executes the `int 1` instruction macro, which causes a software-invoked `Debug` exception. +1. The CPU executes the [int 1] instruction, which causes a software-invoked `Debug` exception. 2. The CPU looks at the corresponding entry in the IDT and sees that the present bit isn't set. Thus, it can't call the debug exception handler and a double fault occurs. 3. The CPU looks at the IDT entry of the double fault handler, but this entry is also non-present. Thus, a _triple_ fault occurs. 4. A triple fault is fatal. QEMU reacts to it like most real hardware and issues a system reset. +[int 1]: https://en.wikipedia.org/wiki/INT_(x86_instruction) + So in order to prevent this triple fault, we need to either provide a handler function for `Debug` exceptions or a double fault handler. We will do the latter, since this post is all about the double fault. 
-## A Double Fault Handler +### A Double Fault Handler A double fault is a normal exception with an error code, so we can use our `handler_with_error_code` macro to create a wrapper function: -{{< highlight rust "hl_lines=10 17 18 19 20 21 22" >}} +{{< highlight rust "hl_lines=10 17" >}} // in src/interrupts/mod.rs lazy_static! { @@ -73,6 +80,7 @@ lazy_static! { }; } +// our new double fault handler extern "C" fn double_fault_handler(stack_frame: &ExceptionStackFrame, _error_code: u64) { @@ -81,7 +89,7 @@ extern "C" fn double_fault_handler(stack_frame: &ExceptionStackFrame, } {{< / highlight >}} -The error code of the double fault handler is always zero, so we don't print it. +Our handler prints a short error message and dumps the exception stack frame. The error code of the double fault handler is _always zero_, so there's no reason to print it. When we start our kernel now, we should see that the double fault handler is invoked: @@ -90,15 +98,60 @@ When we start our kernel now, we should see that the double fault handler is inv It worked! Here is what happens this time: 1. The CPU executes the `int 1` instruction macro, which causes a software-invoked `Debug` exception. -2. The CPU looks at the corresponding entry in the IDT and sees that the present bit isn't set. Thus, it can't call the debug exception handler and a double fault occurs. +2. Like before, the CPU looks at the corresponding entry in the IDT and sees that the present bit isn't set. Thus, it can't call the debug exception handler and a double fault occurs. 3. The CPU jumps to the – now present – double fault handler. The triple fault (and the boot-loop) no longer occurs, since the CPU can now call the double fault handler. -That was pretty straightforward! So why do we need a whole post for this topic? Well, we're now able to catch _most_ double faults, but there are some edge cases where our current approach doesn't suffice. +That was pretty straightforward! 
So why do we need a whole post for this topic? Well, we're now able to catch _most_ double faults, but there are some cases where our current approach doesn't suffice. -## Stack Overflows -An example for such an edge case is a kernel stack a kernel stack overflow. We can easily provoke one through a function with endless recursion: +## Causes of Double Faults +Before we look at the special cases, we need to know the exact causes of double faults. Above, we used a pretty vague definition: + +> A double fault is a special exception that occurs when the CPU can't invoke an exception handler. + +What does _“can't invoke”_ mean exactly? The handler is not present? The handler is [swapped out]? And what happens if a handler causes exceptions itself? + +[swapped out]: http://pages.cs.wisc.edu/~remzi/OSTEP/vm-beyondphys.pdf + +For example, what happens if… : + +1. a divide-by-zero exception occurs, but the corresponding handler function is swapped out? +2. a page fault occurs, but the page fault handler is swapped out? +3. a divide-by-zero handler invokes a breakpoint exception, but the breakpoint handler is swapped out? +4. our kernel overflows its stack and the [guard page] is hit? + +[guard page]: {{% relref "07-remap-the-kernel.md#creating-a-guard-page" %}} + +Fortunately, the AMD64 manual ([PDF][AMD64 manual]) has an exact definition (in Section 8.2.9). According to it, a “double fault exception _can_ occur when a second exception occurs during the handling of a prior (first) exception handler”. The _“can”_ is important: Only very specific combinations of exceptions lead to a double fault. These combinations are: + +First Exception | Second Exception +----------------|----------------- +divide-by-zero,
invalid-tss,
segment-not-present,
stack,
general-protection | invalid-tss,
segment-not-present,
stack,
general-protection +page fault | page fault,
invalid-tss,
segment-not-present,
stack,
general-protection + +[AMD64 manual]: http://developer.amd.com/wordpress/media/2012/10/24593_APM_v21.pdf + +So for example a divide-by-zero fault followed by a page fault is fine, but a divide-by-zero fault followed by a general-protection fault leads to a double fault. With the help of this table, we can answer the first three of the above questions: + +1. When a divide-by-zero exception occurs and the corresponding handler function is swapped out, a _page fault_ occurs and the _page fault handler_ is invoked. +2. When a page fault occurs and the page fault handler is swapped out, a _double fault_ occurs and the _double fault_ handler is invoked. +3. When a divide-by-zero handler invokes a breakpoint exception and the breakpoint handler is swapped out, a _breakpoint exception_ occurs first. However, the corresponding handler is swapped out, so a _page fault_ occurs and the _page fault handler_ is invoked. + +In fact, even the case of a non-present handler follows this scheme: A non-present handler causes a _segment-not-present_ exception. We didn't define a segment-not-present handler, so another segment-not-present exception occurs. According to the table, this leads to a double fault. + +### Kernel Stack Overflow +Let's look at the fourth question: + +> What happens if our kernel overflows its stack and the [guard page] is hit? + +When our kernel overflows its stack and hits the guard page, a _page fault_ occurs and the CPU invokes the page fault handler. However, the CPU also tries to push the [exception stack frame] onto the stack. This fails of course, since our current stack pointer still points to the guard page. Thus, a second page fault occurs, which causes a double fault (according to the above table). + +[exception stack frame]: http://os.phil-opp.com/better-exception-messages.html#exceptions-in-detail + +So the CPU tries to call our _double fault handler_ now. However, on a double fault the CPU tries to push the exception stack frame, too. 
Thus, a _third_ page fault occurs, which causes a _triple fault_ and a system reboot. So our current double fault handler can't avoid a triple fault in this case. + +Let's try it ourselves! We can easily provoke a kernel stack overflow by calling a function that recurses endlessly: {{< highlight rust "hl_lines=9 10 11 14" >}} // in src/lib.rs @@ -142,7 +195,7 @@ The Global Descriptor Table (again) Putting it together What’s next? -In the previous post, we learned how to return from exceptions correctly. In this post, we will explore a special type of exception: the double fault. The double fault occurs whenever the invokation of an excpption handler fails. For example, if we didn't declare any exception hanlder in the IDT. +In the previous post, we learned how to return from exceptions correctly. In this post, we will explore a special type of exception: the double fault. The double fault occurs whenever the invokation of an exception handler fails. For example, if we didn't declare any exception hanlder in the IDT. Let's start by creating a handler function for double faults: @@ -151,3 +204,11 @@ Let's start by creating a handler function for double faults: ``` Next, we need to register the double fault handler in our IDT: + + +Double faults also occur when an exception occurs while the CPU is trying to invoke an exception handler. For example, let's assume a divide-by-zero exception occurs but the OS accidentally [swapped out] the corresponding handler function. Now the CPU tries to call the divide-by-zero handler, which + + +A double fault occurs whenever the CPU fails to call an exception handler. On a high level it's like a catch-all handler, similar to `catch(...)` in C++ or `catch(Exception e)` in Java or C#. + +The most common case is that there isn't a handler defined in the IDT. However, a double fault also occurs if the exception handler lies on a unaccessible page of if the CPU fails to push the exception stack frame. 
From 5f8de6e8711ab666fa4708e0e6ebdcc985e7e21b Mon Sep 17 00:00:00 2001 From: Philipp Oppermann Date: Mon, 28 Nov 2016 10:13:27 +0100 Subject: [PATCH 04/31] Create TSS and GDT modules and use a double fault stack --- src/interrupts/gdt.rs | 120 ++++++++++++++++++++++++++++++++++ src/interrupts/idt.rs | 8 +-- src/interrupts/mod.rs | 50 ++++++++++++-- src/interrupts/tss.rs | 50 ++++++++++++++ src/lib.rs | 13 +++- src/memory/mod.rs | 33 +++++++++- src/memory/paging/mod.rs | 12 +++- src/memory/stack_allocator.rs | 51 +++++++++++++++ 8 files changed, 320 insertions(+), 17 deletions(-) create mode 100644 src/interrupts/gdt.rs create mode 100644 src/interrupts/tss.rs create mode 100644 src/memory/stack_allocator.rs diff --git a/src/interrupts/gdt.rs b/src/interrupts/gdt.rs new file mode 100644 index 00000000..76418808 --- /dev/null +++ b/src/interrupts/gdt.rs @@ -0,0 +1,120 @@ +use bit_field::BitField; +use collections::vec::Vec; +use interrupts::tss::TaskStateSegment; + +pub struct Gdt(Vec); + +impl Gdt { + pub fn new() -> Gdt { + let zero_entry = 0; + Gdt(vec![zero_entry]) + } + + pub fn add_entry(&mut self, entry: Entry) -> Selector { + use core::mem::size_of; + let index = self.0.len() * size_of::(); + + match entry { + Entry::UserSegment(entry) => self.0.push(entry), + Entry::SystemSegment(entry_low, entry_high) => { + self.0.push(entry_low); + self.0.push(entry_high); + } + } + + Selector(index as u16) + } + + pub fn load(&'static self) { + use x86::dtables::{DescriptorTablePointer, lgdt}; + use core::mem::size_of; + + let ptr = DescriptorTablePointer { + base: self.0.as_ptr() as u64, + limit: (self.0.len() * size_of::() - 1) as u16, + }; + + unsafe { lgdt(&ptr) }; + } +} + +pub enum Entry { + UserSegment(u64), + SystemSegment(u64, u64), +} + +impl Entry { + pub fn code_segment() -> Entry { + let flags = DESCRIPTOR_TYPE | PRESENT | READ_WRITE | EXECUTABLE | LONG_MODE; + Entry::UserSegment(flags.bits()) + } + + pub fn data_segment() -> Entry { + let flags = 
DESCRIPTOR_TYPE | PRESENT | READ_WRITE; + Entry::UserSegment(flags.bits()) + } + + pub fn tss_segment(tss: &'static TaskStateSegment) -> Entry { + use core::mem::size_of; + + let ptr = tss as *const _ as u64; + + let mut low = PRESENT.bits(); + low.set_range(0..16, (size_of::() - 1) as u64); + low.set_range(16..40, ptr.get_range(0..24)); + low.set_range(40..44, 0b1001); // type: available 64-bit tss + + let mut high = 0; + high.set_range(0..32, ptr.get_range(32..64)); + + Entry::SystemSegment(low, high) + } +} + +bitflags! { + flags EntryFlags: u64 { + const READ_WRITE = 1 << 41, + const CONFORMING = 1 << 42, + const EXECUTABLE = 1 << 43, + const DESCRIPTOR_TYPE = 1 << 44, + const PRESENT = 1 << 47, + const LONG_MODE = 1 << 53, + } +} + +#[derive(Debug, Clone, Copy)] +pub struct Selector(u16); + +impl Selector { + pub fn new() -> Selector { + Selector(0) + } +} + +pub fn reload_segment_registers(code_selector: Selector, data_selector: Selector) { + + let current_code_selector: u16; + let current_data_selector: u16; + + unsafe { + asm!("mov $0, cs" : "=r" (current_code_selector) ::: "intel"); + asm!("mov $0, ds" : "=r" (current_data_selector) ::: "intel"); + } + assert_eq!(code_selector.0, current_code_selector); + assert_eq!(data_selector.0, current_data_selector); + + // jmp ax:.new_code_segment // TODO + // .new_code_segment: + // unsafe { asm!(" + // mov ax, $1 + // mov ss, ax + // mov ds, ax + // mov es, ax + // ":: "r" (code_selector.0), "r" (data_selector.0) :: "intel")}; + // +} + +/// Load the task state register. 
+pub unsafe fn load_ltr(selector: Selector) { + asm!("ltr $0" :: "r" (selector)); +} diff --git a/src/interrupts/idt.rs b/src/interrupts/idt.rs index 9c191cbb..0d290150 100644 --- a/src/interrupts/idt.rs +++ b/src/interrupts/idt.rs @@ -102,14 +102,14 @@ impl EntryOptions { } #[allow(dead_code)] - pub fn set_privilege_level(&mut self, dpl: u16) -> &mut Self { - self.0.set_range(13..15, dpl); + pub fn set_privilege_level(&mut self, dpl: u8) -> &mut Self { + self.0.set_range(13..15, dpl.into()); self } #[allow(dead_code)] - pub fn set_stack_index(&mut self, index: u16) -> &mut Self { - self.0.set_range(0..3, index); + pub fn set_stack_index(&mut self, index: u8) -> &mut Self { + self.0.set_range(0..3, index.into()); self } } diff --git a/src/interrupts/mod.rs b/src/interrupts/mod.rs index c0232760..246a70cb 100644 --- a/src/interrupts/mod.rs +++ b/src/interrupts/mod.rs @@ -7,7 +7,12 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. +use spin::Once; +use memory::StackPointer; + mod idt; +mod tss; +mod gdt; macro_rules! save_scratch_registers { () => { @@ -86,22 +91,53 @@ macro_rules! handler_with_error_code { }} } -lazy_static! 
{ - static ref IDT: idt::Idt = { +static IDT: Once = Once::new(); +static TSS: Once = Once::new(); +static GDT: Once = Once::new(); + +pub fn init(double_fault_stack: StackPointer) { + let mut double_fault_ist_index = 0; + + let tss = TSS.call_once(|| { + let mut tss = tss::TaskStateSegment::new(); + + double_fault_ist_index = tss.interrupt_stacks + .insert_stack(double_fault_stack) + .expect("IST flush_all"); + + tss + }); + + let mut code_selector = gdt::Selector::new(); + let mut data_selector = gdt::Selector::new(); + let mut tss_selector = gdt::Selector::new(); + let gdt = GDT.call_once(|| { + let mut gdt = gdt::Gdt::new(); + + code_selector = gdt.add_entry(gdt::Entry::code_segment()); + data_selector = gdt.add_entry(gdt::Entry::data_segment()); + tss_selector = gdt.add_entry(gdt::Entry::tss_segment(tss)); + + gdt + }); + gdt.load(); + gdt::reload_segment_registers(code_selector, data_selector); + unsafe { gdt::load_ltr(tss_selector) }; + + let idt = IDT.call_once(|| { let mut idt = idt::Idt::new(); idt.set_handler(0, handler!(divide_by_zero_handler)); idt.set_handler(3, handler!(breakpoint_handler)); idt.set_handler(6, handler!(invalid_opcode_handler)); - idt.set_handler(8, handler_with_error_code!(double_fault_handler)); + idt.set_handler(8, handler_with_error_code!(double_fault_handler)) + .set_stack_index(double_fault_ist_index); idt.set_handler(14, handler_with_error_code!(page_fault_handler)); idt - }; -} + }); -pub fn init() { - IDT.load(); + idt.load(); } #[derive(Debug)] diff --git a/src/interrupts/tss.rs b/src/interrupts/tss.rs new file mode 100644 index 00000000..47e38a53 --- /dev/null +++ b/src/interrupts/tss.rs @@ -0,0 +1,50 @@ +use memory::StackPointer; + +#[derive(Debug)] +#[repr(C, packed)] +pub struct TaskStateSegment { + reserved_0: u32, + pub privilege_stacks: PrivilegeStackTable, + reserved_1: u64, + pub interrupt_stacks: InterruptStackTable, + reserved_2: u64, + reserved_3: u16, + iomap_base: u16, +} + +impl TaskStateSegment { + pub fn 
new() -> TaskStateSegment { + TaskStateSegment { + privilege_stacks: PrivilegeStackTable([None, None, None]), + interrupt_stacks: InterruptStackTable::new(), + iomap_base: 0, + reserved_0: 0, + reserved_1: 0, + reserved_2: 0, + reserved_3: 0, + } + } +} + +#[derive(Debug)] +pub struct PrivilegeStackTable([Option; 3]); + +#[derive(Debug)] +pub struct InterruptStackTable([Option; 7]); + +impl InterruptStackTable { + pub fn new() -> InterruptStackTable { + InterruptStackTable([None, None, None, None, None, None, None]) + } + + pub fn insert_stack(&mut self, stack_pointer: StackPointer) -> Result { + // TSS index starts at 1 + for (entry, i) in self.0.iter_mut().zip(1..) { + if entry.is_none() { + *entry = Some(stack_pointer); + return Ok(i); + } + } + Err(stack_pointer) + } +} diff --git a/src/lib.rs b/src/lib.rs index 87c17367..d53a90d3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,6 +13,8 @@ #![feature(asm)] #![feature(naked_functions)] #![feature(core_intrinsics)] +#![feature(nonzero)] +#![feature(drop_types_in_const)] #![no_std] extern crate rlibc; @@ -51,10 +53,15 @@ pub extern "C" fn rust_main(multiboot_information_address: usize) { enable_write_protect_bit(); // set up guard page and map the heap pages - memory::init(boot_info); + let mut memory_controller = memory::init(boot_info); + // initialize our IDT - interrupts::init(); + let double_fault_stack = memory_controller.alloc_stack(1) + .expect("could not allocate double fault stack"); + interrupts::init(double_fault_stack); + + unsafe { int!(3) }; stack_overflow(); // trigger a debug exception @@ -69,7 +76,7 @@ pub extern "C" fn rust_main(multiboot_information_address: usize) { } fn stack_overflow() { - let _large_array = [1; 100000]; + stack_overflow(); } int_overflow(); diff --git a/src/memory/mod.rs b/src/memory/mod.rs index 266aa5a2..25c11f4f 100644 --- a/src/memory/mod.rs +++ b/src/memory/mod.rs @@ -9,15 +9,17 @@ pub use self::area_frame_allocator::AreaFrameAllocator; pub use 
self::paging::remap_the_kernel; +pub use self::stack_allocator::{StackAllocator, StackPointer}; use self::paging::PhysicalAddress; use multiboot2::BootInformation; mod area_frame_allocator; mod paging; +mod stack_allocator; pub const PAGE_SIZE: usize = 4096; -pub fn init(boot_info: &BootInformation) { +pub fn init(boot_info: &BootInformation) -> MemoryController { assert_has_not_been_called!("memory::init must be called only once"); let memory_map_tag = boot_info.memory_map_tag().expect("Memory map tag required"); @@ -58,6 +60,35 @@ pub fn init(boot_info: &BootInformation) { for page in Page::range_inclusive(heap_start_page, heap_end_page) { active_table.map(page, paging::WRITABLE, &mut frame_allocator); } + + let stack_allocator = { + let stack_alloc_start_page = heap_end_page + 1; + let stack_alloc_end_page = stack_alloc_start_page + 100; + let stack_alloc_page_range = Page::range_inclusive(stack_alloc_start_page, + stack_alloc_end_page); + stack_allocator::new_stack_allocator(stack_alloc_page_range) + }; + + MemoryController { + active_table: active_table, + frame_allocator: frame_allocator, + stack_allocator: stack_allocator, + } +} + +pub struct MemoryController { + active_table: paging::ActivePageTable, + frame_allocator: AreaFrameAllocator, + stack_allocator: StackAllocator, +} + +impl MemoryController { + pub fn alloc_stack(&mut self, size_in_pages: usize) -> Result { + let &mut MemoryController { ref mut active_table, + ref mut frame_allocator, + ref mut stack_allocator } = self; + stack_allocator.alloc_stack(active_table, frame_allocator, size_in_pages) + } } #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] diff --git a/src/memory/paging/mod.rs b/src/memory/paging/mod.rs index 6c80fbcd..777d6202 100644 --- a/src/memory/paging/mod.rs +++ b/src/memory/paging/mod.rs @@ -11,7 +11,7 @@ pub use self::entry::*; use memory::{PAGE_SIZE, Frame, FrameAllocator}; use self::temporary_page::TemporaryPage; pub use self::mapper::Mapper; -use core::ops::{Deref, DerefMut}; 
+use core::ops::{Deref, DerefMut, Add}; use multiboot2::BootInformation; mod entry; @@ -37,7 +37,7 @@ impl Page { Page { number: address / PAGE_SIZE } } - fn start_address(&self) -> usize { + pub fn start_address(&self) -> usize { self.number * PAGE_SIZE } @@ -62,6 +62,14 @@ impl Page { } } +impl Add for Page { + type Output = Page; + + fn add(self, rhs: usize) -> Page { + Page { number: self.number + rhs } + } +} + pub struct PageIter { start: Page, end: Page, diff --git a/src/memory/stack_allocator.rs b/src/memory/stack_allocator.rs new file mode 100644 index 00000000..2e7496fb --- /dev/null +++ b/src/memory/stack_allocator.rs @@ -0,0 +1,51 @@ +use memory::paging::{self, Page, PageIter, ActivePageTable}; +use memory::{PAGE_SIZE, FrameAllocator}; +use core::nonzero::NonZero; + +pub fn new_stack_allocator(page_range: PageIter) -> StackAllocator { + StackAllocator { range: page_range } +} + +pub struct StackAllocator { + range: PageIter, +} + +impl StackAllocator { + pub fn alloc_stack(&mut self, + active_table: &mut ActivePageTable, + frame_allocator: &mut FA, + size_in_pages: usize) + -> Result { + if size_in_pages == 0 { + return Err(()); + } + + let _guard_page = self.range.next().ok_or(())?; + + let stack_start = self.range.next().ok_or(())?; + let stack_end = if size_in_pages == 1 { + stack_start + } else { + self.range.nth(size_in_pages - 1).ok_or(())? 
+ }; + + for page in Page::range_inclusive(stack_start, stack_end) { + active_table.map(page, paging::WRITABLE, frame_allocator); + } + + let top_of_stack = stack_end.start_address() + PAGE_SIZE; + StackPointer::new(top_of_stack).ok_or(()) + } +} + +#[derive(Debug)] +pub struct StackPointer(NonZero); + +impl StackPointer { + fn new(ptr: usize) -> Option { + match ptr { + 0 => None, + ptr => Some(StackPointer(unsafe { NonZero::new(ptr) })), + } + } +} From 038fd097b61b126fc500bdea4a0ee853d9afc07b Mon Sep 17 00:00:00 2001 From: Philipp Oppermann Date: Mon, 28 Nov 2016 10:13:57 +0100 Subject: [PATCH 05/31] Update post --- blog/post/double-faults.md | 102 ++++++++++++++++++++++++++++++++----- 1 file changed, 90 insertions(+), 12 deletions(-) diff --git a/blog/post/double-faults.md b/blog/post/double-faults.md index 96937fe4..2b9f13e2 100644 --- a/blog/post/double-faults.md +++ b/blog/post/double-faults.md @@ -127,12 +127,22 @@ Fortunately, the AMD64 manual ([PDF][AMD64 manual]) has an exact definition (in First Exception | Second Exception ----------------|----------------- -divide-by-zero,
invalid-tss,
segment-not-present,
stack,
general-protection | invalid-tss,
segment-not-present,
stack,
general-protection -page fault | page fault,
invalid-tss,
segment-not-present,
stack,
general-protection +[Divide-by-zero],
[Invalid TSS],
[Segment Not Present],
[Stack-Segment Fault],
[General Protection Fault] | [Invalid TSS],
[Segment Not Present],
[Stack-Segment Fault],
[General Protection Fault] +[Page Fault] | [Page Fault],
[Invalid TSS],
[Segment Not Present],
[Stack-Segment Fault],
[General Protection Fault] + +[Divide-by-zero]: http://wiki.osdev.org/Exceptions#Divide-by-zero_Error +[Invalid TSS]: http://wiki.osdev.org/Exceptions#Invalid_TSS +[Segment Not Present]: http://wiki.osdev.org/Exceptions#Segment_Not_Present +[Stack-Segment Fault]: http://wiki.osdev.org/Exceptions#Stack-Segment_Fault +[General Protection Fault]: http://wiki.osdev.org/Exceptions#General_Protection_Fault +[Page Fault]: http://wiki.osdev.org/Exceptions#Page_Fault + [AMD64 manual]: http://developer.amd.com/wordpress/media/2012/10/24593_APM_v21.pdf -So for example a divide-by-zero fault followed by a page fault is fine, but a divide-by-zero fault followed by a general-protection fault leads to a double fault. With the help of this table, we can answer the first three of the above questions: +So for example a divide-by-zero fault followed by a page fault is fine (the page fault handler is invoked), but a divide-by-zero fault followed by a general-protection fault leads to a double fault. + +With the help of this table, we can answer the first three of the above questions: 1. When a divide-by-zero exception occurs and the corresponding handler function is swapped out, a _page fault_ occurs and the _page fault handler_ is invoked. 2. When a page fault occurs and the page fault handler is swapped out, a _double fault_ occurs and the _double fault_ handler is invoked. @@ -149,7 +159,7 @@ When our kernel overflows its stack and hits the guard page, a _page fault_ occu [exception stack frame]: http://os.phil-opp.com/better-exception-messages.html#exceptions-in-detail -So the CPU tries to call our _double fault handler_ now. However, on a double fault the CPU tries to push the exception stack frame, too. Thus, a _third_ page fault occurs, which causes a _triple fault_ and a system reboot. So our current double fault handler can't avoid a triple fault in this case. +So the CPU tries to call our _double fault handler_ now. 
However, on a double fault the CPU tries to push the exception stack frame, too. Our stack pointer still points to the guard page, so a _third_ page fault occurs, which causes a _triple fault_ and a system reboot. So our current double fault handler can't avoid a triple fault in this case. Let's try it ourselves! We can easily provoke a kernel stack overflow by calling a function that recurses endlessly: @@ -163,7 +173,7 @@ pub extern "C" fn rust_main(multiboot_information_address: usize) { interrupts::init(); fn stack_overflow() { - stack_overflow(); + stack_overflow(); // for each recursion, the return address is pushed } // trigger a stack overflow @@ -174,16 +184,84 @@ pub extern "C" fn rust_main(multiboot_information_address: usize) { } {{< / highlight >}} -When we try this code in QEMU, we see that the system enters a boot-loop again. Here is what happens: When the `stack_overflow` function is called, the whole stack gets filled with return addresses. At some point, we overflow the stack and hit the guard page, which we [set up][set up guard page] for exactly this case. Thus, a _page fault_ occurs. +When we try this code in QEMU, we see that the system enters a boot-loop again. -Now the CPU pushes the exception stack frame and the registers and invokes the page fault handler… wait… this can't work. We overflowed our stack, so the stack pointer points to the guard page. And now the CPU tries to push to it, which causes another page fault. At this point, a double fault occurs, since an exception occurred while calling an exception handler. - -So the CPU tries to invoke the double fault handler now. But first, it tries to push the exception stack frame, since exceptions on x86 work that way. Of course, this is still not possible (the stack pointer still points to the guard page), so another page fault occurs while calling the double fault handler. Thus, a triple fault occurs and QEMU issues a system reset. - -So how can we avoid this problem? 
We can't omit the pushing of the exception stack frame, since it's the CPU itself that does it. So we need to ensure somehow that the stack is always valid when a double fault exception occurs. Fortunately, the x86_64 architecture has a trick for this problem. +So how can we avoid this problem? We can't omit the pushing of the exception stack frame, since the CPU itself does it. So we need to ensure somehow that the stack is always valid when a double fault exception occurs. Fortunately, the x86_64 architecture has a solution to this problem. ## Switching Stacks -The x86_64 architecture is able to switch to a predefined stack when an exception occurs. However, it is a bit cumbersome to setup this mechanism. +The x86_64 architecture is able to switch to a predefined stack for some exceptions through an _Interrupt Stack Table_ (IST). The IST is a table of 7 pointers to known-good stacks. In Rust-like pseudo code: + +```rust +struct InterruptStackTable { + stack_pointers: [Option; 7], +} +``` + +For each exception handler, we can choose an IST stack through the options field in the [IDT entry]. For example, we could use the first stack in the IST for our double fault handler. Then the CPU would automatically switch to this stack _before_ it pushes anything. Thus, we are able to avoid the triple fault. + +[IDT entry]: {{% relref "09-catching-exceptions.md#the-interrupt-descriptor-table" %}} + +### The Task State Segment +The Interrupt Stack Table (IST) is part of an old legacy structure called [Task State Segment] (TSS). The TSS used to hold various information (e.g. processor register state) about a task in 32-bit x86 and was for example used for [hardware context switching]. However, hardware context switching is no longer supported in 64-bit mode and the format of the TSS changed completely. 
+ +[Task State Segment]: https://en.wikipedia.org/wiki/Task_state_segment +[hardware context switching]: http://wiki.osdev.org/Context_Switching#Hardware_Context_Switching + +On x86_64, the TSS no longer holds any task specific information at all. Instead, it holds two stack tables (the IST is one of them). The only common field between the 32-bit and 64-bit TSS is the pointer to the [I/O port permissions bitmap]. + +[I/O port permissions bitmap]: https://en.wikipedia.org/wiki/Task_state_segment#I.2FO_port_permissions + +The 64-bit TSS has the following format: + +Field | Type +------ | ---------------- +(reserved) | `u32` +Privilege Stack Table | `[u64; 3]` +(reserved) | `u64` +Interrupt Stack Table | `[u64; 7]` +(reserved) | `u64` +(reserved) | `u16` +I/O Map Base Address | `u16` + +The _Privilege Stack Table_ is used by the CPU when the privilege level changes. For example, if an exception occurs while the CPU is in user mode (privilege level 3), the CPU normally switches to kernel mode (privilege level 0) before invoking the exception handler. In that case, the CPU would switch to the 0th stack in the Privilege Stack Table (since 0 is the target privilege level). We don't have any user mode programs yet, so we can safely ignore this table for now. + +Let's create a `TaskStateSegment` struct in new tss submodule: + +```rust +// in src/interrupts/mod.rs + +mod tss; + +// in src/interrupts/tss.rs + +use core::nonzero::NonZero; + +#[derive(Debug)] +#[repr(C, packed)] +pub struct TaskStateSegment { + reserved_0: u32, + pub privilege_stacks: PrivilegeStackTable, + reserved_1: u64, + pub interrupt_stacks: InterruptStackTable, + reserved_2: u64, + reserved_3: u16, + iomap_base: u16, +} + +#[derive(Debug)] +pub struct PrivilegeStackTable([Option; 3]); + +#[derive(Debug)] +pub struct InterruptStackTable([Option; 7]); + +#[derive(Debug)] +pub struct StackPointer(NonZero); +``` + +TODO lang item + + + However, it is a bit cumbersome to setup this mechanism. 
The mechanism consists of two main components: An _Interrupt Stack Table_ and a _Task State Segment_. From ef786e1fe898374bdcb2fda84054a9ad492a580d Mon Sep 17 00:00:00 2001 From: Philipp Oppermann Date: Tue, 20 Dec 2016 16:50:10 +0100 Subject: [PATCH 06/31] Update post and code --- blog/post/double-faults.md | 316 +++++++++++++++++++++++++++++++--- src/interrupts/mod.rs | 7 +- src/interrupts/tss.rs | 14 +- src/lib.rs | 5 +- src/memory/mod.rs | 15 +- src/memory/paging/mod.rs | 1 + src/memory/stack_allocator.rs | 66 +++++-- 7 files changed, 360 insertions(+), 64 deletions(-) diff --git a/blog/post/double-faults.md b/blog/post/double-faults.md index 2b9f13e2..13b5d79e 100644 --- a/blog/post/double-faults.md +++ b/blog/post/double-faults.md @@ -12,7 +12,7 @@ In simplified terms, a double fault is a special exception that occurs when the [IDT]: {{% relref "09-catching-exceptions.md#the-interrupt-descriptor-table" %}} -A double fault behaves like a normal exception. It has the vector number `8` and we can define a normal handler function for it in the IDT. It is really important to provide a double fault handler, because if a double faults is unhandled a fatal _triple fault_ occurs. Triple faults can't be caught and most hardware reacts with a system reset. +A double fault behaves like a normal exception. It has the vector number `8` and we can define a normal handler function for it in the IDT. It is really important to provide a double fault handler, because if a double fault is unhandled a fatal _triple fault_ occurs. Triple faults can't be caught and most hardware reacts with a system reset. ### Triggering a Double Fault Let's provoke a double fault by triggering an exception for that we didn't define a handler function yet: @@ -189,7 +189,9 @@ When we try this code in QEMU, we see that the system enters a boot-loop again. So how can we avoid this problem? We can't omit the pushing of the exception stack frame, since the CPU itself does it. 
So we need to ensure somehow that the stack is always valid when a double fault exception occurs. Fortunately, the x86_64 architecture has a solution to this problem. ## Switching Stacks -The x86_64 architecture is able to switch to a predefined stack for some exceptions through an _Interrupt Stack Table_ (IST). The IST is a table of 7 pointers to known-good stacks. In Rust-like pseudo code: +The x86_64 architecture is able to switch to a predefined, known-good stack when an exception occurs. This switch happens at hardware level, so it can be performed before the CPU pushes the exception stack frame. + +This switching mechanism is implemented as an _Interrupt Stack Table_ (IST). The IST is a table of 7 pointers to known-good stacks. In Rust-like pseudo code: ```rust struct InterruptStackTable { @@ -197,11 +199,249 @@ struct InterruptStackTable { } ``` -For each exception handler, we can choose an IST stack through the options field in the [IDT entry]. For example, we could use the first stack in the IST for our double fault handler. Then the CPU would automatically switch to this stack _before_ it pushes anything. Thus, we are able to avoid the triple fault. +For each exception handler, we can choose a stack from the IST through the `options` field in the corresponding [Interrupt Descriptor Table entry]. For example, we could use the first stack in the IST for our double fault handler. Then the CPU would automatically switch to this stack whenever a double fault occurs. This switch would happen before anything is pushed, so it would prevent the triple fault. -[IDT entry]: {{% relref "09-catching-exceptions.md#the-interrupt-descriptor-table" %}} +[Interrupt Descriptor Table entry]: {{% relref "09-catching-exceptions.md#the-interrupt-descriptor-table" %}} -### The Task State Segment +### Allocating a new Stack +In order to fill an Interrupt Stack Table later, we need a way to allocate new stacks.
Therefore we extend our `memory` module with a new `stack_allocator` submodule: + +```rust +// in src/memory/mod.rs + +mod stack_allocator; + +``` + +#### The `stack_allocator` Module +First, we create a new `StackAllocator` struct and a constructor function: + +```rust +// in src/memory/stack_allocator.rs + +use memory::paging::PageIter; + +pub struct StackAllocator { + range: PageIter, +} + +impl StackAllocator { + pub fn new(page_range: PageIter) -> StackAllocator { + StackAllocator { range: page_range } + } +} +``` +We create a simple `StackAllocator` that allocates stacks from a given range of pages (`PageIter` is an Iterator over a range of pages; we introduced it [in the kernel heap post].). +TODO: Instead of adding a `StackAllocator::new` function, we use a separate `new_stack_allocator` function. This way, we can re-export `StackAllocator` from the `memory` module without re-exporting the `new` function. + +[in the kernel heap post]: {{% relref "08-kernel-heap.md#mapping-the-heap" %}} + +In order to allocate new stacks, we add a `alloc_stack` method: + +```rust +// in src/memory/stack_allocator.rs + +use memory::paging::{self, Page, ActivePageTable}; +use memory::{PAGE_SIZE, FrameAllocator}; + +impl StackAllocator { + pub fn alloc_stack(&mut self, + active_table: &mut ActivePageTable, + frame_allocator: &mut FA, + size_in_pages: usize) + -> Option { + if size_in_pages == 0 { + return None; /* a zero sized stack makes no sense */ + } + + // clone the range, since we only want to change it on success + let mut range = self.range.clone(); + + // try to allocate the stack pages and a guard page + let guard_page = range.next(); + let stack_start = range.next(); + let stack_end = if size_in_pages == 1 { + stack_start + } else { + // choose the (size_in_pages-2)th element, since index + // starts at 0 and we already allocated the start page + range.nth(size_in_pages - 2) + }; + + match (guard_page, stack_start, stack_end) { + (Some(_), Some(start), Some(end)) => 
{ + // success! write back updated range + self.range = range; + + // map stack pages to physical frames + for page in Page::range_inclusive(start, end) { + active_table.map(page, paging::WRITABLE, frame_allocator); + } + + // create a new stack + let top_of_stack = end.start_address() + PAGE_SIZE; + Some(Stack::new(top_of_stack, start.start_address())) + } + _ => None, /* not enough pages */ + } + } +} +``` +The method takes mutable references to the [ActivePageTable] and a [FrameAllocator], since it needs to map the new virtual stack pages to physical frames. The stack size is a multiple of the page size. + +Instead of operating directly on `self.range`, we [clone] it and only write it back on success. This way, subsequent stack allocations can still succeed if there are pages left. For example, a call with `size_in_pages = 3` can still succeed after a failed call with `size_in_pages = 100`. In order to be able to clone `PageIter`, we add a `#[derive(Clone)]` to its definition in `src/memory/paging/mod.rs`. + +The actual allocation is straightforward: First, we choose the next page as [guard page]. Then we choose the next `size_in_pages` pages as stack pages using [Iterator::nth]. If all three variables are `Some`, the allocation succeeded and we map the stack pages to physical frames using [ActivePageTable::map]. The guard page remains unmapped. 
+ +Finally, we create and return a new `Stack`, which is defined as follows: + +```rust +// in src/memory/stack_allocator.rs + +#[derive(Debug)] +pub struct Stack { + top: StackPointer, + bottom: StackPointer, +} + +impl Stack { + fn new(top: usize, bottom: usize) -> Stack { + assert!(top > bottom); + Stack { + top: StackPointer::new(top), + bottom: StackPointer::new(bottom), + } + } + + pub fn top(&self) -> StackPointer { + self.top + } +} + +use core::nonzero::NonZero; + +#[derive(Debug, Clone, Copy)] +pub struct StackPointer(NonZero); + +impl StackPointer { + fn new(ptr: usize) -> StackPointer { + assert!(ptr != 0); + StackPointer(unsafe { NonZero::new(ptr) }) + } +} + +impl Into for StackPointer { + fn into(self) -> usize { + *self.0 + } +} +``` +The `Stack` struct describes a stack though its top and bottom pointers. A stack pointer can never be `0`, so we use the unstable [NonZero] wrapper for `StackPointer`. This wrapper is an optimization that tells the compiler that it can use the value `0` to differentiate enum variants. Thus, an `Option` has always the same size as a bare `usize` (the value `0` is used to store the `None` case). We will require this property when we create the Interrupt Stack Table later. + +Since `NonZero` is unstable, we need to add `#![feature(nonzero)]` in our `lib.rs`. + +[NonZero]: https://doc.rust-lang.org/nightly/core/nonzero/struct.NonZero.html + +#### The Memory Controller +Now we're already able to allocate a new double fault stack. However, we add one more level of abstraction to make things nicer. 
For that we add a `MemoryController` type to our `memory` module: + +```rust +// in src/memory/mod.rs + +pub use self::stack_allocator::{Stack, StackPointer}; + +pub struct MemoryController { + active_table: paging::ActivePageTable, + frame_allocator: AreaFrameAllocator, + stack_allocator: stack_allocator::StackAllocator, +} + +impl MemoryController { + pub fn alloc_stack(&mut self, size_in_pages: usize) -> Option { + let &mut MemoryController { ref mut active_table, + ref mut frame_allocator, + ref mut stack_allocator } = self; + stack_allocator.alloc_stack(active_table, frame_allocator, + size_in_pages) + } +} +``` +The `MemoryController` struct holds the three types that are required for `alloc_stack` and provides a simpler interface (only one argument). The `alloc_stack` wrapper just takes the three types as `&mut` through [destructuring] and forwards them to the `stack_allocator`. Note that we're re-exporting the `Stack` and `StackPointer` types since they are returned by `alloc_stack`. + +The last step is to create a `stack_allocator` and return a `MemoryController` from `memory::init`: + +```rust +// in src/memory/mod.rs + +pub fn init(boot_info: &BootInformation) -> MemoryController { + ... + + let stack_allocator = { + let stack_alloc_start = heap_end_page + 1; + let stack_alloc_end = stack_alloc_start + 100; + let stack_alloc_range = Page::range_inclusive(stack_alloc_start, + stack_alloc_end); + stack_allocator::new_stack_allocator(stack_alloc_range) + }; + + MemoryController { + active_table: active_table, + frame_allocator: frame_allocator, + stack_allocator: stack_allocator, + } +} +``` +We create a new `StackAllocator` with a range of 100 pages starting right after the last heap page. + +In order to do arithmetic on pages (e.g.
calculate the hundredth page after `stack_alloc_start`), we implement `Add` for `Page`: + +```rust +// in src/memory/paging/mod.rs + +impl Add for Page { + type Output = Page; + + fn add(self, rhs: usize) -> Page { + Page { number: self.number + rhs } + } +} +``` + +#### Allocating a Double Fault Stack +Now we can allocate a new double fault stack by passing the memory controller to our `interrupts::init` function: + +{{< highlight rust "hl_lines=8 11 12 21 22 23" >}} +// in src/lib.rs + +#[no_mangle] +pub extern "C" fn rust_main(multiboot_information_address: usize) { + ... + + // set up guard page and map the heap pages + let mut memory_controller = memory::init(boot_info); // new return type + + // initialize our IDT + interrupts::init(&mut memory_controller); // new argument + + ... +} + + +// in src/interrupts/mod.rs + +use memory::MemoryController; + +pub fn init(memory_controller: &mut MemoryController) { + let double_fault_stack = memory_controller.alloc_stack(1) + .expect("could not allocate double fault stack"); + + IDT.load(); +} +{{< / highlight >}} + +We allocate a 4096 bytes stack (one page) for our double fault handler. Now we just need some way to tell the CPU that it should use this stack for handling double faults. + +### The IST and TSS The Interrupt Stack Table (IST) is part of an old legacy structure called [Task State Segment] (TSS). The TSS used to hold various information (e.g. processor register state) about a task in 32-bit x86 and was for example used for [hardware context switching]. However, hardware context switching is no longer supported in 64-bit mode and the format of the TSS changed completely. [Task State Segment]: https://en.wikipedia.org/wiki/Task_state_segment @@ -223,9 +463,9 @@ Interrupt Stack Table | `[u64; 7]` (reserved) | `u16` I/O Map Base Address | `u16` -The _Privilege Stack Table_ is used by the CPU when the privilege level changes. 
For example, if an exception occurs while the CPU is in user mode (privilege level 3), the CPU normally switches to kernel mode (privilege level 0) before invoking the exception handler. In that case, the CPU would switch to the 0th stack in the Privilege Stack Table (since 0 is the target privilege level). We don't have any user mode programs yet, so we can safely ignore this table for now. +The _Privilege Stack Table_ is used by the CPU when the privilege level changes. For example, if an exception occurs while the CPU is in user mode (privilege level 3), the CPU normally switches to kernel mode (privilege level 0) before invoking the exception handler. In that case, the CPU would switch to the 0th stack in the Privilege Stack Table (since 0 is the target privilege level). We don't have any user mode programs yet, so we ignore this table for now. -Let's create a `TaskStateSegment` struct in new tss submodule: +Let's create a `TaskStateSegment` struct in a new `tss` submodule: ```rust // in src/interrupts/mod.rs @@ -234,8 +474,6 @@ mod tss; // in src/interrupts/tss.rs -use core::nonzero::NonZero; - #[derive(Debug)] #[repr(C, packed)] pub struct TaskStateSegment { @@ -248,27 +486,61 @@ pub struct TaskStateSegment { iomap_base: u16, } +use memory::StackPointer; + #[derive(Debug)] pub struct PrivilegeStackTable([Option; 3]); #[derive(Debug)] pub struct InterruptStackTable([Option; 7]); - -#[derive(Debug)] -pub struct StackPointer(NonZero); ``` -TODO lang item +We use [repr\(C)] for the struct since the order of fields is important. We also use `[repr(packed)]` because otherwise the compiler might insert additional padding between the `reserved_0` and `privilege_stacks` fields. +The `PrivilegeStackTable` and `InterruptStackTable` types are just newtype wrappers for arrays of `Option`. Here it becomes important that we implemented `NonZero` for `StackPointer`: Thus, an `Option` still has the required size of 64 bits.
- However, it is a bit cumbersome to setup this mechanism. +Let's add a `TaskStateSegment::new` function that creates an empty TSS: -The mechanism consists of two main components: An _Interrupt Stack Table_ and a _Task State Segment_. +```rust +impl TaskStateSegment { + pub fn new() -> TaskStateSegment { + TaskStateSegment { + privilege_stacks: PrivilegeStackTable([None, None, None]), + interrupt_stacks: InterruptStackTable( + [None, None, None, None, None, None, None]), + iomap_base: 0, + reserved_0: 0, + reserved_1: 0, + reserved_2: 0, + reserved_3: 0, + } + } +} +``` +We also add a `InterruptStackTable::insert_stack` method, that inserts a given stack into a free table entry: + +```rust +use memory::Stack; + +impl InterruptStackTable { + pub fn insert_stack(&mut self, stack: Stack) -> Result { + // TSS index starts at 1, so we do a `zip(1..)` + for (entry, i) in self.0.iter_mut().zip(1..) { + if entry.is_none() { + *entry = Some(stack.top()); + return Ok(i); + } + } + Err(stack) + } +} +``` +The function iterates over the table and places the stack pointer in the first free entry. In the case of success, we return the table index of the inserted pointer. If there's no free entry left, we return the stack back to the caller as `Err`. + +#### Creating a TSS +Let's build a new TSS that contains our double fault stack in its Interrupt Stack Table. -Switching stacks -The Interrupt Stack Table -The Task State Segment The Global Descriptor Table (again) Putting it together What’s next? @@ -282,11 +554,3 @@ Let's start by creating a handler function for double faults: ``` Next, we need to register the double fault handler in our IDT: - - -Double faults also occur when an exception occurs while the CPU is trying to invoke an exception handler. For example, let's assume a divide-by-zero exception occurs but the OS accidentally [swapped out] the corresponding handler function. 
Now the CPU tries to call the divide-by-zero handler, which - - -A double fault occurs whenever the CPU fails to call an exception handler. On a high level it's like a catch-all handler, similar to `catch(...)` in C++ or `catch(Exception e)` in Java or C#. - -The most common case is that there isn't a handler defined in the IDT. However, a double fault also occurs if the exception handler lies on a unaccessible page of if the CPU fails to push the exception stack frame. diff --git a/src/interrupts/mod.rs b/src/interrupts/mod.rs index 246a70cb..fd4bfef4 100644 --- a/src/interrupts/mod.rs +++ b/src/interrupts/mod.rs @@ -8,7 +8,7 @@ // except according to those terms. use spin::Once; -use memory::StackPointer; +use memory::MemoryController; mod idt; mod tss; @@ -95,7 +95,10 @@ static IDT: Once = Once::new(); static TSS: Once = Once::new(); static GDT: Once = Once::new(); -pub fn init(double_fault_stack: StackPointer) { +pub fn init(memory_controller: &mut MemoryController) { + let double_fault_stack = memory_controller.alloc_stack(1) + .expect("could not allocate double fault stack"); + let mut double_fault_ist_index = 0; let tss = TSS.call_once(|| { diff --git a/src/interrupts/tss.rs b/src/interrupts/tss.rs index 47e38a53..ab9273bc 100644 --- a/src/interrupts/tss.rs +++ b/src/interrupts/tss.rs @@ -1,4 +1,4 @@ -use memory::StackPointer; +use memory::{Stack, StackPointer}; #[derive(Debug)] #[repr(C, packed)] @@ -16,7 +16,7 @@ impl TaskStateSegment { pub fn new() -> TaskStateSegment { TaskStateSegment { privilege_stacks: PrivilegeStackTable([None, None, None]), - interrupt_stacks: InterruptStackTable::new(), + interrupt_stacks: InterruptStackTable([None, None, None, None, None, None, None]), iomap_base: 0, reserved_0: 0, reserved_1: 0, @@ -33,18 +33,14 @@ pub struct PrivilegeStackTable([Option; 3]); pub struct InterruptStackTable([Option; 7]); impl InterruptStackTable { - pub fn new() -> InterruptStackTable { - InterruptStackTable([None, None, None, None, None, None, 
None]) - } - - pub fn insert_stack(&mut self, stack_pointer: StackPointer) -> Result { + pub fn insert_stack(&mut self, stack: Stack) -> Result { // TSS index starts at 1 for (entry, i) in self.0.iter_mut().zip(1..) { if entry.is_none() { - *entry = Some(stack_pointer); + *entry = Some(stack.top()); return Ok(i); } } - Err(stack_pointer) + Err(stack) } } diff --git a/src/lib.rs b/src/lib.rs index d53a90d3..74f77159 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -55,11 +55,8 @@ pub extern "C" fn rust_main(multiboot_information_address: usize) { // set up guard page and map the heap pages let mut memory_controller = memory::init(boot_info); - // initialize our IDT - let double_fault_stack = memory_controller.alloc_stack(1) - .expect("could not allocate double fault stack"); - interrupts::init(double_fault_stack); + interrupts::init(&mut memory_controller); unsafe { int!(3) }; diff --git a/src/memory/mod.rs b/src/memory/mod.rs index 25c11f4f..aa94d33b 100644 --- a/src/memory/mod.rs +++ b/src/memory/mod.rs @@ -9,7 +9,7 @@ pub use self::area_frame_allocator::AreaFrameAllocator; pub use self::paging::remap_the_kernel; -pub use self::stack_allocator::{StackAllocator, StackPointer}; +pub use self::stack_allocator::{Stack, StackPointer}; use self::paging::PhysicalAddress; use multiboot2::BootInformation; @@ -62,11 +62,10 @@ pub fn init(boot_info: &BootInformation) -> MemoryController { } let stack_allocator = { - let stack_alloc_start_page = heap_end_page + 1; - let stack_alloc_end_page = stack_alloc_start_page + 100; - let stack_alloc_page_range = Page::range_inclusive(stack_alloc_start_page, - stack_alloc_end_page); - stack_allocator::new_stack_allocator(stack_alloc_page_range) + let stack_alloc_start = heap_end_page + 1; + let stack_alloc_end = stack_alloc_start + 100; + let stack_alloc_range = Page::range_inclusive(stack_alloc_start, stack_alloc_end); + stack_allocator::new_stack_allocator(stack_alloc_range) }; MemoryController { @@ -79,11 +78,11 @@ pub fn init(boot_info: 
&BootInformation) -> MemoryController { pub struct MemoryController { active_table: paging::ActivePageTable, frame_allocator: AreaFrameAllocator, - stack_allocator: StackAllocator, + stack_allocator: stack_allocator::StackAllocator, } impl MemoryController { - pub fn alloc_stack(&mut self, size_in_pages: usize) -> Result { + pub fn alloc_stack(&mut self, size_in_pages: usize) -> Option { let &mut MemoryController { ref mut active_table, ref mut frame_allocator, ref mut stack_allocator } = self; diff --git a/src/memory/paging/mod.rs b/src/memory/paging/mod.rs index 777d6202..2d7b48d7 100644 --- a/src/memory/paging/mod.rs +++ b/src/memory/paging/mod.rs @@ -70,6 +70,7 @@ impl Add for Page { } } +#[derive(Debug, Clone)] pub struct PageIter { start: Page, end: Page, diff --git a/src/memory/stack_allocator.rs b/src/memory/stack_allocator.rs index 2e7496fb..cc29d577 100644 --- a/src/memory/stack_allocator.rs +++ b/src/memory/stack_allocator.rs @@ -15,37 +15,73 @@ impl StackAllocator { active_table: &mut ActivePageTable, frame_allocator: &mut FA, size_in_pages: usize) - -> Result { + -> Option { if size_in_pages == 0 { - return Err(()); + return None; } - let _guard_page = self.range.next().ok_or(())?; + let mut range = self.range.clone(); - let stack_start = self.range.next().ok_or(())?; + // try to allocate the stack pages and a guard page + let guard_page = range.next(); + let stack_start = range.next(); let stack_end = if size_in_pages == 1 { stack_start } else { - self.range.nth(size_in_pages - 1).ok_or(())? + range.nth(size_in_pages - 2) }; - for page in Page::range_inclusive(stack_start, stack_end) { - active_table.map(page, paging::WRITABLE, frame_allocator); - } + match (guard_page, stack_start, stack_end) { + (Some(_), Some(start), Some(end)) => { + // success! 
write back updated range + self.range = range; - let top_of_stack = stack_end.start_address() + PAGE_SIZE; - StackPointer::new(top_of_stack).ok_or(()) + // map stack pages to physical frames + for page in Page::range_inclusive(start, end) { + active_table.map(page, paging::WRITABLE, frame_allocator); + } + + // create a new stack + let top_of_stack = end.start_address() + PAGE_SIZE; + Some(Stack::new(top_of_stack, start.start_address())) + } + _ => None, /* not enough pages */ + } } } #[derive(Debug)] +pub struct Stack { + top: StackPointer, + bottom: StackPointer, +} + +impl Stack { + fn new(top: usize, bottom: usize) -> Stack { + assert!(top > bottom); + Stack { + top: StackPointer::new(top), + bottom: StackPointer::new(bottom), + } + } + + pub fn top(&self) -> StackPointer { + self.top + } +} + +#[derive(Debug, Clone, Copy)] pub struct StackPointer(NonZero); impl StackPointer { - fn new(ptr: usize) -> Option { - match ptr { - 0 => None, - ptr => Some(StackPointer(unsafe { NonZero::new(ptr) })), - } + fn new(ptr: usize) -> StackPointer { + assert!(ptr != 0); + StackPointer(unsafe { NonZero::new(ptr) }) + } +} + +impl Into for StackPointer { + fn into(self) -> usize { + *self.0 } } From d1fb1516fa782f71aeaedf4880a8a9b238f06268 Mon Sep 17 00:00:00 2001 From: Philipp Oppermann Date: Wed, 21 Dec 2016 00:10:42 +0100 Subject: [PATCH 07/31] Minor improvements to post --- blog/post/double-faults.md | 60 ++++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 25 deletions(-) diff --git a/blog/post/double-faults.md b/blog/post/double-faults.md index 13b5d79e..2e9a5594 100644 --- a/blog/post/double-faults.md +++ b/blog/post/double-faults.md @@ -8,14 +8,14 @@ In this post we will make our kernel completely exception-proof by catching doub ## What is a Double Fault? -In simplified terms, a double fault is a special exception that occurs when the CPU can't invoke an exception handler. 
For example, it occurs when a page fault is triggered but there is no page fault handler registered in the [IDT]. So it's kind of similar to catch-all blocks in programming languages with exceptions, e.g. `catch(...)` in C++ or `catch(Exception e)` in Java or C#. +In simplified terms, a double fault is a special exception that occurs when the CPU fails to invoke an exception handler. For example, it occurs when a page fault is triggered but there is no page fault handler registered in the [Interrupt Descriptor Table][IDT] (IDT). So it's kind of similar to catch-all blocks in programming languages with exceptions, e.g. `catch(...)` in C++ or `catch(Exception e)` in Java or C#. [IDT]: {{% relref "09-catching-exceptions.md#the-interrupt-descriptor-table" %}} A double fault behaves like a normal exception. It has the vector number `8` and we can define a normal handler function for it in the IDT. It is really important to provide a double fault handler, because if a double fault is unhandled a fatal _triple fault_ occurs. Triple faults can't be caught and most hardware reacts with a system reset. ### Triggering a Double Fault -Let's provoke a double fault by triggering an exception for that we didn't define a handler function yet: +Let's provoke a double fault by triggering an exception for that we didn't define a handler function: {{< highlight rust "hl_lines=10" >}} // in src/lib.rs @@ -58,7 +58,7 @@ The reason for the boot loop is the following: [int 1]: https://en.wikipedia.org/wiki/INT_(x86_instruction) -So in order to prevent this triple fault, we need to either provide a handler function for `Debug` exceptions or a double fault handler. We will do the latter, since this post is all about the double fault. +So in order to prevent this triple fault, we need to either provide a handler function for `Debug` exceptions or a double fault handler. We will do the latter, since we want to avoid triple faults completely. 
### A Double Fault Handler A double fault is a normal exception with an error code, so we can use our `handler_with_error_code` macro to create a wrapper function: @@ -103,14 +103,14 @@ It worked! Here is what happens this time: The triple fault (and the boot-loop) no longer occurs, since the CPU can now call the double fault handler. -That was pretty straightforward! So why do we need a whole post for this topic? Well, we're now able to catch _most_ double faults, but there are some cases where our current approach doesn't suffice. +That was quite straightforward! So why do we need a whole post for this topic? Well, we're now able to catch _most_ double faults, but there are some cases where our current approach doesn't suffice. ## Causes of Double Faults Before we look at the special cases, we need to know the exact causes of double faults. Above, we used a pretty vague definition: -> A double fault is a special exception that occurs when the CPU can't invoke an exception handler. +> A double fault is a special exception that occurs when the CPU fails to invoke an exception handler. -What does _“can't invoke”_ mean exactly? The handler is not present? The handler is [swapped out]? And what happens if a handler causes exceptions itself? +What does _“fails to invoke”_ mean exactly? The handler is not present? The handler is [swapped out]? And what happens if a handler causes exceptions itself? [swapped out]: http://pages.cs.wisc.edu/~remzi/OSTEP/vm-beyondphys.pdf @@ -118,7 +118,7 @@ For example, what happens if… : 1. a divide-by-zero exception occurs, but the corresponding handler function is swapped out? 2. a page fault occurs, but the page fault handler is swapped out? -3. a divide-by-zero handler invokes a breakpoint exception, but the breakpoint handler is swapped out? +3. a divide-by-zero handler causes a breakpoint exception, but the breakpoint handler is swapped out? 4. our kernel overflows its stack and the [guard page] is hit? 
[guard page]: {{% relref "07-remap-the-kernel.md#creating-a-guard-page" %}} @@ -144,9 +144,9 @@ So for example a divide-by-zero fault followed by a page fault is fine (the page With the help of this table, we can answer the first three of the above questions: -1. When a divide-by-zero exception occurs and the corresponding handler function is swapped out, a _page fault_ occurs and the _page fault handler_ is invoked. -2. When a page fault occurs and the page fault handler is swapped out, a _double fault_ occurs and the _double fault_ handler is invoked. -3. When a divide-by-zero handler invokes a breakpoint exception and the breakpoint handler is swapped out, a _breakpoint exception_ occurs first. However, the corresponding handler is swapped out, so a _page fault_ occurs and the _page fault handler_ is invoked. +1. If a divide-by-zero exception occurs and the corresponding handler function is swapped out, a _page fault_ occurs and the _page fault handler_ is invoked. +2. If a page fault occurs and the page fault handler is swapped out, a _double fault_ occurs and the _double fault handler_ is invoked. +3. If a divide-by-zero handler causes a breakpoint exception, the CPU tries to invoke the breakpoint handler. If the breakpoint handler is swapped out, a _page fault_ occurs and the _page fault handler_ is invoked. In fact, even the case of a non-present handler follows this scheme: A non-present handler causes a _segment-not-present_ exception. We didn't define a segment-not-present handler, so another segment-not-present exception occurs. According to the table, this leads to a double fault. @@ -155,7 +155,7 @@ Let's look at the fourth question: > What happens if our kernel overflows its stack and the [guard page] is hit? -When our kernel overflows its stack and hits the guard page, a _page fault_ occurs and the CPU invokes the page fault handler. However, the CPU also tries to push the [exception stack frame] onto the stack. 
This fails of course, since our current stack pointer still points to the guard page. Thus, a second page fault occurs, which causes a double fault (according to the above table). +When our kernel overflows its stack and hits the guard page, a _page fault_ occurs. The CPU looks up the page fault handler in the IDT and tries to push the [exception stack frame] onto the stack. However, our current stack pointer still points to the non-present guard page. Thus, a second page fault occurs, which causes a double fault (according to the above table). [exception stack frame]: http://os.phil-opp.com/better-exception-messages.html#exceptions-in-detail @@ -199,9 +199,9 @@ struct InterruptStackTable { } ``` -For each exception handler, we can choose an stack from the IST through the `options` field in the corresponding [Interrupt Descriptor Table entry]. For example, we could use the first stack in the IST for our double fault handler. Then the CPU would automatically switch to this stack whenever a double fault occurs. This switch would happen before anything is pushed, so it would prevent the triple fault. +For each exception handler, we can choose a stack from the IST through the `options` field in the corresponding [IDT entry]. For example, we could use the first stack in the IST for our double fault handler. Then the CPU would automatically switch to this stack whenever a double fault occurs. This switch would happen before anything is pushed, so it would prevent the triple fault. -[Interrupt Descriptor Table entry]: {{% relref "09-catching-exceptions.md#the-interrupt-descriptor-table" %}} +[IDT entry]: {{% relref "09-catching-exceptions.md#the-interrupt-descriptor-table" %}} ### Allocating a new Stack In order to fill an Interrupt Stack Table later, we need a way to allocate new stacks. 
Therefore we extend our `memory` module with a new `stack_allocator` submodule: @@ -232,7 +232,6 @@ impl StackAllocator { } ``` We create a simple `StackAllocator` that allocates stacks from a given range of pages (`PageIter` is an Iterator over a range of pages; we introduced it [in the kernel heap post].). -TODO: Instead of adding a `StackAllocator::new` function, we use a separate `new_stack_allocator` function. This way, we can re-export `StackAllocator` from the `memory` module without re-exporting the `new` function. [in the kernel heap post]: {{% relref "08-kernel-heap.md#mapping-the-heap" %}} @@ -289,11 +288,19 @@ impl StackAllocator { ``` The method takes mutable references to the [ActivePageTable] and a [FrameAllocator], since it needs to map the new virtual stack pages to physical frames. The stack size is a multiple of the page size. -Instead of operating directly on `self.range`, we [clone] it and only write it back on success. This way, subsequent stack allocations can still succeed if there are pages left. For example, a call with `size_in_pages = 3` can still succeed after a failed call with `size_in_pages = 100`. In order to be able to clone `PageIter`, we add a `#[derive(Clone)]` to its definition in `src/memory/paging/mod.rs`. +[ActivePageTable]: {{% relref "06-page-tables.md#page-table-ownership" %}} +[FrameAllocator]: {{% relref "05-allocating-frames.md#a-frame-allocator" %}} + +Instead of operating directly on `self.range`, we [clone] it and only write it back on success. This way, subsequent stack allocations can still succeed if there are pages left (e.g., a call with `size_in_pages = 3` can still succeed after a failed call with `size_in_pages = 100`). In order to be able to clone `PageIter`, we add a `#[derive(Clone)]` to its definition in `src/memory/paging/mod.rs`. 
+ +[clone]: https://doc.rust-lang.org/nightly/core/clone/trait.Clone.html#tymethod.clone The actual allocation is straightforward: First, we choose the next page as [guard page]. Then we choose the next `size_in_pages` pages as stack pages using [Iterator::nth]. If all three variables are `Some`, the allocation succeeded and we map the stack pages to physical frames using [ActivePageTable::map]. The guard page remains unmapped. -Finally, we create and return a new `Stack`, which is defined as follows: +[Iterator::nth]: https://doc.rust-lang.org/nightly/core/iter/trait.Iterator.html#method.nth +[ActivePageTable::map]: {{% relref "06-page-tables.md#more-mapping-functions" %}} + +Finally, we create and return a new `Stack`, which we define as follows: ```rust // in src/memory/stack_allocator.rs @@ -336,14 +343,14 @@ impl Into for StackPointer { } } ``` -The `Stack` struct describes a stack though its top and bottom pointers. A stack pointer can never be `0`, so we use the unstable [NonZero] wrapper for `StackPointer`. This wrapper is an optimization that tells the compiler that it can use the value `0` to differentiate enum variants. Thus, an `Option` has always the same size as a bare `usize` (the value `0` is used to store the `None` case). We will require this property when we create the Interrupt Stack Table later. +The `Stack` struct describes a stack though its top and bottom pointers. A stack pointer can never be `0`, so we use the unstable [NonZero] wrapper for `StackPointer`. This wrapper is an optimization that tells the compiler that it can use the value `0` to differentiate enum variants. Thus, an `Option` has always the same size as a bare `usize` (the value `0` is used to store the `None` case). We will require this feature when we create the Interrupt Stack Table later. Since `NonZero` is unstable, we need to add `#![feature(nonzero)]` in our `lib.rs`. 
[NonZero]: https://doc.rust-lang.org/nightly/core/nonzero/struct.NonZero.html #### The Memory Controller -Now we're already able to allocate a new double fault stack. However, we add one more level of abstraction to make things nicer. For that we add a `MemoryController` type to our `memory` module: +Now we're able to allocate a new double fault stack. However, we add one more level of abstraction to make things easier. For that we add a new `MemoryController` type to our `memory` module: ```rust // in src/memory/mod.rs @@ -366,9 +373,12 @@ impl MemoryController { } } ``` -The `MemoryController` struct holds the three types that are required for `alloc_stack` and provides a simpler interface (only one argument). The `alloc_stack` wrapper just takes the tree types as `&mut` through [destructuring] and forwards them to the `stack_allocator`. Note that we're re-exporting the `Stack` and `StackPointer` types since they are returned by `alloc_stack`. +The `MemoryController` struct holds the three types that are required for `alloc_stack` and provides a simpler interface (only one argument). The `alloc_stack` wrapper just takes the three types as `&mut` through [destructuring] and forwards them to the `stack_allocator`. The [ref mut]-s are needed to take the inner fields by mutable reference. Note that we're re-exporting the `Stack` and `StackPointer` types since they are returned by `alloc_stack`. -The last step is to create a `stack_allocator` and return a `MemoryController` from `memory::init`: +[destructuring]: http://rust-lang.github.io/book/chXX-patterns.html#Destructuring +[ref mut]: http://rust-lang.github.io/book/chXX-patterns.html#ref-and-ref-mut + +The last step is to create a `StackAllocator` and return a `MemoryController` from `memory::init`: ```rust // in src/memory/mod.rs @@ -442,7 +452,7 @@ pub fn init(memory_controller: &mut MemoryController) { We allocate a 4096 bytes stack (one page) for our double fault handler. 
Now we just need some way to tell the CPU that it should use this stack for handling double faults. ### The IST and TSS -The Interrupt Stack Table (IST) is part of an old legacy structure called [Task State Segment] (TSS). The TSS used to hold various information (e.g. processor register state) about a task in 32-bit x86 and was for example used for [hardware context switching]. However, hardware context switching is no longer supported in 64-bit mode and the format of the TSS changed completely. +The Interrupt Stack Table (IST) is part of an old legacy structure called [Task State Segment] \(TSS). The TSS used to hold various information (e.g. processor register state) about a task in 32-bit x86 and was for example used for [hardware context switching]. However, hardware context switching is no longer supported in 64-bit mode and the format of the TSS changed completely. [Task State Segment]: https://en.wikipedia.org/wiki/Task_state_segment [hardware context switching]: http://wiki.osdev.org/Context_Switching#Hardware_Context_Switching @@ -455,12 +465,12 @@ The 64-bit TSS has the following format: Field | Type ------ | ---------------- -(reserved) | `u32` +(reserved) | `u32` Privilege Stack Table | `[u64; 3]` -(reserved) | `u64` +(reserved) | `u64` Interrupt Stack Table | `[u64; 7]` -(reserved) | `u64` -(reserved) | `u16` +(reserved) | `u64` +(reserved) | `u16` I/O Map Base Address | `u16` The _Privilege Stack Table_ is used by the CPU when the privilege level changes. For example, if an exception occurs while the CPU is in user mode (privilege level 3), the CPU normally switches to kernel mode (privilege level 0) before invoking the exception handler. In that case, the CPU would switch to the 0th stack in the Privilege Stack Table (since 0 is the target privilege level). We don't have any user mode programs yet, so we ignore this table for now. 
From ceb44d9c2eb51bad326f0a9c90e53d0f088762a5 Mon Sep 17 00:00:00 2001 From: Philipp Oppermann Date: Wed, 21 Dec 2016 00:12:19 +0100 Subject: [PATCH 08/31] Use x86's TaskStateSegment and use usize for stack pointers --- blog/post/double-faults.md | 109 ++++------------------------------ src/interrupts/gdt.rs | 6 +- src/interrupts/idt.rs | 2 +- src/interrupts/mod.rs | 16 ++--- src/interrupts/tss.rs | 46 -------------- src/lib.rs | 1 - src/memory/mod.rs | 2 +- src/memory/stack_allocator.rs | 27 +++------ 8 files changed, 31 insertions(+), 178 deletions(-) delete mode 100644 src/interrupts/tss.rs diff --git a/blog/post/double-faults.md b/blog/post/double-faults.md index 2e9a5594..1e4d2b3c 100644 --- a/blog/post/double-faults.md +++ b/blog/post/double-faults.md @@ -307,47 +307,29 @@ Finally, we create and return a new `Stack`, which we define as follows: #[derive(Debug)] pub struct Stack { - top: StackPointer, - bottom: StackPointer, + top: usize, + bottom: usize, } impl Stack { fn new(top: usize, bottom: usize) -> Stack { assert!(top > bottom); Stack { - top: StackPointer::new(top), - bottom: StackPointer::new(bottom), + top: top, + bottom: bottom, } } pub fn top(&self) -> StackPointer { self.top } -} -use core::nonzero::NonZero; - -#[derive(Debug, Clone, Copy)] -pub struct StackPointer(NonZero); - -impl StackPointer { - fn new(ptr: usize) -> StackPointer { - assert!(ptr != 0); - StackPointer(unsafe { NonZero::new(ptr) }) - } -} - -impl Into for StackPointer { - fn into(self) -> usize { - *self.0 + pub fn bottom(&self) -> StackPointer { + self.bottom } } ``` -The `Stack` struct describes a stack though its top and bottom pointers. A stack pointer can never be `0`, so we use the unstable [NonZero] wrapper for `StackPointer`. This wrapper is an optimization that tells the compiler that it can use the value `0` to differentiate enum variants. Thus, an `Option` has always the same size as a bare `usize` (the value `0` is used to store the `None` case). 
We will require this feature when we create the Interrupt Stack Table later. - -Since `NonZero` is unstable, we need to add `#![feature(nonzero)]` in our `lib.rs`. - -[NonZero]: https://doc.rust-lang.org/nightly/core/nonzero/struct.NonZero.html +The `Stack` struct describes a stack through its top and bottom addresses. #### The Memory Controller Now we're able to allocate a new double fault stack. However, we add one more level of abstraction to make things easier. For that we add a new `MemoryController` type to our `memory` module: @@ -355,7 +337,7 @@ Now we're able to allocate a new double fault stack. However, we add one more le ```rust // in src/memory/mod.rs -pub use self::stack_allocator::{Stack, StackPointer}; +pub use self::stack_allocator::Stack; pub struct MemoryController { active_table: paging::ActivePageTable, @@ -475,82 +457,17 @@ I/O Map Base Address | `u16` The _Privilege Stack Table_ is used by the CPU when the privilege level changes. For example, if an exception occurs while the CPU is in user mode (privilege level 3), the CPU normally switches to kernel mode (privilege level 0) before invoking the exception handler. In that case, the CPU would switch to the 0th stack in the Privilege Stack Table (since 0 is the target privilege level). We don't have any user mode programs yet, so we ignore this table for now. -Let's create a `TaskStateSegment` struct in a new `tss` submodule: +#### Creating a TSS +Let's create a new TSS that contains our double fault stack in its Interrupt Stack Table. For that we need a TSS struct. 
Fortunately, the `x86` crate already contains a [`TaskStateSegment` struct] that we can use: + +[`TaskStateSegment` struct]: https://docs.rs/x86/0.7.1/x86/task/struct.TaskStateSegment.html ```rust // in src/interrupts/mod.rs -mod tss; - -// in src/interrupts/tss.rs - -#[derive(Debug)] -#[repr(C, packed)] -pub struct TaskStateSegment { - reserved_0: u32, - pub privilege_stacks: PrivilegeStackTable, - reserved_1: u64, - pub interrupt_stacks: InterruptStackTable, - reserved_2: u64, - reserved_3: u16, - iomap_base: u16, -} - -use memory::StackPointer; - -#[derive(Debug)] -pub struct PrivilegeStackTable([Option; 3]); - -#[derive(Debug)] -pub struct InterruptStackTable([Option; 7]); +use x86::task::TaskStateSegment; ``` -We use [repr\(C)] for the struct since the order is fields is important. We also use `[repr(packed)]` because otherwise the compiler might insert additional padding between the `reserved_0` and `privilege_stacks` fields. - -The `PrivilegeStackTable` and `InterruptStackTable` types are just newtype wrappers for arrays of `Option`. Here it becomes important that we implemented `NonZero` for `StackPointer`: Thus, an `Option` still has the required size of 64 bits. - -Let's add a `TaskStateSegment::new` function that creates an empty TSS: - -```rust -impl TaskStateSegment { - pub fn new() -> TaskStateSegment { - TaskStateSegment { - privilege_stacks: PrivilegeStackTable([None, None, None]), - interrupt_stacks: InterruptStackTable( - [None, None, None, None, None, None, None]), - iomap_base: 0, - reserved_0: 0, - reserved_1: 0, - reserved_2: 0, - reserved_3: 0, - } - } -} -``` - -We also add a `InterruptStackTable::insert_stack` method, that inserts a given stack into a free table entry: - -```rust -use memory::Stack; - -impl InterruptStackTable { - pub fn insert_stack(&mut self, stack: Stack) -> Result { - // TSS index starts at 1, so we do a `zip(1..)` - for (entry, i) in self.0.iter_mut().zip(1..) 
{ - if entry.is_none() { - *entry = Some(stack.top()); - return Ok(i); - } - } - Err(stack) - } -} -``` -The function iterates over the table and places the stack pointer in the first free entry. In the case of success, we return the table index of the inserted pointer. If there's no free entry left, we return the stack back to the caller as `Err`. - -#### Creating a TSS -Let's build a new TSS that contains our double fault stack in its Interrupt Stack Table. - The Global Descriptor Table (again) Putting it together What’s next? diff --git a/src/interrupts/gdt.rs b/src/interrupts/gdt.rs index 76418808..23497d27 100644 --- a/src/interrupts/gdt.rs +++ b/src/interrupts/gdt.rs @@ -1,6 +1,6 @@ use bit_field::BitField; use collections::vec::Vec; -use interrupts::tss::TaskStateSegment; +use x86::bits64::task::TaskStateSegment; pub struct Gdt(Vec); @@ -26,11 +26,11 @@ impl Gdt { } pub fn load(&'static self) { - use x86::dtables::{DescriptorTablePointer, lgdt}; + use x86::shared::dtables::{DescriptorTablePointer, lgdt}; use core::mem::size_of; let ptr = DescriptorTablePointer { - base: self.0.as_ptr() as u64, + base: self.0.as_ptr() as *const ::x86::shared::segmentation::SegmentDescriptor, limit: (self.0.len() * size_of::() - 1) as u16, }; diff --git a/src/interrupts/idt.rs b/src/interrupts/idt.rs index 0d290150..63f81410 100644 --- a/src/interrupts/idt.rs +++ b/src/interrupts/idt.rs @@ -109,7 +109,7 @@ impl EntryOptions { #[allow(dead_code)] pub fn set_stack_index(&mut self, index: u8) -> &mut Self { - self.0.set_range(0..3, index.into()); + self.0.set_range(0..3, (index + 1).into()); self } } diff --git a/src/interrupts/mod.rs b/src/interrupts/mod.rs index fd4bfef4..5719411d 100644 --- a/src/interrupts/mod.rs +++ b/src/interrupts/mod.rs @@ -9,9 +9,9 @@ use spin::Once; use memory::MemoryController; +use x86::bits64::task::TaskStateSegment; mod idt; -mod tss; mod gdt; macro_rules! save_scratch_registers { @@ -92,22 +92,18 @@ macro_rules! 
handler_with_error_code { } static IDT: Once = Once::new(); -static TSS: Once = Once::new(); +static TSS: Once = Once::new(); static GDT: Once = Once::new(); pub fn init(memory_controller: &mut MemoryController) { let double_fault_stack = memory_controller.alloc_stack(1) .expect("could not allocate double fault stack"); - let mut double_fault_ist_index = 0; + const DOUBLE_FAULT_IST_INDEX: u8 = 0; let tss = TSS.call_once(|| { - let mut tss = tss::TaskStateSegment::new(); - - double_fault_ist_index = tss.interrupt_stacks - .insert_stack(double_fault_stack) - .expect("IST flush_all"); - + let mut tss = TaskStateSegment::new(); + tss.ist[DOUBLE_FAULT_IST_INDEX as usize] = double_fault_stack.top() as u64; tss }); @@ -134,7 +130,7 @@ pub fn init(memory_controller: &mut MemoryController) { idt.set_handler(3, handler!(breakpoint_handler)); idt.set_handler(6, handler!(invalid_opcode_handler)); idt.set_handler(8, handler_with_error_code!(double_fault_handler)) - .set_stack_index(double_fault_ist_index); + .set_stack_index(DOUBLE_FAULT_IST_INDEX); idt.set_handler(14, handler_with_error_code!(page_fault_handler)); idt diff --git a/src/interrupts/tss.rs b/src/interrupts/tss.rs deleted file mode 100644 index ab9273bc..00000000 --- a/src/interrupts/tss.rs +++ /dev/null @@ -1,46 +0,0 @@ -use memory::{Stack, StackPointer}; - -#[derive(Debug)] -#[repr(C, packed)] -pub struct TaskStateSegment { - reserved_0: u32, - pub privilege_stacks: PrivilegeStackTable, - reserved_1: u64, - pub interrupt_stacks: InterruptStackTable, - reserved_2: u64, - reserved_3: u16, - iomap_base: u16, -} - -impl TaskStateSegment { - pub fn new() -> TaskStateSegment { - TaskStateSegment { - privilege_stacks: PrivilegeStackTable([None, None, None]), - interrupt_stacks: InterruptStackTable([None, None, None, None, None, None, None]), - iomap_base: 0, - reserved_0: 0, - reserved_1: 0, - reserved_2: 0, - reserved_3: 0, - } - } -} - -#[derive(Debug)] -pub struct PrivilegeStackTable([Option; 3]); - -#[derive(Debug)] 
-pub struct InterruptStackTable([Option; 7]); - -impl InterruptStackTable { - pub fn insert_stack(&mut self, stack: Stack) -> Result { - // TSS index starts at 1 - for (entry, i) in self.0.iter_mut().zip(1..) { - if entry.is_none() { - *entry = Some(stack.top()); - return Ok(i); - } - } - Err(stack) - } -} diff --git a/src/lib.rs b/src/lib.rs index 74f77159..c2149d2b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,7 +13,6 @@ #![feature(asm)] #![feature(naked_functions)] #![feature(core_intrinsics)] -#![feature(nonzero)] #![feature(drop_types_in_const)] #![no_std] diff --git a/src/memory/mod.rs b/src/memory/mod.rs index aa94d33b..22693792 100644 --- a/src/memory/mod.rs +++ b/src/memory/mod.rs @@ -9,7 +9,7 @@ pub use self::area_frame_allocator::AreaFrameAllocator; pub use self::paging::remap_the_kernel; -pub use self::stack_allocator::{Stack, StackPointer}; +pub use self::stack_allocator::Stack; use self::paging::PhysicalAddress; use multiboot2::BootInformation; diff --git a/src/memory/stack_allocator.rs b/src/memory/stack_allocator.rs index cc29d577..b8abc9f8 100644 --- a/src/memory/stack_allocator.rs +++ b/src/memory/stack_allocator.rs @@ -1,6 +1,5 @@ use memory::paging::{self, Page, PageIter, ActivePageTable}; use memory::{PAGE_SIZE, FrameAllocator}; -use core::nonzero::NonZero; pub fn new_stack_allocator(page_range: PageIter) -> StackAllocator { StackAllocator { range: page_range } @@ -52,36 +51,24 @@ impl StackAllocator { #[derive(Debug)] pub struct Stack { - top: StackPointer, - bottom: StackPointer, + top: usize, + bottom: usize, } impl Stack { fn new(top: usize, bottom: usize) -> Stack { assert!(top > bottom); Stack { - top: StackPointer::new(top), - bottom: StackPointer::new(bottom), + top: top, + bottom: bottom, } } - pub fn top(&self) -> StackPointer { + pub fn top(&self) -> usize { self.top } -} -#[derive(Debug, Clone, Copy)] -pub struct StackPointer(NonZero); - -impl StackPointer { - fn new(ptr: usize) -> StackPointer { - assert!(ptr != 0); - 
StackPointer(unsafe { NonZero::new(ptr) }) - } -} - -impl Into for StackPointer { - fn into(self) -> usize { - *self.0 + pub fn bottom(&self) -> usize { + self.bottom } } From 3e6b1a8a35f8abef4541a91b7f27b75b88a05ea1 Mon Sep 17 00:00:00 2001 From: Philipp Oppermann Date: Wed, 28 Dec 2016 16:19:59 +0100 Subject: [PATCH 09/31] Rewrite gdt module using an array instead of Vec --- src/interrupts/gdt.rs | 109 ++++++++++++++++-------------------------- 1 file changed, 40 insertions(+), 69 deletions(-) diff --git a/src/interrupts/gdt.rs b/src/interrupts/gdt.rs index 23497d27..e67956f3 100644 --- a/src/interrupts/gdt.rs +++ b/src/interrupts/gdt.rs @@ -1,28 +1,42 @@ use bit_field::BitField; -use collections::vec::Vec; use x86::bits64::task::TaskStateSegment; +use x86::shared::segmentation::SegmentSelector; +use x86::shared::PrivilegeLevel; -pub struct Gdt(Vec); +pub struct Gdt { + table: [u64; 8], + current_offset: usize, +} impl Gdt { pub fn new() -> Gdt { - let zero_entry = 0; - Gdt(vec![zero_entry]) + Gdt { + table: [0; 8], + current_offset: 1, + } } - pub fn add_entry(&mut self, entry: Entry) -> Selector { - use core::mem::size_of; - let index = self.0.len() * size_of::(); - - match entry { - Entry::UserSegment(entry) => self.0.push(entry), - Entry::SystemSegment(entry_low, entry_high) => { - self.0.push(entry_low); - self.0.push(entry_high); - } + fn push(&mut self, value: u64) -> usize { + if self.current_offset < self.table.len() { + let offset = self.current_offset; + self.table[offset] = value; + self.current_offset += 1; + offset + } else { + panic!("GDT full"); } + } - Selector(index as u16) + pub fn add_entry(&mut self, entry: Descriptor) -> SegmentSelector { + let index = match entry { + Descriptor::UserSegment(value) => self.push(value), + Descriptor::SystemSegment(value_low, value_high) => { + let index = self.push(value_low); + self.push(value_high); + index + } + }; + SegmentSelector::new(index as u16, PrivilegeLevel::Ring0) } pub fn load(&'static self) { 
@@ -30,31 +44,26 @@ impl Gdt { use core::mem::size_of; let ptr = DescriptorTablePointer { - base: self.0.as_ptr() as *const ::x86::shared::segmentation::SegmentDescriptor, - limit: (self.0.len() * size_of::() - 1) as u16, + base: self.table.as_ptr() as *const ::x86::shared::segmentation::SegmentDescriptor, + limit: (self.table.len() * size_of::() - 1) as u16, }; unsafe { lgdt(&ptr) }; } } -pub enum Entry { +pub enum Descriptor { UserSegment(u64), SystemSegment(u64, u64), } -impl Entry { - pub fn code_segment() -> Entry { - let flags = DESCRIPTOR_TYPE | PRESENT | READ_WRITE | EXECUTABLE | LONG_MODE; - Entry::UserSegment(flags.bits()) +impl Descriptor { + pub fn kernel_code_segment() -> Descriptor { + let flags = USER_SEGMENT | PRESENT | EXECUTABLE | LONG_MODE; + Descriptor::UserSegment(flags.bits()) } - pub fn data_segment() -> Entry { - let flags = DESCRIPTOR_TYPE | PRESENT | READ_WRITE; - Entry::UserSegment(flags.bits()) - } - - pub fn tss_segment(tss: &'static TaskStateSegment) -> Entry { + pub fn tss_segment(tss: &'static TaskStateSegment) -> Descriptor { use core::mem::size_of; let ptr = tss as *const _ as u64; @@ -67,54 +76,16 @@ impl Entry { let mut high = 0; high.set_range(0..32, ptr.get_range(32..64)); - Entry::SystemSegment(low, high) + Descriptor::SystemSegment(low, high) } } bitflags! 
{ - flags EntryFlags: u64 { - const READ_WRITE = 1 << 41, + flags DescriptorFlags: u64 { const CONFORMING = 1 << 42, const EXECUTABLE = 1 << 43, - const DESCRIPTOR_TYPE = 1 << 44, + const USER_SEGMENT = 1 << 44, const PRESENT = 1 << 47, const LONG_MODE = 1 << 53, } } - -#[derive(Debug, Clone, Copy)] -pub struct Selector(u16); - -impl Selector { - pub fn new() -> Selector { - Selector(0) - } -} - -pub fn reload_segment_registers(code_selector: Selector, data_selector: Selector) { - - let current_code_selector: u16; - let current_data_selector: u16; - - unsafe { - asm!("mov $0, cs" : "=r" (current_code_selector) ::: "intel"); - asm!("mov $0, ds" : "=r" (current_data_selector) ::: "intel"); - } - assert_eq!(code_selector.0, current_code_selector); - assert_eq!(data_selector.0, current_data_selector); - - // jmp ax:.new_code_segment // TODO - // .new_code_segment: - // unsafe { asm!(" - // mov ax, $1 - // mov ss, ax - // mov ds, ax - // mov es, ax - // ":: "r" (code_selector.0), "r" (data_selector.0) :: "intel")}; - // -} - -/// Load the task state register. -pub unsafe fn load_ltr(selector: Selector) { - asm!("ltr $0" :: "r" (selector)); -} From 0b03c18b0636d5678e218334de9840744ff28fda Mon Sep 17 00:00:00 2001 From: Philipp Oppermann Date: Wed, 28 Dec 2016 16:21:33 +0100 Subject: [PATCH 10/31] Rewrite interrupts::init using a lazy_static IDT again --- src/interrupts/mod.rs | 72 ++++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 35 deletions(-) diff --git a/src/interrupts/mod.rs b/src/interrupts/mod.rs index 5719411d..15dde3ed 100644 --- a/src/interrupts/mod.rs +++ b/src/interrupts/mod.rs @@ -91,52 +91,54 @@ macro_rules! 
handler_with_error_code { }} } -static IDT: Once = Once::new(); -static TSS: Once = Once::new(); -static GDT: Once = Once::new(); - -pub fn init(memory_controller: &mut MemoryController) { - let double_fault_stack = memory_controller.alloc_stack(1) - .expect("could not allocate double fault stack"); - - const DOUBLE_FAULT_IST_INDEX: u8 = 0; - - let tss = TSS.call_once(|| { - let mut tss = TaskStateSegment::new(); - tss.ist[DOUBLE_FAULT_IST_INDEX as usize] = double_fault_stack.top() as u64; - tss - }); - - let mut code_selector = gdt::Selector::new(); - let mut data_selector = gdt::Selector::new(); - let mut tss_selector = gdt::Selector::new(); - let gdt = GDT.call_once(|| { - let mut gdt = gdt::Gdt::new(); - - code_selector = gdt.add_entry(gdt::Entry::code_segment()); - data_selector = gdt.add_entry(gdt::Entry::data_segment()); - tss_selector = gdt.add_entry(gdt::Entry::tss_segment(tss)); - - gdt - }); - gdt.load(); - gdt::reload_segment_registers(code_selector, data_selector); - unsafe { gdt::load_ltr(tss_selector) }; - - let idt = IDT.call_once(|| { +lazy_static! 
{ + static ref IDT: idt::Idt = { let mut idt = idt::Idt::new(); idt.set_handler(0, handler!(divide_by_zero_handler)); idt.set_handler(3, handler!(breakpoint_handler)); idt.set_handler(6, handler!(invalid_opcode_handler)); idt.set_handler(8, handler_with_error_code!(double_fault_handler)) - .set_stack_index(DOUBLE_FAULT_IST_INDEX); + .set_stack_index(DOUBLE_FAULT_IST_INDEX as u8); idt.set_handler(14, handler_with_error_code!(page_fault_handler)); idt + }; +} + +static TSS: Once = Once::new(); +static GDT: Once = Once::new(); +const DOUBLE_FAULT_IST_INDEX: usize = 0; + +pub fn init(memory_controller: &mut MemoryController) { + use x86::shared::segmentation::{SegmentSelector, set_cs}; + use x86::shared::task::load_tr; + + let double_fault_stack = memory_controller.alloc_stack(1) + .expect("could not allocate double fault stack"); + + let tss = TSS.call_once(|| { + let mut tss = TaskStateSegment::new(); + tss.ist[DOUBLE_FAULT_IST_INDEX] = double_fault_stack.top() as u64; + tss }); - idt.load(); + let mut code_selector = SegmentSelector::empty(); + let mut tss_selector = SegmentSelector::empty(); + let gdt = GDT.call_once(|| { + let mut gdt = gdt::Gdt::new(); + tss_selector = gdt.add_entry(gdt::Descriptor::tss_segment(&tss)); + code_selector = gdt.add_entry(gdt::Descriptor::kernel_code_segment()); + gdt + }); + gdt.load(); + + unsafe { + set_cs(code_selector); + load_tr(tss_selector); + } + + IDT.load(); } #[derive(Debug)] From 58e7cc77d08fd3fb73a8ffd16e397fb93e1b1d94 Mon Sep 17 00:00:00 2001 From: Philipp Oppermann Date: Wed, 28 Dec 2016 16:22:02 +0100 Subject: [PATCH 11/31] Data segment descriptors are not needed in 64-bit mode --- src/arch/x86_64/boot.asm | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/arch/x86_64/boot.asm b/src/arch/x86_64/boot.asm index a9d7a6bb..b4ff26f1 100644 --- a/src/arch/x86_64/boot.asm +++ b/src/arch/x86_64/boot.asm @@ -29,12 +29,6 @@ start: ; load the 64-bit GDT lgdt [gdt64.pointer] - ; update selectors - mov 
ax, gdt64.data - mov ss, ax - mov ds, ax - mov es, ax - jmp gdt64.code:long_mode_start set_up_page_tables: @@ -202,9 +196,7 @@ section .rodata gdt64: dq 0 ; zero entry .code: equ $ - gdt64 ; new - dq (1<<44) | (1<<47) | (1<<41) | (1<<43) | (1<<53) ; code segment -.data: equ $ - gdt64 ; new - dq (1<<44) | (1<<47) | (1<<41) ; data segment + dq (1<<44) | (1<<47) | (1<<43) | (1<<53) ; code segment .pointer: dw $ - gdt64 - 1 dq gdt64 From c3bfcbcd8db3653140d89afee7f863fbfeedb1e4 Mon Sep 17 00:00:00 2001 From: Philipp Oppermann Date: Wed, 28 Dec 2016 16:22:18 +0100 Subject: [PATCH 12/31] Silence dead code warning --- src/memory/stack_allocator.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/memory/stack_allocator.rs b/src/memory/stack_allocator.rs index b8abc9f8..47e8b5c3 100644 --- a/src/memory/stack_allocator.rs +++ b/src/memory/stack_allocator.rs @@ -68,6 +68,7 @@ impl Stack { self.top } + #[allow(dead_code)] pub fn bottom(&self) -> usize { self.bottom } From ec358321da347cc2135a1528c2ec0b296b6ad8ed Mon Sep 17 00:00:00 2001 From: Philipp Oppermann Date: Wed, 28 Dec 2016 16:58:36 +0100 Subject: [PATCH 13/31] Finish first draft of double faults post --- blog/post/double-faults.md | 393 ++++++++++++++++++++++++++++++++++++- 1 file changed, 386 insertions(+), 7 deletions(-) diff --git a/blog/post/double-faults.md b/blog/post/double-faults.md index 1e4d2b3c..384c6ded 100644 --- a/blog/post/double-faults.md +++ b/blog/post/double-faults.md @@ -3,10 +3,17 @@ title = "Double Faults" date = "2016-11-08" +++ -In this post we will make our kernel completely exception-proof by catching double faults on a separate kernel stack. +In this post we explore double faults in detail. We also set up an Interrupt Stack Table to catch double faults on a separate kernel stack. This way, we will be able to completely avoid triple faults in the future, even on kernel stack overflow. +As always, the complete source code is available on [Github]. 
Please file [issues] for any problems, questions, or improvement suggestions. There is also a [gitter chat] and a [comment section] at the end of this page. + +[Github]: https://github.com/phil-opp/blog_os/tree/double_faults +[issues]: https://github.com/phil-opp/blog_os/issues +[gitter chat]: https://gitter.im/phil-opp/blog_os +[comment section]: #disqus_thread + ## What is a Double Fault? In simplified terms, a double fault is a special exception that occurs when the CPU fails to invoke an exception handler. For example, it occurs when a page fault is triggered but there is no page fault handler registered in the [Interrupt Descriptor Table][IDT] (IDT). So it's kind of similar to catch-all blocks in programming languages with exceptions, e.g. `catch(...)` in C++ or `catch(Exception e)` in Java or C#. @@ -468,16 +475,388 @@ Let's create a new TSS that contains our double fault stack in its Interrupt Sta use x86::task::TaskStateSegment; ``` -The Global Descriptor Table (again) -Putting it together -What’s next? +Let's create a new TSS in our `interrupts::init` function: -In the previous post, we learned how to return from exceptions correctly. In this post, we will explore a special type of exception: the double fault. The double fault occurs whenever the invokation of an exception handler fails. For example, if we didn't declare any exception hanlder in the IDT. +{{< highlight rust "hl_lines=3 9 10" >}} +// in src/interrupts/mod.rs -Let's start by creating a handler function for double faults: +const DOUBLE_FAULT_IST_INDEX: usize = 0; + +pub fn init(memory_controller: &mut MemoryController) { + let double_fault_stack = memory_controller.alloc_stack(1) + .expect("could not allocate double fault stack"); + + let mut tss = TaskStateSegment::new(); + tss.ist[DOUBLE_FAULT_IST_INDEX] = double_fault_stack.top() as u64; + + IDT.load(); +} +{{< / highlight >}} + +We define that the 0th IST entry is the double fault stack (any other IST index would work too). 
We create a new TSS through the `TaskStateSegment::new` function and load the top address (stacks grow downwards) of the double fault stack into the 0th entry. + +#### Loading the TSS +Now that we created a new TSS, we need a way to tell the CPU that it should use it. Unfortunately, this is a bit cumbersome, since the TSS is a Task State _Segment_ (for historical reasons). So instead of loading the table directly, we need to add a new segment descriptor to the [Global Descriptor Table] (GDT). Then we can load our TSS invoking the [`ltr` instruction] with the respective GDT index. + +[Global Descriptor Table]: http://www.flingos.co.uk/docs/reference/Global-Descriptor-Table/ +[`ltr` instruction]: http://x86.renejeschke.de/html/file_module_x86_id_163.html + +### The Global Descriptor Table (again) +The Global Descriptor Table (GDT) is a relict that was used for [memory segmentation] before paging became the de facto standard. It is still needed in 64-bit mode for various things such as kernel/user mode configuration or TSS loading. + +[memory segmentation]: https://en.wikipedia.org/wiki/X86_memory_segmentation + +We already created a GDT [when switching to long mode]. Back then, we used assembly to create valid code and data segment descriptors, which were required to enter 64-bit mode. We could just edit that assembly file and add an additional TSS descriptor. However, we now have the expressiveness of Rust, so let's do it in Rust instead. + +[when switching to long mode]: {{% relref "02-entering-longmode.md#the-global-descriptor-table" %}} + +We start by creating a new `interrupts::gdt` submodule: ```rust +// in src/interrupts/mod.rs +mod gdt; ``` -Next, we need to register the double fault handler in our IDT: +```rust +// src/interrupts/gdt.rs + +pub struct Gdt([u64; 8]); + +impl Gdt { + pub fn new() -> Gdt { + Gdt([0; 8]) + } +} +``` +We create a simple `Gdt` type as a newtype wrapper around `[u64; 8]`. 
Theoretically, a GDT can have up to 8192 entries, but this doesn't make much sense in long mode. Eight entries should be more than enough for our system. + +#### User and System Segments +There are two types of GDT entries in long mode: user and system segment descriptors. Descriptors for code and data segment segments are user segment descriptors. They contain no addresses since segments always span the complete address space on x86_64 (real segmentation is no longer supported). Thus, user segment descriptors only contain a few flags (e.g. present or user mode) and fit into a single `u64` entry. + +System descriptors such as TSS descriptors are different. They often contain a base address and a limit (e.g. TSS start and length) and thus need more than 64 bits. Therefore, system segments are 128 bits. They are stored as two consecutive entries in the GDT. + +Consequently, we model a `Descriptor` as an `enum`: + +```rust +pub enum Descriptor { + UserSegment(u64), + SystemSegment(u64, u64), +} +``` + +The flag bits are common between all descriptor types, so we create a general `DescriptorFlags` type: + +```rust +bitflags! { + flags DescriptorFlags: u64 { + const CONFORMING = 1 << 42, + const EXECUTABLE = 1 << 43, + const USER_SEGMENT = 1 << 44, + const PRESENT = 1 << 47, + const LONG_MODE = 1 << 53, + } +} +``` + +We only add flags that are relevant in 64-bit mode. For example, we omit the read/write bit, since it is completely ignored by the CPU. + +#### Code Segments +We add a function to create kernel mode code segments: + +```rust +impl Descriptor { + pub fn kernel_code_segment() -> Descriptor { + let flags = USER_SEGMENT | PRESENT | EXECUTABLE | LONG_MODE; + Descriptor::UserSegment(flags.bits()) + } +} +``` +We set the `USER_SEGMENT` bit to indicate a 64 bit descriptor (otherwise the CPU expects a 128 bit system descriptor). The `PRESENT`, `EXECUTABLE`, and `LONG_MODE` bits are also needed for a 64-bit mode code segment. 
+ +The data segment registers `ds`, `ss`, and `es` are completely ignored in 64-bit mode, so we don't need any data segment descriptors in our GDT. + +#### TSS Segments +A TSS descriptor has the following format: + +Bit(s) | Name | Meaning +--------------------- | ------ | ---------------------------------- +0-15 | **limit 0-15** | the first 2 byte of the TSS's limit +16-39 | **base 0-23** | the first 3 byte of the TSS's base address +40-43 | **type** | must be `0b1001` for an available 64-bit TSS +44 | zero | must be 0 +45-46 | privilege | the [ring level]: 0 for kernel, 3 for user +47 | **present** | must be 1 for valid selectors +48-51 | limit 16-19 | bits 16 to 19 of the segment's limit +52 | available | freely available to the OS +53-54 | ignored | +55 | granularity | if it's set, the limit is the number of pages, else it's a byte number +56-63 | **base 24-31** | the fourth byte of the base address +64-95 | **base 32-63** | the last four bytes of the base address +96-127 | ignored/must be zero | bits 104-108 must be zero, the rest is ignored + +[ring level]: http://wiki.osdev.org/Security#Rings + +We only need the bold fields for our TSS descriptor. For example, we don't need the `limit 16-19` field since a TSS has a fixed size that is smaller than `2^16`. 
+ +Let's add a function to our descriptor that creates a TSS descriptor for a given TSS: + +```rust +impl Descriptor { + pub fn tss_segment(tss: &'static TaskStateSegment) -> Descriptor { + use core::mem::size_of; + + let ptr = tss as *const _ as u64; + + let mut low = PRESENT.bits(); + // base + low.set_range(16..40, ptr.get_range(0..24)); + low.set_range(56..64, ptr.get_range(24..32)); + // limit (the `-1` in needed since the bound is inclusive) + low.set_range(0..16, (size_of::() - 1) as u64); + // type (0b1001 = available 64-bit tss) + low.set_range(40..44, 0b1001); + + let mut high = 0; + high.set_range(0..32, ptr.get_range(32..64)); + + Descriptor::SystemSegment(low, high) + } +} +``` +We convert the passed `TaskStateSegment` reference to an `u64` and use the methods of the [`BitField` trait] to set the needed fields. +We require the `'static` lifetime for the `TaskStateSegment` reference, since the hardware might access it on every interrupt as long as the OS runs. + +[`BitField` trait]: https://docs.rs/bit_field/0.6.0/bit_field/trait.BitField.html#method.get_bit + +#### Adding Descriptors to the GDT +In order to add descriptors to the GDT, we add a `add_entry` method: + +```rust +impl Gdt { + pub fn add_entry(&mut self, entry: Descriptor) -> SegmentSelector { + let index = match entry { + Descriptor::UserSegment(value) => self.push(value), + Descriptor::SystemSegment(value_low, value_high) => { + let index = self.push(value_low); + self.push(value_high); + index + } + }; + SegmentSelector::new(index as u16, PrivilegeLevel::Ring0) + } +} +``` +For an user segment we just push the `u64` and remember the index. For a system segment, we push the low and high `u64` and use the index of the low value. We then use this index to return a new [SegmentSelector]. 
+ +[SegmentSelector]: https://docs.rs/x86/0.8.0/x86/shared/segmentation/struct.SegmentSelector.html#method.new + +The `push` method looks like this: + +```rust +impl Gdt { + fn push(&mut self, value: u64) -> usize { + for (i, entry) in self.0.iter_mut().enumerate().skip(1) { + if *entry == 0 { + *entry = value; + return i; + } + } + panic!("GDT full"); + } +} +``` +The method iterates over the `[u64; 8]` array and chooses the first free entry (entry is 0). The zero-th entry of valid GDTs needs to be always 0, so we `skip` it in our search. If there is no free entry left, we panic since this likely indicates a programming error (we should never need to create more than two or three GDT entries for our kernel). + +#### Loading the GDT +To load the GDT, we add a new `load` method: + +```rust +impl Gdt { + pub fn load(&'static self) { + use x86::shared::dtables::{DescriptorTablePointer, lgdt}; + use x86::shared::segmentation; + use core::mem::size_of; + + let ptr = DescriptorTablePointer { + base: self.0.as_ptr() as *const segmentation::SegmentDescriptor, + limit: (self.0.len() * size_of::() - 1) as u16, + }; + + unsafe { lgdt(&ptr) }; + } +} +``` +We use the [`DescriptorTablePointer` struct] and the [`lgdt` function] provided by the `x86` crate to load our GDT. Again, we require a `'static'` reference since the GDT possibly needs to live for the rest of the run time. + +[`DescriptorTablePointer` struct]: https://docs.rs/x86/0.8.0/x86/shared/dtables/struct.DescriptorTablePointer.html +[`lgdt` function]: https://docs.rs/x86/0.8.0/x86/shared/dtables/fn.lgdt.html + +### Putting it together +We now have a double fault stack and are able to create and load a TSS (with an IST). So let's put everything together to catch kernel stack overflows. + +We already created a new TSS in our `interrupts::init` function. 
Now we can load this TSS by creating a new GDT: + +{{< highlight rust "hl_lines=11 12 13" >}} +// in src/interrupts/mod.rs + +pub fn init(memory_controller: &mut MemoryController) { + let double_fault_stack = memory_controller.alloc_stack(1) + .expect("could not allocate double fault stack"); + + let mut tss = TaskStateSegment::new(); + tss.ist[DOUBLE_FAULT_IST_INDEX] = double_fault_stack.top() as u64; + + let mut gdt = gdt::Gdt::new(); + let code_selector = gdt.add_entry(gdt::Descriptor::kernel_code_segment()); + let tss_selector = gdt.add_entry(gdt::Descriptor::tss_segment(&tss)); + gdt.load(); + + IDT.load(); +} +{{< / highlight >}} + +However, when we try to compile it, the following error occurs: + +``` +error: `tss` does not live long enough + --> src/interrupts/mod.rs:108:68 + | +108 | let tss_selector = gdt.add_entry(gdt::Descriptor::tss_segment(&tss)); + | does not live long enough ^^^ +... +111 | } + | - borrowed value only lives until here + | + = note: borrowed value must be valid for the static lifetime... +``` +The problem is that we require that the TSS is valid for the rest of the run time (i.e. for the `'static` lifetime). But our created `tss` lives on the stack and is thus destroyed at the end of the `init` function. So how do we fix this problem? + +We could allocate our TSS on the heap using `Box` and use [into_raw] and a bit of `unsafe` to convert it to a `&'static` ([RFC 1233] was closed unfortunately). + +Alternatively, we could store it in a `static` somehow. The [`lazy_static` macro] doesn't work here, since we need access to the `MemoryController` for initialization. However, we can use its fundamental building block, the [`spin::Once` type]. 
+ +[into_raw]: https://doc.rust-lang.org/std/boxed/struct.Box.html#method.into_raw +[RFC 1233]: https://github.com/rust-lang/rfcs/pull/1233 +[`lazy_static` macro]: https://docs.rs/lazy_static/0.2.2/lazy_static/ +[`spin::Once` type]: https://docs.rs/spin/0.4.5/spin/struct.Once.html + +#### spin::Once +Let's try to solve our problem using `spin::Once`: + +```rust +// in src/interrupts/mod.rs + +use spin::Once; + +static TSS: Once = Once::new(); +static GDT: Once = Once::new(); +``` +The `Once` type allows us to initialize a `static` at runtime. It is safe because the only way to access the static value is through the provided methods ([call_once][Once::call_once], [try][Once::try], and [wait][Once::wait]). Thus, no value can be read before initialization and the value can only be initialized once. + +[Once::call_once]: https://docs.rs/spin/0.4.5/spin/struct.Once.html#method.call_once +[Once::try]: https://docs.rs/spin/0.4.5/spin/struct.Once.html#method.try +[Once::wait]: https://docs.rs/spin/0.4.5/spin/struct.Once.html#method.wait + +So let's rewrite our `interrupts::init` function to use the static `TSS` and `GDT`: + +{{< highlight rust "hl_lines=6 9 10 12 17 18" >}} +pub fn init(memory_controller: &mut MemoryController) { + let double_fault_stack = memory_controller.alloc_stack(1) + .expect("could not allocate double fault stack"); + + let tss = TSS.call_once(|| { + let mut tss = TaskStateSegment::new(); + tss.ist[DOUBLE_FAULT_IST_INDEX] = double_fault_stack.top() as u64; + tss + }); + + let gdt = GDT.call_once(|| { + let mut gdt = gdt::Gdt::new(); + let code_selector = gdt.add_entry(gdt::Descriptor:: + kernel_code_segment()); + let tss_selector = gdt.add_entry(gdt::Descriptor::tss_segment(&tss)); + gdt + }); + gdt.load(); + + IDT.load(); +} +{{< / highlight >}} + +Now it should compile again! + +#### The final Steps +We're almost done. We successfully loaded our new GDT, which contains a TSS descriptor. Now there are just a few steps left: + +1. 
We changed our GDT, so we should reload the `cs`, the code segment register. This is required since the old segment selector could point to a different GDT descriptor now (e.g. a TSS descriptor).
+2. We loaded a GDT that contains a TSS selector, but we still need to tell the CPU that it should use that TSS.
+3. As soon as our TSS is loaded, the CPU has access to a valid interrupt stack table (IST). Then we can tell the CPU that it should use our new double fault stack by modifying our double fault IDT entry.
+
+For the first two steps, we need access to the `code_selector` and `tss_selector` outside of the closure. We can achieve this by defining them outside of the closure:
+
+{{< highlight rust "hl_lines=3 4 7 8 11 12 19 21" >}}
+// in src/interrupts/mod.rs
+pub fn init(memory_controller: &mut MemoryController) {
+    use x86::shared::segmentation::{SegmentSelector, set_cs};
+    use x86::shared::task::load_tr;
+    ...
+
+    let mut code_selector = SegmentSelector::empty();
+    let mut tss_selector = SegmentSelector::empty();
+    let gdt = GDT.call_once(|| {
+        let mut gdt = gdt::Gdt::new();
+        code_selector = gdt.add_entry(gdt::Descriptor::kernel_code_segment());
+        tss_selector = gdt.add_entry(gdt::Descriptor::tss_segment(&tss));
+        gdt
+    });
+    gdt.load();
+
+    unsafe {
+        // reload code segment register
+        set_cs(code_selector);
+        // load TSS
+        load_tr(tss_selector);
+    }
+
+    IDT.load();
+}
+{{< / highlight >}}
+
+We first set the descriptors to `empty` and then update them from inside the closure. Now we're able to reload the code segment register using [`set_cs`] and to load the TSS using [`load_tr`].
+
+[`set_cs`]: https://docs.rs/x86/0.8.0/x86/shared/segmentation/fn.set_cs.html
+[`load_tr`]: https://docs.rs/x86/0.8.0/x86/shared/task/fn.load_tr.html
+
+Now that we loaded a valid TSS and interrupt stack table, we can set the stack index for our double fault handler in the IDT:
+
+{{< highlight rust "hl_lines=8" >}}
+// in src/interrupt/mod.rs
+
+lazy_static!
{ + static ref IDT: idt::Idt = { + let mut idt = idt::Idt::new(); + ... + idt.set_handler(8, handler_with_error_code!(double_fault_handler)) + .set_stack_index(DOUBLE_FAULT_IST_INDEX as u8); + ... + }; +} + +{{< / highlight >}} + +TODO `set_stack_index` method? + +That's it! Now the CPU should switch to the double fault stack whenever a double fault occurs. Thus, we are able to catch _all_ double faults, including kernel stack overflows: + +![QEMU printing `EXCEPTION: DOUBLE FAULT` and a dump of the exception stack frame](images/qemu-double-fault-on-stack-overflow.png) + +From now on we should never see a triple fault again! + +## What's next? +Now that we mastered exceptions, it's time to explore another kind of interrupts: interrupts from external devices such as timers, keyboard, or network controllers. These hardware interrupts are very similar to exceptions, e.g. they are also dispatched through the IDT. + +However, they don't arise directly on the CPU like exceptions. Instead, an _interrupt controller_ aggregates these interrupts and forwards them to CPU depending on their priority. In the next posts we will explore the two interrupt controller variants on x86: the [Intel 8259] \(“PIC”) and the [APIC]. This will allow us to react to keyboard input. 
+ +[Intel 8259]: https://en.wikipedia.org/wiki/Intel_8259 +[APIC]: https://en.wikipedia.org/wiki/Advanced_Programmable_Interrupt_Controller From 1e45ddd79c0468bc6e44a4e016a93be76a1e02e0 Mon Sep 17 00:00:00 2001 From: Philipp Oppermann Date: Thu, 29 Dec 2016 15:31:34 +0100 Subject: [PATCH 14/31] Link double fault post and source in Readme --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 77a66dc8..142266cb 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,8 @@ This repository contains the source code for the _Writing an OS in Rust_ series ([source code](https://github.com/phil-opp/blog_os/tree/better_exception_messages)) - [Returning from Exceptions](http://os.phil-opp.com/returning-from-exceptions.html) ([source code](https://github.com/phil-opp/blog_os/tree/returning_from_exceptions)) +- [Double Faults](http://os.phil-opp.com/double-faults.html) + ([source code](https://github.com/phil-opp/blog_os/tree/double_faults)) ## Additional Resources - [Cross Compile Binutils](http://os.phil-opp.com/cross-compile-binutils.html) From e121edced2a8812b4f0d4e82211d45be0cbf0aea Mon Sep 17 00:00:00 2001 From: Philipp Oppermann Date: Thu, 29 Dec 2016 17:46:23 +0100 Subject: [PATCH 15/31] Change the GDT push logic Instead of looking for a zero field, we now keep track of the next free index in a separate field. This avoids the bug that the high u64 of a pushed TSS descriptor is treated as empty. 
---
 blog/post/double-faults.md | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/blog/post/double-faults.md b/blog/post/double-faults.md
index 384c6ded..8a5de97e 100644
--- a/blog/post/double-faults.md
+++ b/blog/post/double-faults.md
@@ -521,15 +521,23 @@ mod gdt;
 ```rust
 // src/interrupts/gdt.rs
 
-pub struct Gdt([u64; 8]);
+pub struct Gdt {
+    table: [u64; 8],
+    next_free: usize,
+}
 
 impl Gdt {
     pub fn new() -> Gdt {
-        Gdt([0; 8])
+        Gdt {
+            table: [0; 8],
+            next_free: 1,
+        }
     }
 }
 ```
-We create a simple `Gdt` type as a newtype wrapper around `[u64; 8]`. Theoretically, a GDT can have up to 8192 entries, but this doesn't make much sense in long mode. Eight entries should be more than enough for our system.
+We create a simple `Gdt` struct with two fields. The `table` field contains the actual GDT modeled as a `[u64; 8]`. Theoretically, a GDT can have up to 8192 entries, but this doesn't make much sense in 64-bit mode (since there is no real segmentation support). Eight entries should be more than enough for our system.
+
+The `next_free` field stores the index of the next free entry. We initialize it with `1` since the 0th entry always needs to be 0 in a valid GDT.
 
 #### User and System Segments
 There are two types of GDT entries in long mode: user and system segment descriptors. Descriptors for code and data segment segments are user segment descriptors. They contain no addresses since segments always span the complete address space on x86_64 (real segmentation is no longer supported). Thus, user segment descriptors only contain a few flags (e.g. present or user mode) and fit into a single `u64` entry.
@@ -656,17 +664,18 @@ The `push` method looks like this: ```rust impl Gdt { fn push(&mut self, value: u64) -> usize { - for (i, entry) in self.0.iter_mut().enumerate().skip(1) { - if *entry == 0 { - *entry = value; - return i; - } + if self.next_free < self.table.len() { + let index = self.next_free; + self.table[index] = value; + self.next_free += 1; + index + } else { + panic!("GDT full"); } - panic!("GDT full"); } } ``` -The method iterates over the `[u64; 8]` array and chooses the first free entry (entry is 0). The zero-th entry of valid GDTs needs to be always 0, so we `skip` it in our search. If there is no free entry left, we panic since this likely indicates a programming error (we should never need to create more than two or three GDT entries for our kernel). +The method just writes to the `next_free` entry and returns the corresponding index. If there is no free entry left, we panic since this likely indicates a programming error (we should never need to create more than two or three GDT entries for our kernel). #### Loading the GDT To load the GDT, we add a new `load` method: From cce40a1d6729cccc14d64d85047440a0972ac5ec Mon Sep 17 00:00:00 2001 From: Philipp Oppermann Date: Thu, 29 Dec 2016 17:46:53 +0100 Subject: [PATCH 16/31] Many small improvements --- blog/post/double-faults.md | 48 +++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/blog/post/double-faults.md b/blog/post/double-faults.md index 8a5de97e..410ec7c0 100644 --- a/blog/post/double-faults.md +++ b/blog/post/double-faults.md @@ -3,7 +3,7 @@ title = "Double Faults" date = "2016-11-08" +++ -In this post we explore double faults in detail. We also set up an Interrupt Stack Table to catch double faults on a separate kernel stack. This way, we will be able to completely avoid triple faults in the future, even on kernel stack overflow. +In this post we explore double faults in detail. 
We also set up an _Interrupt Stack Table_ to catch double faults on a separate kernel stack. This way, we can completely prevent triple faults, even on kernel stack overflow. @@ -65,7 +65,7 @@ The reason for the boot loop is the following: [int 1]: https://en.wikipedia.org/wiki/INT_(x86_instruction) -So in order to prevent this triple fault, we need to either provide a handler function for `Debug` exceptions or a double fault handler. We will do the latter, since we want to avoid triple faults completely. +So in order to prevent this triple fault, we need to either provide a handler function for debug exceptions or a double fault handler. We will do the latter, since we want to avoid triple faults in all cases. ### A Double Fault Handler A double fault is a normal exception with an error code, so we can use our `handler_with_error_code` macro to create a wrapper function: @@ -96,7 +96,7 @@ extern "C" fn double_fault_handler(stack_frame: &ExceptionStackFrame, } {{< / highlight >}} -Our handler prints a short error message and dumps the exception stack frame. The error code of the double fault handler is _always zero_, so there's no reason to print it. +Our handler prints a short error message and dumps the exception stack frame. The error code of the double fault handler is always zero, so there's no reason to print it. 
When we start our kernel now, we should see that the double fault handler is invoked: @@ -220,7 +220,6 @@ mod stack_allocator; ``` -#### The `stack_allocator` Module First, we create a new `StackAllocator` struct and a constructor function: ```rust @@ -242,7 +241,7 @@ We create a simple `StackAllocator` that allocates stacks from a given range of [in the kernel heap post]: {{% relref "08-kernel-heap.md#mapping-the-heap" %}} -In order to allocate new stacks, we add a `alloc_stack` method: +We add a `alloc_stack` method that allocates a new stack: ```rust // in src/memory/stack_allocator.rs @@ -293,7 +292,7 @@ impl StackAllocator { } } ``` -The method takes mutable references to the [ActivePageTable] and a [FrameAllocator], since it needs to map the new virtual stack pages to physical frames. The stack size is a multiple of the page size. +The method takes mutable references to the [ActivePageTable] and a [FrameAllocator], since it needs to map the new virtual stack pages to physical frames. We define that the stack size is a multiple of the page size. [ActivePageTable]: {{% relref "06-page-tables.md#page-table-ownership" %}} [FrameAllocator]: {{% relref "05-allocating-frames.md#a-frame-allocator" %}} @@ -362,7 +361,7 @@ impl MemoryController { } } ``` -The `MemoryController` struct holds the three types that are required for `alloc_stack` and provides a simpler interface (only one argument). The `alloc_stack` wrapper just takes the tree types as `&mut` through [destructuring] and forwards them to the `stack_allocator`. The [ref mut]-s are needed to take the inner fields by mutable reference. Note that we're re-exporting the `Stack` and `StackPointer` types since they are returned by `alloc_stack`. +The `MemoryController` struct holds the three types that are required for `alloc_stack` and provides a simpler interface (only one argument). The `alloc_stack` wrapper just takes the tree types as `&mut` through [destructuring] and forwards them to the `stack_allocator`. 
The [ref mut]-s are needed to take the inner fields by mutable reference. Note that we're re-exporting the `Stack` type since it is returned by `alloc_stack`. [destructuring]: http://rust-lang.github.io/book/chXX-patterns.html#Destructuring [ref mut]: http://rust-lang.github.io/book/chXX-patterns.html#ref-and-ref-mut @@ -441,7 +440,7 @@ pub fn init(memory_controller: &mut MemoryController) { We allocate a 4096 bytes stack (one page) for our double fault handler. Now we just need some way to tell the CPU that it should use this stack for handling double faults. ### The IST and TSS -The Interrupt Stack Table (IST) is part of an old legacy structure called [Task State Segment] \(TSS). The TSS used to hold various information (e.g. processor register state) about a task in 32-bit x86 and was for example used for [hardware context switching]. However, hardware context switching is no longer supported in 64-bit mode and the format of the TSS changed completely. +The Interrupt Stack Table (IST) is part of an old legacy structure called _[Task State Segment]_ \(TSS). The TSS used to hold various information (e.g. processor register state) about a task in 32-bit mode and was for example used for [hardware context switching]. However, hardware context switching is no longer supported in 64-bit mode and the format of the TSS changed completely. [Task State Segment]: https://en.wikipedia.org/wiki/Task_state_segment [hardware context switching]: http://wiki.osdev.org/Context_Switching#Hardware_Context_Switching @@ -465,7 +464,7 @@ I/O Map Base Address | `u16` The _Privilege Stack Table_ is used by the CPU when the privilege level changes. For example, if an exception occurs while the CPU is in user mode (privilege level 3), the CPU normally switches to kernel mode (privilege level 0) before invoking the exception handler. In that case, the CPU would switch to the 0th stack in the Privilege Stack Table (since 0 is the target privilege level). 
We don't have any user mode programs yet, so we ignore this table for now. #### Creating a TSS -Let's create a new TSS that contains our double fault stack in its Interrupt Stack Table. For that we need a TSS struct. Fortunately, the `x86` crate already contains a [`TaskStateSegment` struct] that we can use: +Let's create a new TSS that contains our double fault stack in its interrupt stack table. For that we need a TSS struct. Fortunately, the `x86` crate already contains a [`TaskStateSegment` struct] that we can use: [`TaskStateSegment` struct]: https://docs.rs/x86/0.7.1/x86/task/struct.TaskStateSegment.html @@ -567,7 +566,7 @@ bitflags! { } ``` -We only add flags that are relevant in 64-bit mode. For example, we omit the read/write bit, since it is completely ignored by the CPU. +We only add flags that are relevant in 64-bit mode. For example, we omit the read/write bit, since it is completely ignored by the CPU in 64-bit mode. #### Code Segments We add a function to create kernel mode code segments: @@ -580,12 +579,12 @@ impl Descriptor { } } ``` -We set the `USER_SEGMENT` bit to indicate a 64 bit descriptor (otherwise the CPU expects a 128 bit system descriptor). The `PRESENT`, `EXECUTABLE`, and `LONG_MODE` bits are also needed for a 64-bit mode code segment. +We set the `USER_SEGMENT` bit to indicate a 64 bit user segment descriptor (otherwise the CPU expects a 128 bit system segment descriptor). The `PRESENT`, `EXECUTABLE`, and `LONG_MODE` bits are also needed for a 64-bit mode code segment. The data segment registers `ds`, `ss`, and `es` are completely ignored in 64-bit mode, so we don't need any data segment descriptors in our GDT. 
#### TSS Segments -A TSS descriptor has the following format: +A TSS descriptor is a system segment descriptor with the following format: Bit(s) | Name | Meaning --------------------- | ------ | ---------------------------------- @@ -688,25 +687,26 @@ impl Gdt { use core::mem::size_of; let ptr = DescriptorTablePointer { - base: self.0.as_ptr() as *const segmentation::SegmentDescriptor, - limit: (self.0.len() * size_of::() - 1) as u16, + base: self.table.as_ptr() as + *const segmentation::SegmentDescriptor, + limit: (self.table.len() * size_of::() - 1) as u16, }; unsafe { lgdt(&ptr) }; } } ``` -We use the [`DescriptorTablePointer` struct] and the [`lgdt` function] provided by the `x86` crate to load our GDT. Again, we require a `'static'` reference since the GDT possibly needs to live for the rest of the run time. +We use the [`DescriptorTablePointer` struct] and the [`lgdt` function] provided by the `x86` crate to load our GDT. Again, we require a `'static` reference since the GDT possibly needs to live for the rest of the run time. [`DescriptorTablePointer` struct]: https://docs.rs/x86/0.8.0/x86/shared/dtables/struct.DescriptorTablePointer.html [`lgdt` function]: https://docs.rs/x86/0.8.0/x86/shared/dtables/fn.lgdt.html ### Putting it together -We now have a double fault stack and are able to create and load a TSS (with an IST). So let's put everything together to catch kernel stack overflows. +We now have a double fault stack and are able to create and load a TSS (which contains an IST). So let's put everything together to catch kernel stack overflows. We already created a new TSS in our `interrupts::init` function. Now we can load this TSS by creating a new GDT: -{{< highlight rust "hl_lines=11 12 13" >}} +{{< highlight rust "hl_lines=10 11 12 13" >}} // in src/interrupts/mod.rs pub fn init(memory_controller: &mut MemoryController) { @@ -751,7 +751,7 @@ Alternatively, we could store it in a `static` somehow. 
The [`lazy_static` macro [`spin::Once` type]: https://docs.rs/spin/0.4.5/spin/struct.Once.html #### spin::Once -Let's try to solve our problem using `spin::Once`: +Let's try to solve our problem using [`spin::Once`][`spin::Once` type]: ```rust // in src/interrupts/mod.rs @@ -769,7 +769,7 @@ The `Once` type allows us to initialize a `static` at runtime. It is safe becaus So let's rewrite our `interrupts::init` function to use the static `TSS` and `GDT`: -{{< highlight rust "hl_lines=6 9 10 12 17 18" >}} +{{< highlight rust "hl_lines=5 8 9 11 16 17" >}} pub fn init(memory_controller: &mut MemoryController) { let double_fault_stack = memory_controller.alloc_stack(1) .expect("could not allocate double fault stack"); @@ -802,7 +802,7 @@ We're almost done. We successfully loaded our new GDT, which contains a TSS desc 2. We loaded a GDT that contains a TSS selector, but we still need to tell the CPU that it should use that TSS. 3. As soon as our TSS is loaded, the CPU has access to a valid interrupt stack table (IST). Then we can tell the CPU that it should use our new double fault stack by modifying our double fault IDT entry. -For the first two steps, we need access to the `code_selector` and `tss_selector` outside of the closure. We can achieve this by defining them outside of the closure: +For the first two steps, we need access to the `code_selector` and `tss_selector` variables outside of the closure. We can achieve this by moving the `let` declarations out of the closure: {{< highlight rust "hl_lines=3 4 7 8 11 12 19 21" >}} // in src/interrupts/mod.rs @@ -832,12 +832,12 @@ pub fn init(memory_controller: &mut MemoryController) { } {{< / highlight >}} -We first set the descriptors to `empty` and then update them from inside the closure. Now we're able to reload the code segment register using [`set_cs`] and to load the TSS using [`load_tr`]. 
+We first set the descriptors to `empty` and then update them from inside the closure (which implicitly borrows them as `&mut`). Now we're able to reload the code segment register using [`set_cs`] and to load the TSS using [`load_tr`]. [`set_cs`]: https://docs.rs/x86/0.8.0/x86/shared/segmentation/fn.set_cs.html [`load_tr`]: https://docs.rs/x86/0.8.0/x86/shared/task/fn.load_tr.html -Now we that we loaded a valid TSS and interrupt stack table, we can set the stack index for our double fault handler in the IDT: +Now that we loaded a valid TSS and interrupt stack table, we can set the stack index for our double fault handler in the IDT: {{< highlight rust "hl_lines=8" >}} // in src/interrupt/mod.rs @@ -863,9 +863,9 @@ That's it! Now the CPU should switch to the double fault stack whenever a double From now on we should never see a triple fault again! ## What's next? -Now that we mastered exceptions, it's time to explore another kind of interrupts: interrupts from external devices such as timers, keyboard, or network controllers. These hardware interrupts are very similar to exceptions, e.g. they are also dispatched through the IDT. +Now that we mastered exceptions, it's time to explore another kind of interrupts: interrupts from external devices such as timers, keyboards, or network controllers. These hardware interrupts are very similar to exceptions, e.g. they are also dispatched through the IDT. -However, they don't arise directly on the CPU like exceptions. Instead, an _interrupt controller_ aggregates these interrupts and forwards them to CPU depending on their priority. In the next posts we will explore the two interrupt controller variants on x86: the [Intel 8259] \(“PIC”) and the [APIC]. This will allow us to react to keyboard input. +However, unlike exceptions, they don't arise directly on the CPU. Instead, an _interrupt controller_ aggregates these interrupts and forwards them to CPU depending on their priority. 
In the next posts we will explore the two interrupt controller variants on x86: the [Intel 8259] \(“PIC”) and the [APIC]. This will allow us to react to keyboard and mouse input. [Intel 8259]: https://en.wikipedia.org/wiki/Intel_8259 [APIC]: https://en.wikipedia.org/wiki/Advanced_Programmable_Interrupt_Controller From 80354c8a72da1d1caee1c79923d357972942e486 Mon Sep 17 00:00:00 2001 From: Philipp Oppermann Date: Mon, 2 Jan 2017 16:52:25 +0100 Subject: [PATCH 17/31] Multiple code corrections in post --- blog/post/double-faults.md | 84 ++++++++++++++++++++++++++++++-------- 1 file changed, 67 insertions(+), 17 deletions(-) diff --git a/blog/post/double-faults.md b/blog/post/double-faults.md index 410ec7c0..2648bfcc 100644 --- a/blog/post/double-faults.md +++ b/blog/post/double-faults.md @@ -70,7 +70,7 @@ So in order to prevent this triple fault, we need to either provide a handler fu ### A Double Fault Handler A double fault is a normal exception with an error code, so we can use our `handler_with_error_code` macro to create a wrapper function: -{{< highlight rust "hl_lines=10 17" >}} +{{< highlight rust "hl_lines=11 18" >}} // in src/interrupts/mod.rs lazy_static! { @@ -80,6 +80,7 @@ lazy_static! { idt.set_handler(0, handler!(divide_by_zero_handler)); idt.set_handler(3, handler!(breakpoint_handler)); idt.set_handler(6, handler!(invalid_opcode_handler)); + // new double fault handler idt.set_handler(8, handler_with_error_code!(double_fault_handler)); idt.set_handler(14, handler_with_error_code!(page_fault_handler)); @@ -297,7 +298,9 @@ The method takes mutable references to the [ActivePageTable] and a [FrameAllocat [ActivePageTable]: {{% relref "06-page-tables.md#page-table-ownership" %}} [FrameAllocator]: {{% relref "05-allocating-frames.md#a-frame-allocator" %}} -Instead of operating directly on `self.range`, we [clone] it and only write it back on success. 
This way, subsequent stack allocations can still succeed if there are pages left (e.g., a call with `size_in_pages = 3` can still succeed after a failed call with `size_in_pages = 100`). In order to be able to clone `PageIter`, we add a `#[derive(Clone)]` to its definition in `src/memory/paging/mod.rs`. +Instead of operating directly on `self.range`, we [clone] it and only write it back on success. This way, subsequent stack allocations can still succeed if there are pages left (e.g., a call with `size_in_pages = 3` can still succeed after a failed call with `size_in_pages = 100`). + +In order to be able to clone `PageIter`, we add a `#[derive(Clone)]` to its definition in `src/memory/paging/mod.rs`. We also need to make the `start_address` method of the `Page` type public (in the same file). [clone]: https://doc.rust-lang.org/nightly/core/clone/trait.Clone.html#tymethod.clone @@ -326,11 +329,11 @@ impl Stack { } } - pub fn top(&self) -> StackPointer { + pub fn top(&self) -> usize { self.top } - pub fn bottom(&self) -> StackPointer { + pub fn bottom(&self) -> usize { self.bottom } } @@ -379,7 +382,7 @@ pub fn init(boot_info: &BootInformation) -> MemoryController { let stack_alloc_end = stack_alloc_start + 100; let stack_alloc_range = Page::range_inclusive(stack_alloc_start, stack_alloc_end); - stack_allocator::new_stack_allocator(stack_alloc_range) + stack_allocator::StackAllocator::new(stack_alloc_range) }; MemoryController { @@ -396,6 +399,8 @@ In order to do arithmetic on pages (e.g. calculate the hundredth page after `sta ```rust // in src/memory/paging/mod.rs +use core::ops::Add; + impl Add for Page { type Output = Page; @@ -466,12 +471,12 @@ The _Privilege Stack Table_ is used by the CPU when the privilege level changes. #### Creating a TSS Let's create a new TSS that contains our double fault stack in its interrupt stack table. For that we need a TSS struct. 
Fortunately, the `x86` crate already contains a [`TaskStateSegment` struct] that we can use: -[`TaskStateSegment` struct]: https://docs.rs/x86/0.7.1/x86/task/struct.TaskStateSegment.html +[`TaskStateSegment` struct]: https://docs.rs/x86/0.8.0/x86/bits64/task/struct.TaskStateSegment.html ```rust // in src/interrupts/mod.rs -use x86::task::TaskStateSegment; +use x86::bits64::task::TaskStateSegment; ``` Let's create a new TSS in our `interrupts::init` function: @@ -546,15 +551,21 @@ System descriptors such as TSS descriptors are different. They often contain a b Consequently, we model a `Descriptor` as an `enum`: ```rust +// in src/interrupts/gdt.rs + pub enum Descriptor { UserSegment(u64), SystemSegment(u64, u64), } ``` -The flag bits are common between all descriptor types, so we create a general `DescriptorFlags` type: +The flag bits are common between all descriptor types, so we create a general `DescriptorFlags` type (using the [bitflags] macro): + +[bitflags]: https://doc.rust-lang.org/bitflags/bitflags/macro.bitflags.html ```rust +// in src/interrupts/gdt.rs + bitflags! { flags DescriptorFlags: u64 { const CONFORMING = 1 << 42, @@ -572,6 +583,8 @@ We only add flags that are relevant in 64-bit mode. For example, we omit the rea We add a function to create kernel mode code segments: ```rust +// in src/interrupts/gdt.rs + impl Descriptor { pub fn kernel_code_segment() -> Descriptor { let flags = USER_SEGMENT | PRESENT | EXECUTABLE | LONG_MODE; @@ -609,9 +622,14 @@ We only need the bold fields for our TSS descriptor. 
For example, we don't need Let's add a function to our descriptor that creates a TSS descriptor for a given TSS: ```rust +// in src/interrupts/gdt.rs + +use x86::bits64::task::TaskStateSegment; + impl Descriptor { pub fn tss_segment(tss: &'static TaskStateSegment) -> Descriptor { use core::mem::size_of; + use bit_field::BitField; let ptr = tss as *const _ as u64; @@ -640,6 +658,11 @@ We require the `'static` lifetime for the `TaskStateSegment` reference, since th In order to add descriptors to the GDT, we add a `add_entry` method: ```rust +// in src/interrupts/gdt.rs + +use x86::shared::segmentation::SegmentSelector; +use x86::shared::PrivilegeLevel; + impl Gdt { pub fn add_entry(&mut self, entry: Descriptor) -> SegmentSelector { let index = match entry { @@ -661,6 +684,8 @@ For an user segment we just push the `u64` and remember the index. For a system The `push` method looks like this: ```rust +// in src/interrupts/gdt.rs + impl Gdt { fn push(&mut self, value: u64) -> usize { if self.next_free < self.table.len() { @@ -680,6 +705,8 @@ The method just writes to the `next_free` entry and returns the corresponding in To load the GDT, we add a new `load` method: ```rust +// in src/interrupts/gdt.rs + impl Gdt { pub fn load(&'static self) { use x86::shared::dtables::{DescriptorTablePointer, lgdt}; @@ -725,25 +752,36 @@ pub fn init(memory_controller: &mut MemoryController) { } {{< / highlight >}} -However, when we try to compile it, the following error occurs: +However, when we try to compile it, the following errors occur: ``` error: `tss` does not live long enough - --> src/interrupts/mod.rs:108:68 + --> src/interrupts/mod.rs:118:68 | -108 | let tss_selector = gdt.add_entry(gdt::Descriptor::tss_segment(&tss)); +118 | let tss_selector = gdt.add_entry(gdt::Descriptor::tss_segment(&tss)); | does not live long enough ^^^ ... -111 | } +122 | } + | - borrowed value only lives until here + | + = note: borrowed value must be valid for the static lifetime... 
+ +error: `gdt` does not live long enough + --> src/interrupts/mod.rs:119:5 + | +119 | gdt.load(); + | ^^^ does not live long enough +... +122 | } | - borrowed value only lives until here | = note: borrowed value must be valid for the static lifetime... ``` -The problem is that we require that the TSS is valid for the rest of the run time (i.e. for the `'static` lifetime). But our created `tss` lives on the stack and is thus destroyed at the end of the `init` function. So how do we fix this problem? +The problem is that we require that the TSS and GDT are valid for the rest of the run time (i.e. for the `'static` lifetime). But our created `tss` and `gdt` live on the stack and are thus destroyed at the end of the `init` function. So how do we fix this problem? -We could allocate our TSS on the heap using `Box` and use [into_raw] and a bit of `unsafe` to convert it to a `&'static` ([RFC 1233] was closed unfortunately). +We could allocate our TSS and GDT on the heap using `Box` and use [into_raw] and a bit of `unsafe` to convert it to `&'static` references ([RFC 1233] was closed unfortunately). -Alternatively, we could store it in a `static` somehow. The [`lazy_static` macro] doesn't work here, since we need access to the `MemoryController` for initialization. However, we can use its fundamental building block, the [`spin::Once` type]. +Alternatively, we could store them in a `static` somehow. The [`lazy_static` macro] doesn't work here, since we need access to the `MemoryController` for initialization. However, we can use its fundamental building block, the [`spin::Once` type]. [into_raw]: https://doc.rust-lang.org/std/boxed/struct.Box.html#method.into_raw [RFC 1233]: https://github.com/rust-lang/rfcs/pull/1233 @@ -767,6 +805,8 @@ The `Once` type allows us to initialize a `static` at runtime. 
It is safe becaus [Once::try]: https://docs.rs/spin/0.4.5/spin/struct.Once.html#method.try [Once::wait]: https://docs.rs/spin/0.4.5/spin/struct.Once.html#method.wait +(The `Once` was added in spin 0.4, so you're probably need to update your spin dependency.) + So let's rewrite our `interrupts::init` function to use the static `TSS` and `GDT`: {{< highlight rust "hl_lines=5 8 9 11 16 17" >}} @@ -847,14 +887,24 @@ lazy_static! { let mut idt = idt::Idt::new(); ... idt.set_handler(8, handler_with_error_code!(double_fault_handler)) - .set_stack_index(DOUBLE_FAULT_IST_INDEX as u8); + .set_stack_index(DOUBLE_FAULT_IST_INDEX as u16); ... }; } {{< / highlight >}} -TODO `set_stack_index` method? +We also rewrite the `set_stack_index` method in `src/interrupts/idt.rs`: + +```rust +pub fn set_stack_index(&mut self, index: u16) -> &mut Self { + // The hardware IST index starts at 1, but our software IST index + // starts at 0. Therefore we need to add 1 here. + self.0.set_range(0..3, index + 1); + self +} +``` +The only change is that we're now adding `1` to the passed `index`, because the hardware expects an index between _one_ and seven. A zero means “no stack switch”. That's it! Now the CPU should switch to the double fault stack whenever a double fault occurs. Thus, we are able to catch _all_ double faults, including kernel stack overflows: From 055c534b4e84125f330dbd92746e500fadfd44fa Mon Sep 17 00:00:00 2001 From: Philipp Oppermann Date: Mon, 2 Jan 2017 19:09:49 +0100 Subject: [PATCH 18/31] Add a section about possible safety problems --- blog/post/double-faults.md | 55 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/blog/post/double-faults.md b/blog/post/double-faults.md index 2648bfcc..469d49e8 100644 --- a/blog/post/double-faults.md +++ b/blog/post/double-faults.md @@ -912,6 +912,61 @@ That's it! Now the CPU should switch to the double fault stack whenever a double From now on we should never see a triple fault again! 
+## Safety Problems +In this post, we needed a few `unsafe` blocks to load the GDT and TSS structures. We always used `'static` references, so the passed addresses should be always valid. + +However, the IST entries (stored in the TSS) are used as stack pointers by the CPU. This can lead to various memory safety violations: + +- The CPU writes to any address that we store in the IST. This way, we can easily circumvent Rust's safety guarantees and e.g. overwrite a `&mut` reference on some random stack space. +- If we use the same stack index for multiple exceptions, memory safety might be violated too. For example, imagine that the double fault hander and the breakpoint handler use the same IST index. If the double fault handler causes a breakpoint exception, the breakpoint overwrites the stack frame of the double fault handler. When the breakpoint returns, the CPU jumps back to the double fault handler and undefined behavior occurs. +- If we accidentally use an empty IST entry, the CPU uses the stack pointer `0`. This is really bad, since we overwrite our [recursively mapped] page tables this way. + +[recursively mapped]: {{% relref "06-page-tables.md#recursive-mapping" %}} + +Let's try the last case (empty IST entry) as an example: + +```rust +// in src/interrups/mod.rs + +lazy_static! { + static ref IDT: idt::Idt = { + ... + idt.set_handler(8, handler_with_error_code!(double_fault_handler)) + .set_stack_index(5); + ... + } +} +``` +Instead of using the `DOUBLE_FAULT_IST_INDEX`, we use the IST index 5. However the entry at index 5 is still empty. 
+In order to see the effect, we print the exception stack frame pointer in our `double_fault_handler`:
they are also dispatched through the IDT. From 7db15cf892f63b20dcf3dba37c856b1fa0d5d588 Mon Sep 17 00:00:00 2001 From: Philipp Oppermann Date: Fri, 30 Dec 2016 14:37:48 +0100 Subject: [PATCH 19/31] Reset source code to master again (in order to follow the post and test the exact code from it) --- src/arch/x86_64/boot.asm | 10 +++- src/interrupts/gdt.rs | 91 ----------------------------------- src/interrupts/idt.rs | 8 +-- src/interrupts/mod.rs | 45 +---------------- src/lib.rs | 24 ++------- src/memory/mod.rs | 32 +----------- src/memory/paging/mod.rs | 13 +---- src/memory/stack_allocator.rs | 75 ----------------------------- 8 files changed, 20 insertions(+), 278 deletions(-) delete mode 100644 src/interrupts/gdt.rs delete mode 100644 src/memory/stack_allocator.rs diff --git a/src/arch/x86_64/boot.asm b/src/arch/x86_64/boot.asm index b4ff26f1..a9d7a6bb 100644 --- a/src/arch/x86_64/boot.asm +++ b/src/arch/x86_64/boot.asm @@ -29,6 +29,12 @@ start: ; load the 64-bit GDT lgdt [gdt64.pointer] + ; update selectors + mov ax, gdt64.data + mov ss, ax + mov ds, ax + mov es, ax + jmp gdt64.code:long_mode_start set_up_page_tables: @@ -196,7 +202,9 @@ section .rodata gdt64: dq 0 ; zero entry .code: equ $ - gdt64 ; new - dq (1<<44) | (1<<47) | (1<<43) | (1<<53) ; code segment + dq (1<<44) | (1<<47) | (1<<41) | (1<<43) | (1<<53) ; code segment +.data: equ $ - gdt64 ; new + dq (1<<44) | (1<<47) | (1<<41) ; data segment .pointer: dw $ - gdt64 - 1 dq gdt64 diff --git a/src/interrupts/gdt.rs b/src/interrupts/gdt.rs deleted file mode 100644 index e67956f3..00000000 --- a/src/interrupts/gdt.rs +++ /dev/null @@ -1,91 +0,0 @@ -use bit_field::BitField; -use x86::bits64::task::TaskStateSegment; -use x86::shared::segmentation::SegmentSelector; -use x86::shared::PrivilegeLevel; - -pub struct Gdt { - table: [u64; 8], - current_offset: usize, -} - -impl Gdt { - pub fn new() -> Gdt { - Gdt { - table: [0; 8], - current_offset: 1, - } - } - - fn push(&mut self, value: u64) -> usize { - if 
self.current_offset < self.table.len() { - let offset = self.current_offset; - self.table[offset] = value; - self.current_offset += 1; - offset - } else { - panic!("GDT full"); - } - } - - pub fn add_entry(&mut self, entry: Descriptor) -> SegmentSelector { - let index = match entry { - Descriptor::UserSegment(value) => self.push(value), - Descriptor::SystemSegment(value_low, value_high) => { - let index = self.push(value_low); - self.push(value_high); - index - } - }; - SegmentSelector::new(index as u16, PrivilegeLevel::Ring0) - } - - pub fn load(&'static self) { - use x86::shared::dtables::{DescriptorTablePointer, lgdt}; - use core::mem::size_of; - - let ptr = DescriptorTablePointer { - base: self.table.as_ptr() as *const ::x86::shared::segmentation::SegmentDescriptor, - limit: (self.table.len() * size_of::() - 1) as u16, - }; - - unsafe { lgdt(&ptr) }; - } -} - -pub enum Descriptor { - UserSegment(u64), - SystemSegment(u64, u64), -} - -impl Descriptor { - pub fn kernel_code_segment() -> Descriptor { - let flags = USER_SEGMENT | PRESENT | EXECUTABLE | LONG_MODE; - Descriptor::UserSegment(flags.bits()) - } - - pub fn tss_segment(tss: &'static TaskStateSegment) -> Descriptor { - use core::mem::size_of; - - let ptr = tss as *const _ as u64; - - let mut low = PRESENT.bits(); - low.set_range(0..16, (size_of::() - 1) as u64); - low.set_range(16..40, ptr.get_range(0..24)); - low.set_range(40..44, 0b1001); // type: available 64-bit tss - - let mut high = 0; - high.set_range(0..32, ptr.get_range(32..64)); - - Descriptor::SystemSegment(low, high) - } -} - -bitflags! 
{ - flags DescriptorFlags: u64 { - const CONFORMING = 1 << 42, - const EXECUTABLE = 1 << 43, - const USER_SEGMENT = 1 << 44, - const PRESENT = 1 << 47, - const LONG_MODE = 1 << 53, - } -} diff --git a/src/interrupts/idt.rs b/src/interrupts/idt.rs index 63f81410..9c191cbb 100644 --- a/src/interrupts/idt.rs +++ b/src/interrupts/idt.rs @@ -102,14 +102,14 @@ impl EntryOptions { } #[allow(dead_code)] - pub fn set_privilege_level(&mut self, dpl: u8) -> &mut Self { - self.0.set_range(13..15, dpl.into()); + pub fn set_privilege_level(&mut self, dpl: u16) -> &mut Self { + self.0.set_range(13..15, dpl); self } #[allow(dead_code)] - pub fn set_stack_index(&mut self, index: u8) -> &mut Self { - self.0.set_range(0..3, (index + 1).into()); + pub fn set_stack_index(&mut self, index: u16) -> &mut Self { + self.0.set_range(0..3, index); self } } diff --git a/src/interrupts/mod.rs b/src/interrupts/mod.rs index 15dde3ed..6556236e 100644 --- a/src/interrupts/mod.rs +++ b/src/interrupts/mod.rs @@ -7,12 +7,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use spin::Once; -use memory::MemoryController; -use x86::bits64::task::TaskStateSegment; - mod idt; -mod gdt; macro_rules! save_scratch_registers { () => { @@ -98,46 +93,13 @@ lazy_static! 
{ idt.set_handler(0, handler!(divide_by_zero_handler)); idt.set_handler(3, handler!(breakpoint_handler)); idt.set_handler(6, handler!(invalid_opcode_handler)); - idt.set_handler(8, handler_with_error_code!(double_fault_handler)) - .set_stack_index(DOUBLE_FAULT_IST_INDEX as u8); idt.set_handler(14, handler_with_error_code!(page_fault_handler)); idt }; } -static TSS: Once = Once::new(); -static GDT: Once = Once::new(); -const DOUBLE_FAULT_IST_INDEX: usize = 0; - -pub fn init(memory_controller: &mut MemoryController) { - use x86::shared::segmentation::{SegmentSelector, set_cs}; - use x86::shared::task::load_tr; - - let double_fault_stack = memory_controller.alloc_stack(1) - .expect("could not allocate double fault stack"); - - let tss = TSS.call_once(|| { - let mut tss = TaskStateSegment::new(); - tss.ist[DOUBLE_FAULT_IST_INDEX] = double_fault_stack.top() as u64; - tss - }); - - let mut code_selector = SegmentSelector::empty(); - let mut tss_selector = SegmentSelector::empty(); - let gdt = GDT.call_once(|| { - let mut gdt = gdt::Gdt::new(); - tss_selector = gdt.add_entry(gdt::Descriptor::tss_segment(&tss)); - code_selector = gdt.add_entry(gdt::Descriptor::kernel_code_segment()); - gdt - }); - gdt.load(); - - unsafe { - set_cs(code_selector); - load_tr(tss_selector); - } - +pub fn init() { IDT.load(); } @@ -188,8 +150,3 @@ extern "C" fn page_fault_handler(stack_frame: &ExceptionStackFrame, error_code: stack_frame); loop {} } - -extern "C" fn double_fault_handler(stack_frame: &ExceptionStackFrame, _error_code: u64) { - println!("\nEXCEPTION: DOUBLE FAULT\n{:#?}", stack_frame); - loop {} -} diff --git a/src/lib.rs b/src/lib.rs index c2149d2b..441b2e5f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,7 +13,6 @@ #![feature(asm)] #![feature(naked_functions)] #![feature(core_intrinsics)] -#![feature(drop_types_in_const)] #![no_std] extern crate rlibc; @@ -52,31 +51,14 @@ pub extern "C" fn rust_main(multiboot_information_address: usize) { enable_write_protect_bit(); // set up 
guard page and map the heap pages - let mut memory_controller = memory::init(boot_info); + memory::init(boot_info); // initialize our IDT - interrupts::init(&mut memory_controller); + interrupts::init(); + // trigger a breakpoint exception unsafe { int!(3) }; - stack_overflow(); - // trigger a debug exception - unsafe { int!(1) }; - - fn divide_by_zero() { - unsafe { asm!("mov dx, 0; div dx" ::: "ax", "dx" : "volatile", "intel") } - } - - fn int_overflow() { - unsafe { asm!("mov al, 0xf0; add al, 0x10; into" ::: "ax", "dx" : "volatile", "intel") } - } - - fn stack_overflow() { - stack_overflow(); - } - - int_overflow(); - println!("It did not crash!"); loop {} } diff --git a/src/memory/mod.rs b/src/memory/mod.rs index 22693792..266aa5a2 100644 --- a/src/memory/mod.rs +++ b/src/memory/mod.rs @@ -9,17 +9,15 @@ pub use self::area_frame_allocator::AreaFrameAllocator; pub use self::paging::remap_the_kernel; -pub use self::stack_allocator::Stack; use self::paging::PhysicalAddress; use multiboot2::BootInformation; mod area_frame_allocator; mod paging; -mod stack_allocator; pub const PAGE_SIZE: usize = 4096; -pub fn init(boot_info: &BootInformation) -> MemoryController { +pub fn init(boot_info: &BootInformation) { assert_has_not_been_called!("memory::init must be called only once"); let memory_map_tag = boot_info.memory_map_tag().expect("Memory map tag required"); @@ -60,34 +58,6 @@ pub fn init(boot_info: &BootInformation) -> MemoryController { for page in Page::range_inclusive(heap_start_page, heap_end_page) { active_table.map(page, paging::WRITABLE, &mut frame_allocator); } - - let stack_allocator = { - let stack_alloc_start = heap_end_page + 1; - let stack_alloc_end = stack_alloc_start + 100; - let stack_alloc_range = Page::range_inclusive(stack_alloc_start, stack_alloc_end); - stack_allocator::new_stack_allocator(stack_alloc_range) - }; - - MemoryController { - active_table: active_table, - frame_allocator: frame_allocator, - stack_allocator: stack_allocator, - } -} - 
-pub struct MemoryController { - active_table: paging::ActivePageTable, - frame_allocator: AreaFrameAllocator, - stack_allocator: stack_allocator::StackAllocator, -} - -impl MemoryController { - pub fn alloc_stack(&mut self, size_in_pages: usize) -> Option { - let &mut MemoryController { ref mut active_table, - ref mut frame_allocator, - ref mut stack_allocator } = self; - stack_allocator.alloc_stack(active_table, frame_allocator, size_in_pages) - } } #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] diff --git a/src/memory/paging/mod.rs b/src/memory/paging/mod.rs index 2d7b48d7..6c80fbcd 100644 --- a/src/memory/paging/mod.rs +++ b/src/memory/paging/mod.rs @@ -11,7 +11,7 @@ pub use self::entry::*; use memory::{PAGE_SIZE, Frame, FrameAllocator}; use self::temporary_page::TemporaryPage; pub use self::mapper::Mapper; -use core::ops::{Deref, DerefMut, Add}; +use core::ops::{Deref, DerefMut}; use multiboot2::BootInformation; mod entry; @@ -37,7 +37,7 @@ impl Page { Page { number: address / PAGE_SIZE } } - pub fn start_address(&self) -> usize { + fn start_address(&self) -> usize { self.number * PAGE_SIZE } @@ -62,15 +62,6 @@ impl Page { } } -impl Add for Page { - type Output = Page; - - fn add(self, rhs: usize) -> Page { - Page { number: self.number + rhs } - } -} - -#[derive(Debug, Clone)] pub struct PageIter { start: Page, end: Page, diff --git a/src/memory/stack_allocator.rs b/src/memory/stack_allocator.rs deleted file mode 100644 index 47e8b5c3..00000000 --- a/src/memory/stack_allocator.rs +++ /dev/null @@ -1,75 +0,0 @@ -use memory::paging::{self, Page, PageIter, ActivePageTable}; -use memory::{PAGE_SIZE, FrameAllocator}; - -pub fn new_stack_allocator(page_range: PageIter) -> StackAllocator { - StackAllocator { range: page_range } -} - -pub struct StackAllocator { - range: PageIter, -} - -impl StackAllocator { - pub fn alloc_stack(&mut self, - active_table: &mut ActivePageTable, - frame_allocator: &mut FA, - size_in_pages: usize) - -> Option { - if size_in_pages == 0 
{ - return None; - } - - let mut range = self.range.clone(); - - // try to allocate the stack pages and a guard page - let guard_page = range.next(); - let stack_start = range.next(); - let stack_end = if size_in_pages == 1 { - stack_start - } else { - range.nth(size_in_pages - 2) - }; - - match (guard_page, stack_start, stack_end) { - (Some(_), Some(start), Some(end)) => { - // success! write back updated range - self.range = range; - - // map stack pages to physical frames - for page in Page::range_inclusive(start, end) { - active_table.map(page, paging::WRITABLE, frame_allocator); - } - - // create a new stack - let top_of_stack = end.start_address() + PAGE_SIZE; - Some(Stack::new(top_of_stack, start.start_address())) - } - _ => None, /* not enough pages */ - } - } -} - -#[derive(Debug)] -pub struct Stack { - top: usize, - bottom: usize, -} - -impl Stack { - fn new(top: usize, bottom: usize) -> Stack { - assert!(top > bottom); - Stack { - top: top, - bottom: bottom, - } - } - - pub fn top(&self) -> usize { - self.top - } - - #[allow(dead_code)] - pub fn bottom(&self) -> usize { - self.bottom - } -} From 9090bf0c066fdd92fd2b4c22fbca4bd9313cddf5 Mon Sep 17 00:00:00 2001 From: Philipp Oppermann Date: Thu, 29 Dec 2016 15:32:45 +0100 Subject: [PATCH 20/31] Set correct release date --- blog/post/double-faults.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/blog/post/double-faults.md b/blog/post/double-faults.md index 469d49e8..3d8d8666 100644 --- a/blog/post/double-faults.md +++ b/blog/post/double-faults.md @@ -1,6 +1,6 @@ +++ title = "Double Faults" -date = "2016-11-08" +date = "2017-01-02" +++ In this post we explore double faults in detail. We also set up an _Interrupt Stack Table_ to catch double faults on a separate kernel stack. This way, we can completely prevent triple faults, even on kernel stack overflow. 
From dfa1cc48c93b73a182b0cbfcc22c0a24027cb534 Mon Sep 17 00:00:00 2001 From: Philipp Oppermann Date: Mon, 2 Jan 2017 15:35:06 +0100 Subject: [PATCH 21/31] Add a double fault handler (and trigger it) --- src/interrupts/mod.rs | 6 ++++++ src/lib.rs | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/interrupts/mod.rs b/src/interrupts/mod.rs index 6556236e..c0232760 100644 --- a/src/interrupts/mod.rs +++ b/src/interrupts/mod.rs @@ -93,6 +93,7 @@ lazy_static! { idt.set_handler(0, handler!(divide_by_zero_handler)); idt.set_handler(3, handler!(breakpoint_handler)); idt.set_handler(6, handler!(invalid_opcode_handler)); + idt.set_handler(8, handler_with_error_code!(double_fault_handler)); idt.set_handler(14, handler_with_error_code!(page_fault_handler)); idt @@ -150,3 +151,8 @@ extern "C" fn page_fault_handler(stack_frame: &ExceptionStackFrame, error_code: stack_frame); loop {} } + +extern "C" fn double_fault_handler(stack_frame: &ExceptionStackFrame, _error_code: u64) { + println!("\nEXCEPTION: DOUBLE FAULT\n{:#?}", stack_frame); + loop {} +} diff --git a/src/lib.rs b/src/lib.rs index 441b2e5f..6a8ca74f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -56,8 +56,8 @@ pub extern "C" fn rust_main(multiboot_information_address: usize) { // initialize our IDT interrupts::init(); - // trigger a breakpoint exception - unsafe { int!(3) }; + // trigger a debug exception + unsafe { int!(1) }; println!("It did not crash!"); loop {} From 47755d541d190333c4fccbf495da28eba9820d63 Mon Sep 17 00:00:00 2001 From: Philipp Oppermann Date: Mon, 2 Jan 2017 15:36:22 +0100 Subject: [PATCH 22/31] Trigger a stack overflow (still causes a triple fault) --- src/lib.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 6a8ca74f..fed0b0dd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -56,8 +56,12 @@ pub extern "C" fn rust_main(multiboot_information_address: usize) { // initialize our IDT interrupts::init(); - // trigger a debug 
exception - unsafe { int!(1) }; + fn stack_overflow() { + stack_overflow(); // for each recursion, the return address is pushed + } + + // trigger a stack overflow + stack_overflow(); println!("It did not crash!"); loop {} From 4347ff235fd9b3fcef6d603f6a898d60f010c1f3 Mon Sep 17 00:00:00 2001 From: Philipp Oppermann Date: Mon, 2 Jan 2017 15:45:14 +0100 Subject: [PATCH 23/31] Add a new stack_allocator module --- src/memory/mod.rs | 1 + src/memory/paging/mod.rs | 3 +- src/memory/stack_allocator.rs | 80 +++++++++++++++++++++++++++++++++++ 3 files changed, 83 insertions(+), 1 deletion(-) create mode 100644 src/memory/stack_allocator.rs diff --git a/src/memory/mod.rs b/src/memory/mod.rs index 266aa5a2..d902d17a 100644 --- a/src/memory/mod.rs +++ b/src/memory/mod.rs @@ -14,6 +14,7 @@ use multiboot2::BootInformation; mod area_frame_allocator; mod paging; +mod stack_allocator; pub const PAGE_SIZE: usize = 4096; diff --git a/src/memory/paging/mod.rs b/src/memory/paging/mod.rs index 6c80fbcd..fc09945b 100644 --- a/src/memory/paging/mod.rs +++ b/src/memory/paging/mod.rs @@ -37,7 +37,7 @@ impl Page { Page { number: address / PAGE_SIZE } } - fn start_address(&self) -> usize { + pub fn start_address(&self) -> usize { self.number * PAGE_SIZE } @@ -62,6 +62,7 @@ impl Page { } } +#[derive(Clone)] pub struct PageIter { start: Page, end: Page, diff --git a/src/memory/stack_allocator.rs b/src/memory/stack_allocator.rs new file mode 100644 index 00000000..0cbaee7d --- /dev/null +++ b/src/memory/stack_allocator.rs @@ -0,0 +1,80 @@ +use memory::paging::{self, Page, PageIter, ActivePageTable}; +use memory::{PAGE_SIZE, FrameAllocator}; + +pub struct StackAllocator { + range: PageIter, +} + +impl StackAllocator { + pub fn new(page_range: PageIter) -> StackAllocator { + StackAllocator { range: page_range } + } +} + +impl StackAllocator { + pub fn alloc_stack(&mut self, + active_table: &mut ActivePageTable, + frame_allocator: &mut FA, + size_in_pages: usize) + -> Option { + if size_in_pages 
== 0 { + return None; /* a zero sized stack makes no sense */ + } + + // clone the range, since we only want to change it on success + let mut range = self.range.clone(); + + // try to allocate the stack pages and a guard page + let guard_page = range.next(); + let stack_start = range.next(); + let stack_end = if size_in_pages == 1 { + stack_start + } else { + // choose the (size_in_pages-2)th element, since index + // starts at 0 and we already allocated the start page + range.nth(size_in_pages - 2) + }; + + match (guard_page, stack_start, stack_end) { + (Some(_), Some(start), Some(end)) => { + // success! write back updated range + self.range = range; + + // map stack pages to physical frames + for page in Page::range_inclusive(start, end) { + active_table.map(page, paging::WRITABLE, frame_allocator); + } + + // create a new stack + let top_of_stack = end.start_address() + PAGE_SIZE; + Some(Stack::new(top_of_stack, start.start_address())) + } + _ => None, /* not enough pages */ + } + } +} + +#[derive(Debug)] +pub struct Stack { + top: usize, + bottom: usize, +} + +impl Stack { + fn new(top: usize, bottom: usize) -> Stack { + assert!(top > bottom); + Stack { + top: top, + bottom: bottom, + } + } + + pub fn top(&self) -> usize { + self.top + } + + #[allow(dead_code)] + pub fn bottom(&self) -> usize { + self.bottom + } +} From 8ad76ba496b6b7db21f386ae22843c5b35859506 Mon Sep 17 00:00:00 2001 From: Philipp Oppermann Date: Mon, 2 Jan 2017 15:49:33 +0100 Subject: [PATCH 24/31] Create a new MemoryAllocator (and initialize it) --- src/memory/mod.rs | 31 ++++++++++++++++++++++++++++++- src/memory/paging/mod.rs | 10 +++++++++- 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/src/memory/mod.rs b/src/memory/mod.rs index d902d17a..e1355735 100644 --- a/src/memory/mod.rs +++ b/src/memory/mod.rs @@ -9,6 +9,7 @@ pub use self::area_frame_allocator::AreaFrameAllocator; pub use self::paging::remap_the_kernel; +pub use self::stack_allocator::Stack; use 
self::paging::PhysicalAddress; use multiboot2::BootInformation; @@ -18,7 +19,7 @@ mod stack_allocator; pub const PAGE_SIZE: usize = 4096; -pub fn init(boot_info: &BootInformation) { +pub fn init(boot_info: &BootInformation) -> MemoryController { assert_has_not_been_called!("memory::init must be called only once"); let memory_map_tag = boot_info.memory_map_tag().expect("Memory map tag required"); @@ -59,6 +60,34 @@ pub fn init(boot_info: &BootInformation) { for page in Page::range_inclusive(heap_start_page, heap_end_page) { active_table.map(page, paging::WRITABLE, &mut frame_allocator); } + + let stack_allocator = { + let stack_alloc_start = heap_end_page + 1; + let stack_alloc_end = stack_alloc_start + 100; + let stack_alloc_range = Page::range_inclusive(stack_alloc_start, stack_alloc_end); + stack_allocator::StackAllocator::new(stack_alloc_range) + }; + + MemoryController { + active_table: active_table, + frame_allocator: frame_allocator, + stack_allocator: stack_allocator, + } +} + +pub struct MemoryController { + active_table: paging::ActivePageTable, + frame_allocator: AreaFrameAllocator, + stack_allocator: stack_allocator::StackAllocator, +} + +impl MemoryController { + pub fn alloc_stack(&mut self, size_in_pages: usize) -> Option { + let &mut MemoryController { ref mut active_table, + ref mut frame_allocator, + ref mut stack_allocator } = self; + stack_allocator.alloc_stack(active_table, frame_allocator, size_in_pages) + } } #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] diff --git a/src/memory/paging/mod.rs b/src/memory/paging/mod.rs index fc09945b..bd6dd6b2 100644 --- a/src/memory/paging/mod.rs +++ b/src/memory/paging/mod.rs @@ -11,7 +11,7 @@ pub use self::entry::*; use memory::{PAGE_SIZE, Frame, FrameAllocator}; use self::temporary_page::TemporaryPage; pub use self::mapper::Mapper; -use core::ops::{Deref, DerefMut}; +use core::ops::{Add, Deref, DerefMut}; use multiboot2::BootInformation; mod entry; @@ -62,6 +62,14 @@ impl Page { } } +impl Add for Page { 
+ type Output = Page; + + fn add(self, rhs: usize) -> Page { + Page { number: self.number + rhs } + } +} + #[derive(Clone)] pub struct PageIter { start: Page, From e08bd375aedeccd1077d33d579f2a0ff08454674 Mon Sep 17 00:00:00 2001 From: Philipp Oppermann Date: Mon, 2 Jan 2017 15:51:56 +0100 Subject: [PATCH 25/31] Allocate a new double fault stack in interrupts::init --- src/interrupts/mod.rs | 7 ++++++- src/lib.rs | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/interrupts/mod.rs b/src/interrupts/mod.rs index c0232760..aad3b782 100644 --- a/src/interrupts/mod.rs +++ b/src/interrupts/mod.rs @@ -7,6 +7,8 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. +use memory::MemoryController; + mod idt; macro_rules! save_scratch_registers { @@ -100,7 +102,10 @@ lazy_static! { }; } -pub fn init() { +pub fn init(memory_controller: &mut MemoryController) { + let double_fault_stack = memory_controller.alloc_stack(1) + .expect("could not allocate double fault stack"); + IDT.load(); } diff --git a/src/lib.rs b/src/lib.rs index fed0b0dd..d7f8ccb8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -51,10 +51,10 @@ pub extern "C" fn rust_main(multiboot_information_address: usize) { enable_write_protect_bit(); // set up guard page and map the heap pages - memory::init(boot_info); + let mut memory_controller = memory::init(boot_info); // initialize our IDT - interrupts::init(); + interrupts::init(&mut memory_controller); fn stack_overflow() { stack_overflow(); // for each recursion, the return address is pushed From d330a67659624d208698c1aa2727108c6acb793e Mon Sep 17 00:00:00 2001 From: Philipp Oppermann Date: Mon, 2 Jan 2017 15:54:54 +0100 Subject: [PATCH 26/31] Create a new TaskStateSegment (that contains the double fault stack in its IST) --- src/interrupts/mod.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/interrupts/mod.rs b/src/interrupts/mod.rs index aad3b782..5c19c5c7 100644 --- 
a/src/interrupts/mod.rs +++ b/src/interrupts/mod.rs @@ -8,6 +8,7 @@ // except according to those terms. use memory::MemoryController; +use x86::bits64::task::TaskStateSegment; mod idt; @@ -102,10 +103,15 @@ lazy_static! { }; } +const DOUBLE_FAULT_IST_INDEX: usize = 0; + pub fn init(memory_controller: &mut MemoryController) { let double_fault_stack = memory_controller.alloc_stack(1) .expect("could not allocate double fault stack"); + let mut tss = TaskStateSegment::new(); + tss.ist[DOUBLE_FAULT_IST_INDEX] = double_fault_stack.top() as u64; + IDT.load(); } From 31ffe60052ebb6b3f132c86f05537980cf0f6f49 Mon Sep 17 00:00:00 2001 From: Philipp Oppermann Date: Mon, 2 Jan 2017 16:05:42 +0100 Subject: [PATCH 27/31] Add a new GDT module --- src/interrupts/gdt.rs | 96 +++++++++++++++++++++++++++++++++++++++++++ src/interrupts/mod.rs | 1 + 2 files changed, 97 insertions(+) create mode 100644 src/interrupts/gdt.rs diff --git a/src/interrupts/gdt.rs b/src/interrupts/gdt.rs new file mode 100644 index 00000000..d25ae1ff --- /dev/null +++ b/src/interrupts/gdt.rs @@ -0,0 +1,96 @@ +use x86::bits64::task::TaskStateSegment; +use x86::shared::segmentation::SegmentSelector; +use x86::shared::PrivilegeLevel; + +pub struct Gdt { + table: [u64; 8], + next_free: usize, +} + +impl Gdt { + pub fn new() -> Gdt { + Gdt { + table: [0; 8], + next_free: 1, + } + } + + pub fn add_entry(&mut self, entry: Descriptor) -> SegmentSelector { + let index = match entry { + Descriptor::UserSegment(value) => self.push(value), + Descriptor::SystemSegment(value_low, value_high) => { + let index = self.push(value_low); + self.push(value_high); + index + } + }; + SegmentSelector::new(index as u16, PrivilegeLevel::Ring0) + } + + fn push(&mut self, value: u64) -> usize { + if self.next_free < self.table.len() { + let index = self.next_free; + self.table[index] = value; + self.next_free += 1; + index + } else { + panic!("GDT full"); + } + } + + pub fn load(&'static self) { + use 
x86::shared::dtables::{DescriptorTablePointer, lgdt}; + use x86::shared::segmentation; + use core::mem::size_of; + + let ptr = DescriptorTablePointer { + base: self.table.as_ptr() as *const segmentation::SegmentDescriptor, + limit: (self.table.len() * size_of::() - 1) as u16, + }; + + unsafe { lgdt(&ptr) }; + } +} + +pub enum Descriptor { + UserSegment(u64), + SystemSegment(u64, u64), +} + +impl Descriptor { + pub fn kernel_code_segment() -> Descriptor { + let flags = USER_SEGMENT | PRESENT | EXECUTABLE | LONG_MODE; + Descriptor::UserSegment(flags.bits()) + } + + pub fn tss_segment(tss: &'static TaskStateSegment) -> Descriptor { + use core::mem::size_of; + use bit_field::BitField; + + let ptr = tss as *const _ as u64; + + let mut low = PRESENT.bits(); + // base + low.set_range(16..40, ptr.get_range(0..24)); + low.set_range(56..64, ptr.get_range(24..32)); + // limit (the `-1` in needed since the bound is inclusive) + low.set_range(0..16, (size_of::() - 1) as u64); + // type (0b1001 = available 64-bit tss) + low.set_range(40..44, 0b1001); + + let mut high = 0; + high.set_range(0..32, ptr.get_range(32..64)); + + Descriptor::SystemSegment(low, high) + } +} + +bitflags! { + flags DescriptorFlags: u64 { + const CONFORMING = 1 << 42, + const EXECUTABLE = 1 << 43, + const USER_SEGMENT = 1 << 44, + const PRESENT = 1 << 47, + const LONG_MODE = 1 << 53, + } +} diff --git a/src/interrupts/mod.rs b/src/interrupts/mod.rs index 5c19c5c7..691dd38a 100644 --- a/src/interrupts/mod.rs +++ b/src/interrupts/mod.rs @@ -11,6 +11,7 @@ use memory::MemoryController; use x86::bits64::task::TaskStateSegment; mod idt; +mod gdt; macro_rules! 
save_scratch_registers { () => { From c0d7206249342ba5465a5283ae5332716378648a Mon Sep 17 00:00:00 2001 From: Philipp Oppermann Date: Mon, 2 Jan 2017 16:23:52 +0100 Subject: [PATCH 28/31] Make TSS and GDT statics by using spin::Once --- src/interrupts/mod.rs | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/interrupts/mod.rs b/src/interrupts/mod.rs index 691dd38a..cec9dd7e 100644 --- a/src/interrupts/mod.rs +++ b/src/interrupts/mod.rs @@ -9,6 +9,7 @@ use memory::MemoryController; use x86::bits64::task::TaskStateSegment; +use spin::Once; mod idt; mod gdt; @@ -90,6 +91,8 @@ macro_rules! handler_with_error_code { }} } +const DOUBLE_FAULT_IST_INDEX: usize = 0; + lazy_static! { static ref IDT: idt::Idt = { let mut idt = idt::Idt::new(); @@ -104,14 +107,26 @@ lazy_static! { }; } -const DOUBLE_FAULT_IST_INDEX: usize = 0; +static TSS: Once = Once::new(); +static GDT: Once = Once::new(); pub fn init(memory_controller: &mut MemoryController) { let double_fault_stack = memory_controller.alloc_stack(1) .expect("could not allocate double fault stack"); - let mut tss = TaskStateSegment::new(); - tss.ist[DOUBLE_FAULT_IST_INDEX] = double_fault_stack.top() as u64; + let tss = TSS.call_once(|| { + let mut tss = TaskStateSegment::new(); + tss.ist[DOUBLE_FAULT_IST_INDEX] = double_fault_stack.top() as u64; + tss + }); + + let gdt = GDT.call_once(|| { + let mut gdt = gdt::Gdt::new(); + let code_selector = gdt.add_entry(gdt::Descriptor::kernel_code_segment()); + let tss_selector = gdt.add_entry(gdt::Descriptor::tss_segment(&tss)); + gdt + }); + gdt.load(); IDT.load(); } From 006619920e3f5d10e12ba17521a540866aadd5da Mon Sep 17 00:00:00 2001 From: Philipp Oppermann Date: Mon, 2 Jan 2017 16:24:57 +0100 Subject: [PATCH 29/31] Reload code segment register and load TSS --- src/interrupts/mod.rs | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/interrupts/mod.rs b/src/interrupts/mod.rs index cec9dd7e..d059cffe 
100644 --- a/src/interrupts/mod.rs +++ b/src/interrupts/mod.rs @@ -111,6 +111,9 @@ static TSS: Once = Once::new(); static GDT: Once = Once::new(); pub fn init(memory_controller: &mut MemoryController) { + use x86::shared::segmentation::{SegmentSelector, set_cs}; + use x86::shared::task::load_tr; + let double_fault_stack = memory_controller.alloc_stack(1) .expect("could not allocate double fault stack"); @@ -120,14 +123,23 @@ pub fn init(memory_controller: &mut MemoryController) { tss }); + let mut code_selector = SegmentSelector::empty(); + let mut tss_selector = SegmentSelector::empty(); let gdt = GDT.call_once(|| { let mut gdt = gdt::Gdt::new(); - let code_selector = gdt.add_entry(gdt::Descriptor::kernel_code_segment()); - let tss_selector = gdt.add_entry(gdt::Descriptor::tss_segment(&tss)); + code_selector = gdt.add_entry(gdt::Descriptor::kernel_code_segment()); + tss_selector = gdt.add_entry(gdt::Descriptor::tss_segment(&tss)); gdt }); gdt.load(); + unsafe { + // reload code segment register + set_cs(code_selector); + // load TSS + load_tr(tss_selector); + } + IDT.load(); } From a43b23b6190e9206f7a201f04e6780fed8a04545 Mon Sep 17 00:00:00 2001 From: Philipp Oppermann Date: Mon, 2 Jan 2017 16:51:01 +0100 Subject: [PATCH 30/31] Catch double faults on a separate stack --- src/interrupts/idt.rs | 5 +++-- src/interrupts/mod.rs | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/interrupts/idt.rs b/src/interrupts/idt.rs index 9c191cbb..b139b648 100644 --- a/src/interrupts/idt.rs +++ b/src/interrupts/idt.rs @@ -107,9 +107,10 @@ impl EntryOptions { self } - #[allow(dead_code)] pub fn set_stack_index(&mut self, index: u16) -> &mut Self { - self.0.set_range(0..3, index); + // The hardware IST index starts at 1, but our software IST index + // starts at 0. Therefore we need to add 1 here. 
+ self.0.set_range(0..3, index + 1); self } } diff --git a/src/interrupts/mod.rs b/src/interrupts/mod.rs index d059cffe..3d4fc5b9 100644 --- a/src/interrupts/mod.rs +++ b/src/interrupts/mod.rs @@ -100,7 +100,8 @@ lazy_static! { idt.set_handler(0, handler!(divide_by_zero_handler)); idt.set_handler(3, handler!(breakpoint_handler)); idt.set_handler(6, handler!(invalid_opcode_handler)); - idt.set_handler(8, handler_with_error_code!(double_fault_handler)); + idt.set_handler(8, handler_with_error_code!(double_fault_handler)) + .set_stack_index(DOUBLE_FAULT_IST_INDEX as u16); idt.set_handler(14, handler_with_error_code!(page_fault_handler)); idt From c39070da4ae1e6444b61c05d9e10259723906126 Mon Sep 17 00:00:00 2001 From: Philipp Oppermann Date: Mon, 2 Jan 2017 19:15:50 +0100 Subject: [PATCH 31/31] =?UTF-8?q?=E2=80=9CDouble=20Faults=E2=80=9D=20is=20?= =?UTF-8?q?the=2012th=20post?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- blog/post/{double-faults.md => 12-double-faults.md} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename blog/post/{double-faults.md => 12-double-faults.md} (100%) diff --git a/blog/post/double-faults.md b/blog/post/12-double-faults.md similarity index 100% rename from blog/post/double-faults.md rename to blog/post/12-double-faults.md