mirror of
https://github.com/phil-opp/blog_os.git
synced 2025-12-16 14:27:49 +00:00
Compare commits
43 Commits
threads
...
better_exc
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
87d95dbf0d | ||
|
|
91672e3cac | ||
|
|
a91322017e | ||
|
|
f0df9fb937 | ||
|
|
135d8891f3 | ||
|
|
58de55a2d4 | ||
|
|
94d4f8df8e | ||
|
|
d5c84a860a | ||
|
|
ce6b43cc9b | ||
|
|
090e0785ff | ||
|
|
5ce6655aa1 | ||
|
|
e8d1f96b76 | ||
|
|
e09df45b9b | ||
|
|
7f80db6fb4 | ||
|
|
c764e6bb43 | ||
|
|
0ec126fc7d | ||
|
|
258a7d1aba | ||
|
|
6e20fe17db | ||
|
|
2aeeaecf86 | ||
|
|
e63dda6953 | ||
|
|
af8578ec78 | ||
|
|
4c9f7cfbee | ||
|
|
c26fb53547 | ||
|
|
7871e4008a | ||
|
|
e1a90fe3ab | ||
|
|
e17fd2d0de | ||
|
|
2df8517816 | ||
|
|
550a99cfd4 | ||
|
|
86a757cb61 | ||
|
|
bf72618647 | ||
|
|
308b033ea9 | ||
|
|
07bef978ad | ||
|
|
ee48ec5e29 | ||
|
|
6590531a41 | ||
|
|
df99382cda | ||
|
|
979663acda | ||
|
|
8bb46c6b62 | ||
|
|
4eda7993a2 | ||
|
|
b949fba62f | ||
|
|
83978e8417 | ||
|
|
9113a63f5e | ||
|
|
9e45cf65bc | ||
|
|
06fb4d6596 |
@@ -15,10 +15,4 @@ addons:
|
||||
packages:
|
||||
- nasm
|
||||
|
||||
script: bash scripts/travis-build.sh
|
||||
|
||||
after_success: bash scripts/travis-trigger-hugo-build.sh
|
||||
|
||||
env:
|
||||
global:
|
||||
secure: YQvZpLe32k8N+vLMa8PB80LKJdiD4WEUuw2lPFk3AJLSK6dojWq8MKjGuedxLvtmjURu401m7D06NFCjwd6j0dpR4L2xmVJ/EgufP2Wc62BUa2XvIIU/zURv0dFpGvaOTyHKl4j46SIk2LDugIzB1WyYRqeYTCpiN0xHuWsywXjPpCCJ04ftxHwrjYjzA8vvtyehjMeN4HC3J9r6anFlN9Ka7RFVSQ0Bun79pd6Xa/OPIxTsZuw24Ru4v458e4QxVh0atJif0lqmu5tZAeR/S0FTnG3XlKqfIYDmDWKJVKUwk6AHnsgaYAFASQhi9XOcr+cMir38/8k8FQpx80hjqkWXCZafUWuypgQnZOKS8K1oZLmMAQnrssM8HbMZEIYH40I8G5MqcpgPUvdwFO2PMGRNimMk6bEvrtheNwZn1XBTxgNOW4huCBoR0/E95FFGiTH5HL2kZi5N5+1EGJMdhPGjybIdJgawsAY2SOdw6rvHgf+ZMkJFBU2R9ftS88DHygMhkOifLDUq3GFPiFePNrE97LBFH3WN48fxPTcske00tAPTMcMGvw8zDBH4tZAHQikH6oMZFzKNQ0GvBTiKNvLMl1HaCcM/h1QmGXho4HtVD6PNrgOocdVxxXBcntQMVewkcr3hwDwaAZ/7+AqjeG9M+V2wEkI20Y4XYiq24HY=
|
||||
script: make
|
||||
|
||||
@@ -4,21 +4,20 @@ name = "blog_os"
|
||||
version = "0.1.0"
|
||||
|
||||
[dependencies]
|
||||
bit_field = "0.1.0"
|
||||
bit_field = "0.7.0"
|
||||
bitflags = "0.7.0"
|
||||
multiboot2 = "0.1.0"
|
||||
once = "0.2.1"
|
||||
rlibc = "0.1.4"
|
||||
spin = "0.3.4"
|
||||
volatile = "0.1.0"
|
||||
|
||||
[dependencies.hole_list_allocator]
|
||||
path = "libs/hole_list_allocator"
|
||||
|
||||
[dependencies.multiboot2]
|
||||
git = "https://github.com/phil-opp/multiboot2-elf64"
|
||||
|
||||
[dependencies.x86]
|
||||
default-features = false
|
||||
version = "0.7.1"
|
||||
version = "0.8.0"
|
||||
|
||||
[lib]
|
||||
crate-type = ["staticlib"]
|
||||
|
||||
2
Makefile
2
Makefile
@@ -1,4 +1,4 @@
|
||||
# Copyright 2015 Philipp Oppermann. See the README.md
|
||||
# Copyright 2016 Philipp Oppermann. See the README.md
|
||||
# file at the top-level directory of this distribution.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
|
||||
42
README.md
42
README.md
@@ -1,42 +1,14 @@
|
||||
# Blog OS
|
||||
# Blog OS (Better Exception Messages)
|
||||
[](https://travis-ci.org/phil-opp/blog_os/branches)
|
||||
|
||||
[](https://travis-ci.org/phil-opp/blog_os) [](https://gitter.im/phil-opp/blog_os?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
|
||||
This repository contains the source code for the [Better Exception Messages](http://os.phil-opp.com/better-exception-messages.html) post of the [Writing an OS in Rust](http://os.phil-opp.com) series.
|
||||
|
||||
This repository contains the source code for the _Writing an OS in Rust_ series at [os.phil-opp.com](http://os.phil-opp.com).
|
||||
|
||||
## Bare Bones
|
||||
- [A Minimal x86 Kernel](http://os.phil-opp.com/multiboot-kernel.html)
|
||||
([source code](https://github.com/phil-opp/blog_os/tree/multiboot_bootstrap))
|
||||
- [Entering Long Mode](http://os.phil-opp.com/entering-longmode.html)
|
||||
([source code](https://github.com/phil-opp/blog_os/tree/entering_longmode))
|
||||
- [Set Up Rust](http://os.phil-opp.com/set-up-rust.html)
|
||||
([source code](https://github.com/phil-opp/blog_os/tree/set_up_rust))
|
||||
- [Printing to Screen](http://os.phil-opp.com/printing-to-screen.html)
|
||||
([source code](https://github.com/phil-opp/blog_os/tree/printing_to_screen))
|
||||
|
||||
## Memory Management
|
||||
- [Allocating Frames](http://os.phil-opp.com/allocating-frames.html)
|
||||
([source code](https://github.com/phil-opp/blog_os/tree/allocating_frames))
|
||||
- [Page Tables](http://os.phil-opp.com/modifying-page-tables.html)
|
||||
([source code](https://github.com/phil-opp/blog_os/tree/page_tables))
|
||||
- [Remap the Kernel](http://os.phil-opp.com/remap-the-kernel.html)
|
||||
([source code](https://github.com/phil-opp/blog_os/tree/remap_the_kernel))
|
||||
- [Kernel Heap](http://os.phil-opp.com/kernel-heap.html)
|
||||
([source code](https://github.com/phil-opp/blog_os/tree/kernel_heap))
|
||||
|
||||
## Exceptions
|
||||
- [Catching Exceptions](http://os.phil-opp.com/catching-exceptions.html)
|
||||
([source code](https://github.com/phil-opp/blog_os/tree/catching_exceptions))
|
||||
|
||||
## Additional Resources
|
||||
- [Cross Compile Binutils](http://os.phil-opp.com/cross-compile-binutils.html)
|
||||
- [Cross Compile libcore](http://os.phil-opp.com/cross-compile-libcore.html)
|
||||
- [Set Up GDB](http://os.phil-opp.com/set-up-gdb.html)
|
||||
**Check out the [master branch](https://github.com/phil-opp/blog_os) for more information.**
|
||||
|
||||
## Building
|
||||
You need to have `nasm`, `grub-mkrescue`, `xorriso`, `qemu` and a nighly Rust compiler installed. Then you can run it using `make run`.
|
||||
You need to have `nasm`, `grub-mkrescue`, `xorriso`, `qemu`, and a nightly Rust compiler installed. Then you can run it using `make run`.
|
||||
|
||||
Please file an issue if you run into any problems.
|
||||
Please file an issue if you have any problems.
|
||||
|
||||
## License
|
||||
The source code is dual-licensed under MIT or the Apache License (Version 2.0). This excludes the `blog` directory.
|
||||
The source code is dual-licensed under MIT or the Apache License (Version 2.0).
|
||||
|
||||
@@ -1,46 +0,0 @@
|
||||
+++
|
||||
title = "Cross Compile Binutils"
|
||||
+++
|
||||
|
||||
The [GNU Binutils] are a collection of various binary tools such as `ld`, `as`, `objdump`, or `readelf`. These tools are platform-specific, so you need to compile them again if your host system and target system are different. In our case, we need `ld` and `objdump` for the x86_64 architecture.
|
||||
[GNU Binutils]: https://www.gnu.org/software/binutils/
|
||||
|
||||
## Building Setup
|
||||
First, you need to download a current binutils version from [here][download] \(the latest one is near the bottom). After extracting, you should have a folder named `binutils-2.X` where `X` is for example `25.1`. Now you can create and switch to a new folder for building (recommended):
|
||||
[download]: ftp://sourceware.org/pub/binutils/snapshots
|
||||
|
||||
```bash
|
||||
mkdir build-binutils
|
||||
cd build-binutils
|
||||
```
|
||||
|
||||
## Configuration
|
||||
We execute binutils's `configure` and pass a lot of arguments to it (replace the `X` with the version number):
|
||||
|
||||
```bash
|
||||
../binutils-2.X/configure --target=x86_64-elf --prefix="$HOME/opt/cross" \
|
||||
--disable-nls --disable-werror \
|
||||
--disable-gdb --disable-libdecnumber --disable-readline --disable-sim
|
||||
```
|
||||
- The `target` argument specifies the x86_64 target architecture.
|
||||
- The `prefix` argument selects the installation directory, you can change it if you like. But be careful that you do not overwrite your system's binutils.
|
||||
- The `disable-nls` flag disables native language support (so you'll get the same English error messages). It also reduces build dependencies.
|
||||
- The `disable-werror` flag prevents compiler warnings from being treated as errors, so the build does not fail on harmless warnings.
|
||||
- The last line disables features we don't need to reduce compile time.
|
||||
|
||||
## Building it
|
||||
Now we can build and install it to the location supplied as `prefix` (it will take a while):
|
||||
|
||||
```bash
|
||||
make
|
||||
make install
|
||||
```
|
||||
Now you should have multiple `x86_64-elf-XXX` files in `$HOME/opt/cross/bin`.
|
||||
|
||||
## Adding it to the PATH
|
||||
To use the tools from the command line easily, you should add the `bin` folder to your PATH:
|
||||
|
||||
```bash
|
||||
export PATH="$HOME/opt/cross/bin:$PATH"
|
||||
```
|
||||
If you add this line to your e.g. `.bashrc`, the `x86_64-elf-XXX` commands are always available.
|
||||
@@ -1,47 +0,0 @@
|
||||
+++
|
||||
title = "Cross Compiling: libcore"
|
||||
+++
|
||||
|
||||
So you're getting an ``error: can't find crate for `core` [E0463]`` when using `--target x86_64-unknown-linux-gnu`. That means that you're not running Linux or not using an x86_64 processor.
|
||||
|
||||
**If you have an x86_64 processor and want a quick fix**, try it with `x86_64-pc-windows-gnu` or `x86_64-apple-darwin` (or simply omit the explicit `--target`).
|
||||
|
||||
The idiomatic alternative and the only option for non x86_64 CPUs is described below. Note that you need to [cross compile binutils], too.
|
||||
[cross compile binutils]: /cross-compile-binutils.html
|
||||
|
||||
## Libcore
|
||||
The core library is a dependency-free library that is added implicitly when using `#![no_std]`. It provides basic standard library features like Option or Iterator. The core library is installed together with the rust compiler (just like the std library). But the installed libcore is specific to your architecture. If you aren't working on x86_64 Linux and pass `‑‑target x86_64‑unknown‑linux‑gnu` to cargo, it can't find a x86_64 libcore. To fix this, you can either download it or build it using cargo.
|
||||
|
||||
## Download it
|
||||
You need to download the 64-bit Linux Rust build corresponding to your installed nightly. You can either just update to the current nightly and download the current nightly source [here][Rust downloads]. Or you retrieve your installed version through `rustc --version` and search the corresponding subfolder [here](http://static.rust-lang.org/dist/).
|
||||
[Rust downloads]: https://www.rust-lang.org/downloads.html
|
||||
|
||||
After extracting it, you need to copy the `x86_64-unknown-linux-gnu` folder in `rust-std-x86_64-unknown-linux-gnu/lib/rustlib` to your local Rust installation. For multirust, the right target folder is `~/.multirust/toolchains/nightly/lib/rustlib`. That's it!
|
||||
|
||||
## Build it using cargo
|
||||
The alternative is to use cargo to build libcore. But this variant has one big disadvantage: You have to modify each crate you depend on because it needs to use the same libcore. So you can't just add a crates.io dependency anymore, you need to fork and modify it first.
|
||||
|
||||
If you want to build libcore anyway, you need its source code. You can either clone the [rust repository] \(makes updates easy) or manually [download the Rust source][Rust downloads] \(faster and less memory).
|
||||
[rust repository]: https://github.com/rust-lang/rust
|
||||
|
||||
Now we create a new cargo project named `core`, but delete its `src` folder:
|
||||
|
||||
```bash
|
||||
cargo new core
|
||||
rm -r core/src
|
||||
```
|
||||
|
||||
Then we create a symbolic link named `src` to the `rust/src/libcore` of the Rust source code:
|
||||
|
||||
```bash
|
||||
ln -s ../rust/src/libcore core/src
|
||||
```
|
||||
|
||||
To use our new libcore crate (instead of the one installed together with rust) in our OS, we need to add it as a local dependency in the `Cargo.toml`:
|
||||
|
||||
```toml
|
||||
...
|
||||
[dependencies.core]
|
||||
path = "core"
|
||||
```
|
||||
Now cargo compiles libcore for all Rust targets automatically.
|
||||
@@ -1,74 +0,0 @@
|
||||
+++
|
||||
title = "Set Up GDB"
|
||||
+++
|
||||
|
||||
There are a lot of things that can go wrong when developing an OS. So it's a good idea to add a debugger to our toolset, which allows us to set breakpoints and examine variables. We will use [GDB](https://www.gnu.org/software/gdb/) as QEMU supports it out of the box.
|
||||
|
||||
### QEMU parameters
|
||||
To make QEMU listen for a gdb connection, we add the `-s` flag to the `run` target in our Makefile:
|
||||
|
||||
```make
|
||||
run: $(iso)
|
||||
@qemu-system-x86_64 -cdrom $(iso) -s
|
||||
```
|
||||
This allows us to connect a debugger at any time, for example to investigate why a panic occurred.
|
||||
|
||||
To wait for a debugger connection on startup, we add a `debug` target to the Makefile:
|
||||
|
||||
```make
|
||||
debug: $(iso)
|
||||
@qemu-system-x86_64 -cdrom $(iso) -s -S
|
||||
```
|
||||
It is identical to the `run` target except for the additional `-S` flag. This flag causes QEMU to freeze on startup and wait until a debugger is connected. Now it _should_ be possible to connect gdb.
|
||||
|
||||
### The annoying issue
|
||||
Unfortunately gdb has an issue with the switch to long mode. If we connect when the CPU is already in long mode, everything works fine. But if we use `make debug` and thus connect right at the start, we get an error when we set a breakpoint in 64-bit mode:
|
||||
|
||||
```
|
||||
Remote 'g' packet reply is too long: [a very long number]
|
||||
```
|
||||
This issue is known [since 2012][gdb issue patch] but it is still not fixed. Maybe we find the reason in the [issue thread][gdb issue thread]:
|
||||
|
||||
[gdb issue patch]: http://www.cygwin.com/ml/gdb-patches/2012-03/msg00116.html
|
||||
[gdb issue thread]: https://sourceware.org/bugzilla/show_bug.cgi?id=13984#c11
|
||||
|
||||
> from my (limited) experience, unless you ping the gdb-patches list weekly, this patch is more likely to remain forgotten :-)
|
||||
|
||||
Pretty frustrating, especially since the patch is [very small][gdb patch commit].
|
||||
|
||||
[gdb patch commit]: https://github.com/phil-opp/binutils-gdb/commit/9e88c451844ad38bb82fe77d1f388c87c41b4520
|
||||
|
||||
### Building the patched GDB
|
||||
So the only way to use gdb with `make debug` is to build a modified gdb version that includes the patch. I created a repository with the patched GDB to make this easy. Just follow [the build instructions].
|
||||
|
||||
[the build instructions]: https://github.com/phil-opp/binutils-gdb#gdb-for-64-bit-rust-operating-systems
|
||||
|
||||
### Connecting GDB
|
||||
Now you should have a `rust-os-gdb` subfolder. In its `bin` directory you find the `gdb` executable and the `rust-gdb` script, which [improves rendering of Rust types]. To make it easy to use it for our OS, we add a `make gdb` target to our Makefile:
|
||||
|
||||
[improves rendering of Rust types]: https://michaelwoerister.github.io/2015/03/27/rust-xxdb.html
|
||||
|
||||
```make
|
||||
gdb:
|
||||
@rust-os-gdb/bin/rust-gdb "build/kernel-x86_64.bin" -ex "target remote :1234"
|
||||
```
|
||||
It loads the debug information from our kernel binary and connects to the `localhost:1234` port, on which QEMU listens by default.
|
||||
|
||||
### Using GDB
|
||||
After connecting to QEMU, you can use various gdb commands to control execution and examine data. All commands can be abbreviated as long as they are still unique. For example, you can write `c` or `cont` instead of `continue`. The most important commands are:
|
||||
|
||||
- `help` or `h`: Show the help.
|
||||
- `break` or `b`: Set a breakpoint. It is possible to break on functions such as `rust_main` or on source lines such as `lib.rs:42`. You can use tab for autocompletion and omit parts of the path as long as it's still unique. To modify breakpoints, you can use `disable`, `enable`, and `delete` plus the breakpoint number.
|
||||
- `continue` or `c`: Continue execution until a breakpoint is reached.
|
||||
- `next` or `n`: Step over the current line and break on the next line of the function. Sometimes this doesn't work in Rust OSes.
|
||||
- `step` or `s`: Step into the current line, i.e. jump to the called function. Sometimes this doesn't work in Rust OSes.
|
||||
- `list` or `l`: Shows the source code around the current position.
|
||||
- `print` or `p`: Prints the value of a variable. You can use C's `*` and `&` operators. To print in hexadecimal, use `p/x`.
|
||||
- `tui enable`: Enables the text user interface, which provides a graphical interface (see below). To disable it again, run `tui disable`.
|
||||
|
||||

|
||||
|
||||
Of course there are many more commands. Feel free to send a PR if you think this list is missing something important. For a more complete GDB overview, check out [Beej's Quick Guide][bggdb] or the [website for Harvard's CS161 course][CS161].
|
||||
|
||||
[bggdb]: http://beej.us/guide/bggdb/
|
||||
[CS161]: http://www.eecs.harvard.edu/~cs161/resources/gdb.html
|
||||
@@ -1,9 +0,0 @@
|
||||
+++
|
||||
title = "Contact"
|
||||
+++
|
||||
|
||||
Philipp Oppermann
|
||||
|
||||
<big>contact@phil-opp.com</big>
|
||||
|
||||
<small>Dr.Gustav-Knodel-Str. 7b, 76344 Eggenstein, Germany</small>
|
||||
@@ -1,325 +0,0 @@
|
||||
+++
|
||||
title = "A minimal x86 kernel"
|
||||
slug = "multiboot-kernel"
|
||||
date = "2015-08-18"
|
||||
aliases = [
|
||||
"/2015/08/18/multiboot-kernel/",
|
||||
"/rust-os/multiboot-kernel.html",
|
||||
]
|
||||
+++
|
||||
|
||||
This post explains how to create a minimal x86 operating system kernel. In fact, it will just boot and print `OK` to the screen. In the following blog posts, we will extend it using the [Rust] programming language.
|
||||
|
||||
[Rust]: http://www.rust-lang.org/
|
||||
|
||||
<!--more--><aside id="toc"></aside>
|
||||
|
||||
I tried to explain everything in detail and to keep the code as simple as possible. If you have any questions, suggestions or other issues, please leave a comment or [create an issue] on Github. The source code is available in a [repository][source code], too.
|
||||
|
||||
[create an issue]: https://github.com/phil-opp/blog_os/issues
|
||||
[source code]: https://github.com/phil-opp/blog_os/tree/multiboot_bootstrap/src/arch/x86_64
|
||||
|
||||
Note that this tutorial is written mainly for Linux. For some known problems on OS X see the comment section and [this issue][mac os issue]. If you want to use a virtual Linux machine, you can find instructions and a Vagrantfile in Ashley Williams's [x86-kernel repository].
|
||||
|
||||
[mac os issue]: https://github.com/phil-opp/blog_os/issues/55
|
||||
[x86-kernel repository]: https://github.com/ashleygwilliams/x86-kernel
|
||||
|
||||
## Overview
|
||||
When you turn on a computer, it loads the [BIOS] from some special flash memory. The BIOS runs self test and initialization routines of the hardware, then it looks for bootable devices. If it finds one, the control is transferred to its _bootloader_, which is a small portion of executable code stored at the device's beginning. The bootloader has to determine the location of the kernel image on the device and load it into memory. It also needs to switch the CPU to the so-called [protected mode] because x86 CPUs start in the very limited [real mode] by default (to be compatible to programs from 1978).
|
||||
|
||||
[BIOS]: https://en.wikipedia.org/wiki/BIOS
|
||||
[protected mode]: https://en.wikipedia.org/wiki/Protected_mode
|
||||
[real mode]: http://wiki.osdev.org/Real_Mode
|
||||
|
||||
We won't write a bootloader because that would be a complex project on its own (if you really want to do it, check out [_Rolling Your Own Bootloader_]). Instead we will use one of the [many well-tested bootloaders][bootloader comparison] out there. But which one?
|
||||
|
||||
[_Rolling Your Own Bootloader_]: http://wiki.osdev.org/Rolling_Your_Own_Bootloader
|
||||
[bootloader comparison]: https://en.wikipedia.org/wiki/Comparison_of_boot_loaders
|
||||
|
||||
## Multiboot
|
||||
Fortunately there is a bootloader standard: the [Multiboot Specification][multiboot]. Our kernel just needs to indicate that it supports Multiboot and every Multiboot-compliant bootloader can boot it. We will use the Multiboot 2 specification ([PDF][Multiboot 2]) together with the well-known [GRUB 2] bootloader.
|
||||
|
||||
[multiboot]: https://en.wikipedia.org/wiki/Multiboot_Specification
|
||||
[multiboot 2]: http://nongnu.askapache.com/grub/phcoder/multiboot.pdf
|
||||
[grub 2]: http://wiki.osdev.org/GRUB_2
|
||||
|
||||
To indicate our Multiboot 2 support to the bootloader, our kernel must start with a _Multiboot Header_, which has the following format:
|
||||
|
||||
Field | Type | Value
|
||||
------------- | --------------- | ----------------------------------------
|
||||
magic number | u32 | `0xE85250D6`
|
||||
architecture | u32 | `0` for i386, `4` for MIPS
|
||||
header length | u32 | total header size, including tags
|
||||
checksum | u32 | `-(magic + architecture + header_length)`
|
||||
tags | variable |
|
||||
end tag | (u16, u16, u32) | `(0, 0, 8)`
|
||||
|
||||
Converted to a x86 assembly file it looks like this (Intel syntax):
|
||||
|
||||
```nasm
|
||||
section .multiboot_header
|
||||
header_start:
|
||||
dd 0xe85250d6 ; magic number (multiboot 2)
|
||||
dd 0 ; architecture 0 (protected mode i386)
|
||||
dd header_end - header_start ; header length
|
||||
; checksum
|
||||
dd 0x100000000 - (0xe85250d6 + 0 + (header_end - header_start))
|
||||
|
||||
; insert optional multiboot tags here
|
||||
|
||||
; required end tag
|
||||
dw 0 ; type
|
||||
dw 0 ; flags
|
||||
dd 8 ; size
|
||||
header_end:
|
||||
```
|
||||
If you don't know x86 assembly, here is a quick guide:
|
||||
|
||||
- the header will be written to a section named `.multiboot_header` (we need this later)
|
||||
- `header_start` and `header_end` are _labels_ that mark a memory location, we use them to calculate the header length easily
|
||||
- `dd` stands for `define double` (32bit) and `dw` stands for `define word` (16bit). They just output the specified 32bit/16bit constant.
|
||||
- the additional `0x100000000` in the checksum calculation is a small hack[^fn-checksum_hack] to avoid a compiler warning
|
||||
|
||||
[^fn-checksum_hack]: The formula from the table, `-(magic + architecture + header_length)`, creates a negative value that doesn't fit into 32bit. By subtracting from `0x100000000` (= 2^(32)) instead, we keep the value positive without changing its truncated value. Without the additional sign bit(s) the result fits into 32bit and the compiler is happy :).
|
||||
|
||||
We can already _assemble_ this file (which I called `multiboot_header.asm`) using `nasm`. It produces a flat binary by default, so the resulting file just contains our 24 bytes (in little endian if you work on a x86 machine):
|
||||
|
||||
```
|
||||
> nasm multiboot_header.asm
|
||||
> hexdump -x multiboot_header
|
||||
0000000 50d6 e852 0000 0000 0018 0000 af12 17ad
|
||||
0000010 0000 0000 0008 0000
|
||||
0000018
|
||||
```
|
||||
|
||||
## The Boot Code
|
||||
To boot our kernel, we must add some code that the bootloader can call. Let's create a file named `boot.asm`:
|
||||
|
||||
```nasm
|
||||
global start
|
||||
|
||||
section .text
|
||||
bits 32
|
||||
start:
|
||||
; print `OK` to screen
|
||||
mov dword [0xb8000], 0x2f4b2f4f
|
||||
hlt
|
||||
```
|
||||
There are some new commands:
|
||||
|
||||
- `global` exports a label (makes it public). As `start` will be the entry point of our kernel, it needs to be public.
|
||||
- the `.text` section is the default section for executable code
|
||||
- `bits 32` specifies that the following lines are 32-bit instructions. It's needed because the CPU is still in [Protected mode] when GRUB starts our kernel. When we switch to [Long mode] in the [next post] we can use `bits 64` (64-bit instructions).
|
||||
- the `mov dword` instruction moves the 32bit constant `0x2f4b2f4f` to the memory at address `0xb8000` (it prints `OK` to the screen, an explanation follows in the next posts)
|
||||
- `hlt` is the halt instruction and causes the CPU to stop
|
||||
|
||||
Through assembling, viewing and disassembling we can see the CPU [Opcodes] in action:
|
||||
|
||||
[Opcodes]: https://en.wikipedia.org/wiki/Opcode
|
||||
|
||||
```
|
||||
> nasm boot.asm
|
||||
> hexdump -x boot
|
||||
0000000 05c7 8000 000b 2f4b 2f4f 00f4
|
||||
000000b
|
||||
> ndisasm -b 32 boot
|
||||
00000000 C70500800B004B2F mov dword [dword 0xb8000],0x2f4b2f4f
|
||||
-4F2F
|
||||
0000000A F4 hlt
|
||||
```
|
||||
|
||||
|
||||
## Building the Executable
|
||||
To boot our executable later through GRUB, it should be an [ELF] executable. So we want `nasm` to create ELF [object files] instead of plain binaries. To do that, we simply pass the `‑f elf64` argument to it.
|
||||
|
||||
[ELF]: https://en.wikipedia.org/wiki/Executable_and_Linkable_Format
|
||||
[object files]: http://wiki.osdev.org/Object_Files
|
||||
|
||||
To create the ELF _executable_, we need to [link] the object files together. We use a custom [linker script] named `linker.ld`:
|
||||
|
||||
[link]: https://en.wikipedia.org/wiki/Linker_(computing)
|
||||
[linker script]: https://sourceware.org/binutils/docs/ld/Scripts.html
|
||||
|
||||
```
|
||||
ENTRY(start)
|
||||
|
||||
SECTIONS {
|
||||
. = 1M;
|
||||
|
||||
.boot :
|
||||
{
|
||||
/* ensure that the multiboot header is at the beginning */
|
||||
*(.multiboot_header)
|
||||
}
|
||||
|
||||
.text :
|
||||
{
|
||||
*(.text)
|
||||
}
|
||||
}
|
||||
```
|
||||
Let's translate it:
|
||||
|
||||
- `start` is the entry point, the bootloader will jump to it after loading the kernel
|
||||
- `. = 1M;` sets the load address of the first section to 1 MiB, which is a conventional place to load a kernel[^Linker 1M]
|
||||
- the executable will have two sections: `.boot` at the beginning and `.text` afterwards
|
||||
- the `.text` output section contains all input sections named `.text`
|
||||
- Sections named `.multiboot_header` are added to the first output section (`.boot`) to ensure they are at the beginning of the executable. This is necessary because GRUB expects to find the Multiboot header very early in the file.
|
||||
|
||||
[^Linker 1M]: We don't want to load the kernel to e.g. `0x0` because there are many special memory areas below the 1MB mark (for example the so-called VGA buffer at `0xb8000`, that we use to print `OK` to the screen).
|
||||
|
||||
So let's create the ELF object files and link them using our new linker script:
|
||||
|
||||
```
|
||||
> nasm -f elf64 multiboot_header.asm
|
||||
> nasm -f elf64 boot.asm
|
||||
> ld -n -o kernel.bin -T linker.ld multiboot_header.o boot.o
|
||||
```
|
||||
It's important to pass the `-n` (or `--nmagic`) flag to the linker, which disables the automatic section alignment in the executable. Otherwise the linker may page align the `.boot` section in the executable file. If that happens, GRUB isn't able to find the Multiboot header because it isn't at the beginning anymore.
|
||||
|
||||
We can use `objdump` to print the sections of the generated executable and verify that the `.boot` section has a low file offset:
|
||||
|
||||
```
|
||||
> objdump -h kernel.bin
|
||||
kernel.bin: file format elf64-x86-64
|
||||
|
||||
Sections:
|
||||
Idx Name Size VMA LMA File off Algn
|
||||
0 .boot 00000018 0000000000100000 0000000000100000 00000080 2**0
|
||||
CONTENTS, ALLOC, LOAD, READONLY, DATA
|
||||
1 .text 0000000b 0000000000100020 0000000000100020 000000a0 2**4
|
||||
CONTENTS, ALLOC, LOAD, READONLY, CODE
|
||||
```
|
||||
_Note_: The `ld` and `objdump` commands are platform specific. If you're _not_ working on x86_64 architecture, you will need to [cross compile binutils]. Then use `x86_64‑elf‑ld` and `x86_64‑elf‑objdump` instead of `ld` and `objdump`.
|
||||
[cross compile binutils]: {{% relref "cross-compile-binutils.md" %}}
|
||||
|
||||
## Creating the ISO
|
||||
The last step is to create a bootable ISO image with GRUB. We need to create the following directory structure and copy the `kernel.bin` to the right place:
|
||||
|
||||
```
|
||||
isofiles
|
||||
└── boot
|
||||
├── grub
|
||||
│ └── grub.cfg
|
||||
└── kernel.bin
|
||||
|
||||
```
|
||||
The `grub.cfg` specifies the file name of our kernel and its Multiboot 2 compliance. It looks like this:
|
||||
|
||||
```
|
||||
set timeout=0
|
||||
set default=0
|
||||
|
||||
menuentry "my os" {
|
||||
multiboot2 /boot/kernel.bin
|
||||
boot
|
||||
}
|
||||
```
|
||||
Now we can create a bootable image using the command:
|
||||
|
||||
```
|
||||
grub-mkrescue -o os.iso isofiles
|
||||
```
|
||||
_Note_: `grub-mkrescue` causes problems on some platforms. If it does not work for you, try the following steps:
|
||||
|
||||
- try to run it with `--verbose`
|
||||
- make sure `xorriso` is installed (`xorriso` or `libisoburn` package)
|
||||
- If you're using an EFI-system, `grub-mkrescue` tries to create an EFI image by default. You can either pass `-d /usr/lib/grub/i386-pc` to avoid EFI or install the `mtools` package to get a working EFI image
|
||||
- on some systems the command is named `grub2-mkrescue`
|
||||
|
||||
## Booting
|
||||
Now it's time to boot our OS. We will use [QEMU]:
|
||||
|
||||
[QEMU]: https://en.wikipedia.org/wiki/QEMU
|
||||
|
||||
```
|
||||
qemu-system-x86_64 -cdrom os.iso
|
||||
```
|
||||

|
||||
|
||||
Notice the green `OK` in the upper left corner. If it does not work for you, take a look at the comment section.
|
||||
|
||||
Let's summarize what happens:
|
||||
|
||||
1. the BIOS loads the bootloader (GRUB) from the virtual hard drive (the ISO)
|
||||
2. the bootloader reads the kernel executable and finds the Multiboot header
|
||||
3. it copies the `.boot` and `.text` sections to memory (to addresses `0x100000` and `0x100020`)
|
||||
4. it jumps to the entry point (`0x100020`, you can obtain it through `objdump -f`)
|
||||
5. our kernel prints the green `OK` and stops the CPU
|
||||
|
||||
You can test it on real hardware, too. Just burn the ISO to a disk or USB stick and boot from it.
|
||||
|
||||
## Build Automation
|
||||
|
||||
Right now we need to execute 4 commands in the right order every time we change a file. That's bad. So let's automate the build using a [Makefile][Makefile tutorial]. But first we should create some clean directory structure for our source files to separate the architecture specific files:
|
||||
|
||||
[Makefile tutorial]: http://mrbook.org/blog/tutorials/make/
|
||||
|
||||
```
|
||||
…
|
||||
├── Makefile
|
||||
└── src
|
||||
└── arch
|
||||
└── x86_64
|
||||
├── multiboot_header.asm
|
||||
├── boot.asm
|
||||
├── linker.ld
|
||||
└── grub.cfg
|
||||
```
|
||||
The Makefile looks like this (indented with tabs instead of spaces):
|
||||
|
||||
```Makefile
|
||||
arch ?= x86_64
|
||||
kernel := build/kernel-$(arch).bin
|
||||
iso := build/os-$(arch).iso
|
||||
|
||||
linker_script := src/arch/$(arch)/linker.ld
|
||||
grub_cfg := src/arch/$(arch)/grub.cfg
|
||||
assembly_source_files := $(wildcard src/arch/$(arch)/*.asm)
|
||||
assembly_object_files := $(patsubst src/arch/$(arch)/%.asm, \
|
||||
build/arch/$(arch)/%.o, $(assembly_source_files))
|
||||
|
||||
.PHONY: all clean run iso
|
||||
|
||||
all: $(kernel)
|
||||
|
||||
clean:
|
||||
@rm -r build
|
||||
|
||||
run: $(iso)
|
||||
@qemu-system-x86_64 -cdrom $(iso)
|
||||
|
||||
iso: $(iso)
|
||||
|
||||
$(iso): $(kernel) $(grub_cfg)
|
||||
@mkdir -p build/isofiles/boot/grub
|
||||
@cp $(kernel) build/isofiles/boot/kernel.bin
|
||||
@cp $(grub_cfg) build/isofiles/boot/grub
|
||||
@grub-mkrescue -o $(iso) build/isofiles 2> /dev/null
|
||||
@rm -r build/isofiles
|
||||
|
||||
$(kernel): $(assembly_object_files) $(linker_script)
|
||||
@ld -n -T $(linker_script) -o $(kernel) $(assembly_object_files)
|
||||
|
||||
# compile assembly files
|
||||
build/arch/$(arch)/%.o: src/arch/$(arch)/%.asm
|
||||
@mkdir -p $(shell dirname $@)
|
||||
@nasm -felf64 $< -o $@
|
||||
```
|
||||
Some comments (see the [Makefile tutorial] if you don't know `make`):
|
||||
|
||||
- the `$(wildcard src/arch/$(arch)/*.asm)` chooses all assembly files in the `src/arch/$(arch)` directory, so you don't have to update the Makefile when you add a file
|
||||
- the `patsubst` operation for `assembly_object_files` just translates `src/arch/$(arch)/XYZ.asm` to `build/arch/$(arch)/XYZ.o`
|
||||
- the `$<` and `$@` in the assembly target are [automatic variables]
|
||||
- if you're using [cross-compiled binutils][cross compile binutils] just replace `ld` with `x86_64‑elf‑ld`
|
||||
|
||||
[automatic variables]: https://www.gnu.org/software/make/manual/html_node/Automatic-Variables.html
|
||||
|
||||
Now we can invoke `make` and all updated assembly files are compiled and linked. The `make iso` command also creates the ISO image and `make run` will additionally start QEMU.
|
||||
|
||||
## What's next?
|
||||
|
||||
In the [next post] we will create a page table and do some CPU configuration to switch to the 64-bit [long mode].
|
||||
|
||||
[next post]: {{% relref "2015-08-25-entering-longmode.md" %}}
|
||||
[long mode]: https://en.wikipedia.org/wiki/Long_mode
|
||||
@@ -1,537 +0,0 @@
|
||||
+++
|
||||
title = "Entering Long Mode"
|
||||
slug = "entering-longmode"
|
||||
date = "2015-08-25"
|
||||
updated = "2015-10-29"
|
||||
aliases = [
|
||||
"/2015/08/25/entering-longmode/",
|
||||
"/rust-os/entering-longmode.html",
|
||||
]
|
||||
+++
|
||||
|
||||
In the [previous post] we created a minimal multiboot kernel. It just prints `OK` and hangs. The goal is to extend it and call 64-bit [Rust] code. But the CPU is currently in [protected mode] and allows only 32-bit instructions and up to 4GiB memory. So we need to set up _Paging_ and switch to the 64-bit [long mode] first.
|
||||
|
||||
[previous post]: {{% relref "2015-08-18-multiboot-kernel.md" %}}
|
||||
[Rust]: http://www.rust-lang.org/
|
||||
[protected mode]: https://en.wikipedia.org/wiki/Protected_mode
|
||||
[long mode]: https://en.wikipedia.org/wiki/Long_mode
|
||||
|
||||
<!--more--><aside id="toc"></aside>
|
||||
|
||||
I tried to explain everything in detail and to keep the code as simple as possible. If you have any questions, suggestions, or issues, please leave a comment or [create an issue] on Github. The source code is available in a [repository][source code], too.
|
||||
|
||||
[create an issue]: https://github.com/phil-opp/blog_os/issues
|
||||
[source code]: https://github.com/phil-opp/blog_os/tree/entering_longmode/src/arch/x86_64
|
||||
|
||||
_Notable Changes_: We don't use 1GiB pages anymore, since they have [compatibility problems][1GiB page problems]. The identity mapping is now done through 2MiB pages.
|
||||
[1GiB page problems]: https://github.com/phil-opp/blog_os/issues/17
|
||||
|
||||
## Some Tests
|
||||
To avoid bugs and strange errors on old CPUs we should check if the processor supports every needed feature. If not, the kernel should abort and display an error message. To handle errors easily, we create an error procedure in `boot.asm`. It prints a rudimentary `ERR: X` message, where X is an error code letter, and hangs:
|
||||
|
||||
```nasm
|
||||
; Prints `ERR: ` and the given error code to screen and hangs.
|
||||
; parameter: error code (in ascii) in al
|
||||
error:
|
||||
mov dword [0xb8000], 0x4f524f45
|
||||
mov dword [0xb8004], 0x4f3a4f52
|
||||
mov dword [0xb8008], 0x4f204f20
|
||||
mov byte [0xb800a], al
|
||||
hlt
|
||||
```
|
||||
At address `0xb8000` begins the so-called [VGA text buffer]. It's an array of screen characters that are displayed by the graphics card. A [future post] will cover the VGA buffer in detail and create a Rust interface to it. But for now, manual bit-fiddling is the easiest option.
|
||||
|
||||
[VGA text buffer]: https://en.wikipedia.org/wiki/VGA-compatible_text_mode
|
||||
[future post]: {{% relref "2015-10-23-printing-to-screen.md" %}}
|
||||
|
||||
A screen character consists of an 8 bit color code and an 8 bit [ASCII] character. We used the color code `4f` for all characters, which means white text on red background. `0x52` is an ASCII `R`, `0x45` is an `E`, `0x3a` is a `:`, and `0x20` is a space. The second space is overwritten by the given ASCII byte. Finally the CPU is stopped with the `hlt` instruction.
|
||||
|
||||
[ASCII]: https://en.wikipedia.org/wiki/ASCII
|
||||
|
||||
Now we can add some check _functions_. A function is just a normal label with a `ret` (return) instruction at the end. The `call` instruction can be used to call it. Unlike the `jmp` instruction that just jumps to a memory address, the `call` instruction will push a return address to the stack (and the `ret` will jump to this address). But we don't have a stack yet. The [stack pointer] in the esp register could point to some important data or even invalid memory. So we need to update it and point it to some valid stack memory.
|
||||
|
||||
[stack pointer]: http://stackoverflow.com/a/1464052/866447
|
||||
|
||||
### Creating a Stack
|
||||
To create stack memory we reserve some bytes at the end of our `boot.asm`:
|
||||
|
||||
```nasm
|
||||
...
|
||||
section .bss
|
||||
stack_bottom:
|
||||
resb 64
|
||||
stack_top:
|
||||
```
|
||||
A stack doesn't need to be initialized because we will `pop` only when we `pushed` before. So storing the stack memory in the executable file would make it unnecessarily large. By using the [.bss] section and the `resb` (reserve byte) command, we just store the length of the uninitialized data (= 64). When loading the executable, GRUB will create the section of required size in memory.
|
||||
|
||||
[.bss]: https://en.wikipedia.org/wiki/.bss
|
||||
|
||||
To use the new stack, we update the stack pointer register right after `start`:
|
||||
|
||||
```nasm
|
||||
global start
|
||||
|
||||
section .text
|
||||
bits 32
|
||||
start:
|
||||
mov esp, stack_top
|
||||
|
||||
; print `OK` to screen
|
||||
...
|
||||
```
|
||||
We use `stack_top` because the stack grows downwards: A `push eax` subtracts 4 from `esp` and does a `mov [esp], eax` afterwards (`eax` is a general purpose register).
|
||||
|
||||
Now we have a valid stack pointer and are able to call functions. The following check functions are just here for completeness and I won't explain details. Basically they all work the same: They will check for a feature and jump to `error` if it's not available.
|
||||
|
||||
### Multiboot check
|
||||
We rely on some Multiboot features in the next posts. To make sure the kernel was really loaded by a Multiboot compliant bootloader, we can check the `eax` register. According to the Multiboot specification ([PDF][Multiboot specification]), the bootloader must write the magic value `0x36d76289` to it before loading a kernel. To verify that we can add a simple function:
|
||||
|
||||
```nasm
|
||||
check_multiboot:
|
||||
cmp eax, 0x36d76289
|
||||
jne .no_multiboot
|
||||
ret
|
||||
.no_multiboot:
|
||||
mov al, "0"
|
||||
jmp error
|
||||
```
|
||||
We use the `cmp` instruction to compare the value in `eax` to the magic value. If the values are equal, the `cmp` instruction sets the zero flag in the [FLAGS register]. The `jne` (“jump if not equal”) instruction reads this zero flag and jumps to the given address if it's not set. Thus we jump to the `.no_multiboot` label if `eax` does not contain the magic value.
|
||||
|
||||
In `.no_multiboot`, we use the `jmp` (“jump”) instruction to jump to our error function. We could just as well use the `call` instruction, which additionally pushes the return address. But the return address is not needed because `error` never returns. To pass `0` as error code to the `error` function, we move it into `al` before the jump (`error` will read it from there).
|
||||
|
||||
[Multiboot specification]: http://nongnu.askapache.com/grub/phcoder/multiboot.pdf
|
||||
[FLAGS register]: https://en.wikipedia.org/wiki/FLAGS_register
|
||||
|
||||
### CPUID check
|
||||
[CPUID] is a CPU instruction that can be used to get various information about the CPU. But not every processor supports it. CPUID detection is quite laborious, so we just copy a detection function from the [OSDev wiki][CPUID detection]:
|
||||
|
||||
[CPUID]: http://wiki.osdev.org/CPUID
|
||||
[CPUID detection]: http://wiki.osdev.org/Setting_Up_Long_Mode#Detection_of_CPUID
|
||||
|
||||
```nasm
|
||||
check_cpuid:
|
||||
; Check if CPUID is supported by attempting to flip the ID bit (bit 21)
|
||||
; in the FLAGS register. If we can flip it, CPUID is available.
|
||||
|
||||
; Copy FLAGS in to EAX via stack
|
||||
pushfd
|
||||
pop eax
|
||||
|
||||
; Copy to ECX as well for comparing later on
|
||||
mov ecx, eax
|
||||
|
||||
; Flip the ID bit
|
||||
xor eax, 1 << 21
|
||||
|
||||
; Copy EAX to FLAGS via the stack
|
||||
push eax
|
||||
popfd
|
||||
|
||||
; Copy FLAGS back to EAX (with the flipped bit if CPUID is supported)
|
||||
pushfd
|
||||
pop eax
|
||||
|
||||
; Restore FLAGS from the old version stored in ECX (i.e. flipping the
|
||||
; ID bit back if it was ever flipped).
|
||||
push ecx
|
||||
popfd
|
||||
|
||||
; Compare EAX and ECX. If they are equal then that means the bit
|
||||
; wasn't flipped, and CPUID isn't supported.
|
||||
cmp eax, ecx
|
||||
je .no_cpuid
|
||||
ret
|
||||
.no_cpuid:
|
||||
mov al, "1"
|
||||
jmp error
|
||||
```
|
||||
Basically, the `CPUID` instruction is supported if we can flip some bit in the [FLAGS register]. We can't operate on the flags register directly, so we need to load it into some general purpose register such as `eax` first. The only way to do this is to push the `FLAGS` register on the stack through the `pushfd` instruction and then pop it into `eax`. Equally, we write it back through `push ecx` and `popfd`. To flip the bit we use the `xor` instruction to perform an [exclusive OR]. Finally we compare the two values and jump to `.no_cpuid` if both are equal (`je` – “jump if equal”). The `.no_cpuid` code just jumps to the `error` function with error code `1`.
|
||||
|
||||
Don't worry, you don't need to understand the details.
|
||||
|
||||
[exclusive OR]: https://en.wikipedia.org/wiki/Exclusive_or
|
||||
|
||||
### Long Mode check
|
||||
Now we can use CPUID to detect whether long mode can be used. I use code from [OSDev][long mode detection] again:
|
||||
|
||||
[long mode detection]: http://wiki.osdev.org/Setting_Up_Long_Mode#x86_or_x86-64
|
||||
|
||||
```nasm
|
||||
check_long_mode:
|
||||
; test if extended processor info is available
|
||||
mov eax, 0x80000000 ; implicit argument for cpuid
|
||||
cpuid ; get highest supported argument
|
||||
cmp eax, 0x80000001 ; it needs to be at least 0x80000001
|
||||
jb .no_long_mode ; if it's less, the CPU is too old for long mode
|
||||
|
||||
; use extended info to test if long mode is available
|
||||
mov eax, 0x80000001 ; argument for extended processor info
|
||||
cpuid ; returns various feature bits in ecx and edx
|
||||
test edx, 1 << 29 ; test if the LM-bit is set in the D-register
|
||||
jz .no_long_mode ; If it's not set, there is no long mode
|
||||
ret
|
||||
.no_long_mode:
|
||||
mov al, "2"
|
||||
jmp error
|
||||
```
|
||||
Like many low-level things, CPUID is a bit strange. Instead of taking a parameter, the `cpuid` instruction implicitly uses the `eax` register as argument. To test if long mode is available, we need to call `cpuid` with `0x80000001` in `eax`. This loads some information to the `ecx` and `edx` registers. Long mode is supported if the 29th bit in `edx` is set. [Wikipedia][cpuid long mode] has detailed information.
|
||||
|
||||
[cpuid long mode]: https://en.wikipedia.org/wiki/CPUID#EAX.3D80000001h:_Extended_Processor_Info_and_Feature_Bits
|
||||
|
||||
If you look at the assembly above, you'll probably notice that we call `cpuid` twice. The reason is that the CPUID command started with only a few functions and was extended over time. So old processors may not know the `0x80000001` argument at all. To test if they do, we need to invoke `cpuid` with `0x80000000` in `eax` first. It returns the highest supported parameter value in `eax`. If it's at least `0x80000001`, we can test for long mode as described above. Else the CPU is old and doesn't know what long mode is either. In that case, we directly jump to `.no_long_mode` through the `jb` instruction (“jump if below”).
|
||||
|
||||
### Putting it together
|
||||
We just call these check functions right after start:
|
||||
|
||||
```nasm
|
||||
global start
|
||||
|
||||
section .text
|
||||
bits 32
|
||||
start:
|
||||
mov esp, stack_top
|
||||
|
||||
call check_multiboot
|
||||
call check_cpuid
|
||||
call check_long_mode
|
||||
|
||||
; print `OK` to screen
|
||||
...
|
||||
```
|
||||
When the CPU doesn't support a needed feature, we get an error message with a unique error code. Now we can start the real work.
|
||||
|
||||
## Paging
|
||||
_Paging_ is a memory management scheme that separates virtual and physical memory. The address space is split into equal sized _pages_ and a _page table_ specifies which virtual page points to which physical page. If you never heard of paging, you might want to look at the paging introduction ([PDF][paging chapter]) of the [Three Easy Pieces] OS book.
|
||||
|
||||
[paging chapter]: http://pages.cs.wisc.edu/~remzi/OSTEP/vm-paging.pdf
|
||||
[Three Easy Pieces]: http://pages.cs.wisc.edu/~remzi/OSTEP/
|
||||
|
||||
In long mode, x86 uses a page size of 4096 bytes and a 4 level page table that consists of:
|
||||
|
||||
- the Page-Map Level-4 Table (PML4),
|
||||
- the Page-Directory Pointer Table (PDP),
|
||||
- the Page-Directory Table (PD),
|
||||
- and the Page Table (PT).
|
||||
|
||||
As I don't like these names, I will call them P4, P3, P2, and P1 from now on.
|
||||
|
||||
Each page table contains 512 entries and one entry is 8 bytes, so they fit exactly in one page (`512*8 = 4096`). To translate a virtual address to a physical address the CPU[^hardware_lookup] will do the following[^virtual_physical_translation_source]:
|
||||
|
||||
[^hardware_lookup]: In the x86 architecture, the page tables are _hardware walked_, so the CPU will look at the table on its own when it needs a translation. Other architectures, for example MIPS, just throw an exception and let the OS translate the virtual address.
|
||||
|
||||
[^virtual_physical_translation_source]: Image source: [Wikipedia](https://commons.wikimedia.org/wiki/File:X86_Paging_64bit.svg), with modified font size, page table naming, and removed sign extended bits. The modified file is licensed under the Creative Commons Attribution-Share Alike 3.0 Unported license.
|
||||
|
||||

|
||||
|
||||
1. Get the address of the P4 table from the CR3 register
|
||||
2. Use bits 39-47 (9 bits) as an index into P4 (`2^9 = 512 = number of entries`)
|
||||
3. Use the following 9 bits as an index into P3
|
||||
4. Use the following 9 bits as an index into P2
|
||||
5. Use the following 9 bits as an index into P1
|
||||
6. Use the last 12 bits as page offset (`2^12 = 4096 = page size`)
|
||||
|
||||
But what happens to bits 48-63 of the 64-bit virtual address? Well, they can't be used. The “64-bit” long mode is in fact just a 48-bit mode. The bits 48-63 must be copies of bit 47, so each valid virtual address is still unique. For more information see [Wikipedia][wikipedia_48bit_mode].
|
||||
|
||||
[wikipedia_48bit_mode]: https://en.wikipedia.org/wiki/X86-64#Virtual_address_space_details
|
||||
|
||||
An entry in the P4, P3, P2, and P1 tables consists of the page aligned 52-bit _physical_ address of the frame or the next page table and the following bits that can be OR-ed in:
|
||||
|
||||
Bit(s) | Name | Meaning
|
||||
--------------------- | ------ | ----------------------------------
|
||||
0 | present | the page is currently in memory
|
||||
1 | writable | it's allowed to write to this page
|
||||
2 | user accessible | if not set, only kernel mode code can access this page
|
||||
3 | write through caching | writes go directly to memory
|
||||
4 | disable cache | no cache is used for this page
|
||||
5 | accessed | the CPU sets this bit when this page is used
|
||||
6 | dirty | the CPU sets this bit when a write to this page occurs
|
||||
7 | huge page/null | must be 0 in P1 and P4, creates a 1GiB page in P3, creates a 2MiB page in P2
|
||||
8 | global | page isn't flushed from caches on address space switch (PGE bit of CR4 register must be set)
|
||||
9-11 | available | can be used freely by the OS
|
||||
52-62 | available | can be used freely by the OS
|
||||
63 | no execute | forbid executing code on this page (the NXE bit in the EFER register must be set)
|
||||
|
||||
### Set Up Identity Paging
|
||||
When we switch to long mode, paging will be activated automatically. The CPU will then try to read the instruction at the following address, but this address is now a virtual address. So we need to do _identity mapping_, i.e. map a physical address to the same virtual address.
|
||||
|
||||
The `huge page` bit is now very useful to us. It creates a 2MiB (when used in P2) or even a 1GiB page (when used in P3). So we could map the first _gigabytes_ of the kernel with only one P4 and one P3 table by using 1GiB pages. Unfortunately 1GiB pages are a relatively new feature, for example Intel introduced them in 2010 in the [Westmere architecture]. Therefore we will use 2MiB pages instead to make our kernel compatible to older computers, too.
|
||||
[Westmere architecture]: https://en.wikipedia.org/wiki/Westmere_(microarchitecture)#Technology
|
||||
|
||||
To identity map the first gigabyte of our kernel with 512 2MiB pages, we need one P4, one P3, and one P2 table. Of course we will replace them with finer-grained tables later. But now that we're stuck with assembly, we choose the easiest way.
|
||||
|
||||
We can add these three tables at the beginning[^page_table_alignment] of the `.bss` section:
|
||||
|
||||
[^page_table_alignment]: Page tables need to be page-aligned as the bits 0-11 are used for flags. By putting these tables at the beginning of `.bss`, the linker can just page align the whole section and we don't have unused padding bytes in between.
|
||||
|
||||
```nasm
|
||||
...
|
||||
|
||||
section .bss
|
||||
align 4096
|
||||
p4_table:
|
||||
resb 4096
|
||||
p3_table:
|
||||
resb 4096
|
||||
p2_table:
|
||||
resb 4096
|
||||
stack_bottom:
|
||||
resb 64
|
||||
stack_top:
|
||||
```
|
||||
The `resb` command reserves the specified amount of bytes without initializing them, so the 12KiB don't need to be saved in the executable. The `align 4096` ensures that the page tables are page aligned.
|
||||
|
||||
When GRUB creates the `.bss` section in memory, it will initialize it to `0`. So the `p4_table` is already valid (it contains 512 non-present entries) but not very useful. To be able to map 2MiB pages, we need to link P4's first entry to the `p3_table` and P3's first entry to the `p2_table`:
|
||||
|
||||
```nasm
|
||||
set_up_page_tables:
|
||||
; map first P4 entry to P3 table
|
||||
mov eax, p3_table
|
||||
or eax, 0b11 ; present + writable
|
||||
mov [p4_table], eax
|
||||
|
||||
; map first P3 entry to P2 table
|
||||
mov eax, p2_table
|
||||
or eax, 0b11 ; present + writable
|
||||
mov [p3_table], eax
|
||||
|
||||
; TODO map each P2 entry to a huge 2MiB page
|
||||
ret
|
||||
```
|
||||
We just set the present and writable bits (`0b11` is a binary number) in the aligned P3 table address and move it to the first 4 bytes of the P4 table. Then we do the same to link the first P3 entry to the `p2_table`.
|
||||
|
||||
Now we need to map P2's first entry to a huge page starting at 0, P2's second entry to a huge page starting at 2MiB, P2's third entry to a huge page starting at 4MiB, and so on. It's time for our first (and only) assembly loop:
|
||||
|
||||
```nasm
|
||||
set_up_page_tables:
|
||||
...
|
||||
; map each P2 entry to a huge 2MiB page
|
||||
mov ecx, 0 ; counter variable
|
||||
|
||||
.map_p2_table:
|
||||
; map ecx-th P2 entry to a huge page that starts at address 2MiB*ecx
|
||||
mov eax, 0x200000 ; 2MiB
|
||||
mul ecx ; start address of ecx-th page
|
||||
or eax, 0b10000011 ; present + writable + huge
|
||||
mov [p2_table + ecx * 8], eax ; map ecx-th entry
|
||||
|
||||
inc ecx ; increase counter
|
||||
cmp ecx, 512 ; if counter == 512, the whole P2 table is mapped
|
||||
jne .map_p2_table ; else map the next entry
|
||||
|
||||
ret
|
||||
```
|
||||
Let me first explain how an assembly loop works. We use the `ecx` register as a counter variable, just like `i` in a for loop. After mapping the `ecx-th` entry, we increase `ecx` by one and jump to `.map_p2_table` again if it's still smaller than 512.
|
||||
|
||||
To map a P2 entry we first calculate the start address of its page in `eax`: The `ecx-th` entry needs to be mapped to `ecx * 2MiB`. We use the `mul` operation for that, which multiplies `eax` with the given register and stores the result in `eax`. Then we set the `present`, `writable`, and `huge page` bits and write it to the P2 entry. The address of the `ecx-th` entry in P2 is `p2_table + ecx * 8`, because each entry is 8 bytes large.
|
||||
|
||||
Now the first gigabyte (512 * 2MiB) of our kernel is identity mapped and thus accessible through the same physical and virtual addresses.
|
||||
|
||||
### Enable Paging
|
||||
To enable paging and enter long mode, we need to do the following:
|
||||
|
||||
1. write the address of the P4 table to the CR3 register (the CPU will look there, see the [paging section](#paging))
|
||||
2. long mode is an extension of [Physical Address Extension] \(PAE), so we need to enable PAE first
|
||||
3. Set the long mode bit in the EFER register
|
||||
4. Enable Paging
|
||||
|
||||
[Physical Address Extension]: https://en.wikipedia.org/wiki/Physical_Address_Extension
|
||||
|
||||
The assembly function looks like this (some boring bit-moving to various registers):
|
||||
|
||||
```nasm
|
||||
enable_paging:
|
||||
; load P4 to cr3 register (cpu uses this to access the P4 table)
|
||||
mov eax, p4_table
|
||||
mov cr3, eax
|
||||
|
||||
; enable PAE-flag in cr4 (Physical Address Extension)
|
||||
mov eax, cr4
|
||||
or eax, 1 << 5
|
||||
mov cr4, eax
|
||||
|
||||
; set the long mode bit in the EFER MSR (model specific register)
|
||||
mov ecx, 0xC0000080
|
||||
rdmsr
|
||||
or eax, 1 << 8
|
||||
wrmsr
|
||||
|
||||
; enable paging in the cr0 register
|
||||
mov eax, cr0
|
||||
or eax, 1 << 31
|
||||
mov cr0, eax
|
||||
|
||||
ret
|
||||
```
|
||||
The `or eax, 1 << X` is a common pattern. It sets the bit `X` in the eax register (`<<` is a left shift). Through `rdmsr` and `wrmsr` it's possible to read/write to the so-called model specific registers at address `ecx` (in this case `ecx` points to the EFER register).
|
||||
|
||||
Finally we need to call our new functions in `start`:
|
||||
|
||||
```nasm
|
||||
...
|
||||
start:
|
||||
mov esp, stack_top
|
||||
|
||||
call check_multiboot
|
||||
call check_cpuid
|
||||
call check_long_mode
|
||||
|
||||
call set_up_page_tables ; new
|
||||
call enable_paging ; new
|
||||
|
||||
; print `OK` to screen
|
||||
mov dword [0xb8000], 0x2f4b2f4f
|
||||
hlt
|
||||
...
|
||||
```
|
||||
To test it we execute `make run`. If the green OK is still printed, we have successfully enabled paging!
|
||||
|
||||
## The Global Descriptor Table
|
||||
After enabling Paging, the processor is in long mode. So we can use 64-bit instructions now, right? Wrong. The processor is still in some 32-bit compatibility submode. To actually execute 64-bit code, we need to set up a new Global Descriptor Table.
|
||||
The Global Descriptor Table (GDT) was used for _Segmentation_ in old operating systems. I won't explain Segmentation but the [Three Easy Pieces] OS book has a good introduction ([PDF][Segmentation chapter]) again.
|
||||
|
||||
[Segmentation chapter]: http://pages.cs.wisc.edu/~remzi/OSTEP/vm-segmentation.pdf
|
||||
|
||||
Today almost everyone uses Paging instead of Segmentation (and so do we). But on x86, a GDT is always required, even when you're not using Segmentation. GRUB has set up a valid 32-bit GDT for us but now we need to switch to a long mode GDT.
|
||||
|
||||
A GDT always starts with a 0-entry and contains an arbitrary number of segment entries afterwards. An entry has the following format:
|
||||
|
||||
Bit(s) | Name | Meaning
|
||||
--------------------- | ------ | ----------------------------------
|
||||
0-15 | limit 0-15 | the first 2 byte of the segment's limit
|
||||
16-39 | base 0-23 | the first 3 byte of the segment's base address
|
||||
40 | accessed | set by the CPU when the segment is accessed
|
||||
41 | read/write | reads allowed for code segments / writes allowed for data segments
|
||||
42 | direction/conforming | the segment grows down (i.e. base>limit) for data segments / the current privilege level can be higher than the specified level for code segments (else it must match exactly)
|
||||
43 | executable | if set, it's a code segment, else it's a data segment
|
||||
44 | descriptor type | should be 1 for code and data segments
|
||||
45-46 | privilege | the [ring level]: 0 for kernel, 3 for user
|
||||
47 | present | must be 1 for valid selectors
|
||||
48-51 | limit 16-19 | bits 16 to 19 of the segment's limit
|
||||
52 | available | freely available to the OS
|
||||
53 | 64-bit | should be set for 64-bit code segments
|
||||
54 | 32-bit | should be set for 32-bit segments
|
||||
55 | granularity | if it's set, the limit is the number of pages, else it's a byte number
|
||||
56-63 | base 24-31 | the last byte of the base address
|
||||
|
||||
[ring level]: http://wiki.osdev.org/Security#Rings
|
||||
|
||||
We need one code and one data segment. They have the following bits set: _descriptor type_, _present_, and _read/write_. The code segment has additionally the _executable_ and the _64-bit_ flag. In Long mode, it's not possible to actually use the GDT entries for Segmentation and thus the base and limit fields must be 0. Translated to assembly the long mode GDT looks like this:
|
||||
|
||||
```nasm
|
||||
section .rodata
|
||||
gdt64:
|
||||
dq 0 ; zero entry
|
||||
dq (1<<44) | (1<<47) | (1<<41) | (1<<43) | (1<<53) ; code segment
|
||||
dq (1<<44) | (1<<47) | (1<<41) ; data segment
|
||||
```
|
||||
We chose the `.rodata` section here because it's initialized read-only data. The `dq` command stands for `define quad` and outputs a 64-bit constant (similar to `dw` and `dd`). And the `(1<<44)` is a [bit shift] that sets bit 44.
|
||||
|
||||
[bit shift]: http://www.cs.umd.edu/class/sum2003/cmsc311/Notes/BitOp/bitshift.html
|
||||
|
||||
### Loading the GDT
|
||||
To load our new 64-bit GDT, we have to tell the CPU its address and length. We do this by passing the memory location of a special pointer structure to the `lgdt` (load GDT) instruction. The pointer structure looks like this:
|
||||
|
||||
```nasm
|
||||
gdt64:
|
||||
...
|
||||
dq (1<<44) | (1<<47) | (1<<41) ; data segment
|
||||
.pointer:
|
||||
dw $ - gdt64 - 1
|
||||
dq gdt64
|
||||
```
|
||||
The first 2 bytes specify the (GDT length - 1). The `$` is a special symbol that is replaced with the current address (it's equal to `.pointer` in our case). The following 8 bytes specify the GDT address. Labels that start with a point (such as `.pointer`) are sub-labels of the last label without point. To access them, they must be prefixed with the parent label (e.g., `gdt64.pointer`).
|
||||
|
||||
Now we can load the GDT in `start`:
|
||||
|
||||
```nasm
|
||||
start:
|
||||
...
|
||||
call enable_paging
|
||||
|
||||
; load the 64-bit GDT
|
||||
lgdt [gdt64.pointer]
|
||||
|
||||
; print `OK` to screen
|
||||
...
|
||||
```
|
||||
If you still see the green `OK`, everything went fine and the new GDT is loaded. But we still can't execute 64-bit code: The selector registers such as the code selector `cs` and the data selector `ds` still have the values from the old GDT. To update them, we need to load them with the GDT offset (in bytes) of the desired segment. In our case the code segment starts at byte 8 of the GDT and the data segment at byte 16. Let's try it:
|
||||
|
||||
```nasm
|
||||
...
|
||||
lgdt [gdt64.pointer]
|
||||
|
||||
; update selectors
|
||||
mov ax, 16
|
||||
mov ss, ax ; stack selector
|
||||
mov ds, ax ; data selector
|
||||
mov es, ax ; extra selector
|
||||
|
||||
; print `OK` to screen
|
||||
...
|
||||
```
|
||||
It should still work. The segment selectors are only 16-bits large, so we use the 16-bit `ax` subregister. Notice that we didn't update the code selector `cs`. We will do that later. First we should replace this hardcoded `16` by adding some labels to our GDT:
|
||||
|
||||
```nasm
|
||||
section .rodata
|
||||
gdt64:
|
||||
dq 0 ; zero entry
|
||||
.code: equ $ - gdt64 ; new
|
||||
dq (1<<44) | (1<<47) | (1<<41) | (1<<43) | (1<<53) ; code segment
|
||||
.data: equ $ - gdt64 ; new
|
||||
dq (1<<44) | (1<<47) | (1<<41) ; data segment
|
||||
.pointer:
|
||||
...
|
||||
```
|
||||
We can't just use normal labels here, as we need the table offset. We calculate this offset using the current address `$` and set the labels to this value using [equ]. Now we can use `gdt64.data` instead of 16 and `gdt64.code` instead of 8 and these labels will still work if we modify the GDT.
|
||||
|
||||
[equ]: http://www.nasm.us/doc/nasmdoc3.html#section-3.2.4
|
||||
|
||||
Now there is just one last step left to enter the true 64-bit mode: We need to load `cs` with `gdt64.code`. But we can't do it through `mov`. The only way to reload the code selector is a _far jump_ or a _far return_. These instructions work like a normal jump/return but change the code selector. We use a far jump to a long mode label:
|
||||
|
||||
```nasm
|
||||
global start
|
||||
extern long_mode_start
|
||||
...
|
||||
start:
|
||||
...
|
||||
lgdt [gdt64.pointer]
|
||||
|
||||
; update selectors
|
||||
mov ax, gdt64.data
|
||||
mov ss, ax
|
||||
mov ds, ax
|
||||
mov es, ax
|
||||
|
||||
jmp gdt64.code:long_mode_start
|
||||
...
|
||||
```
|
||||
The actual `long_mode_start` label is defined as `extern`, so it's part of another file. The `jmp gdt64.code:long_mode_start` is the mentioned far jump.
|
||||
|
||||
I put the 64-bit code into a new file to separate it from the 32-bit code, thereby we can't call the (now invalid) 32-bit code accidentally. The new file (I named it `long_mode_init.asm`) looks like this:
|
||||
|
||||
```nasm
|
||||
global long_mode_start
|
||||
|
||||
section .text
|
||||
bits 64
|
||||
long_mode_start:
|
||||
; print `OKAY` to screen
|
||||
mov rax, 0x2f592f412f4b2f4f
|
||||
mov qword [0xb8000], rax
|
||||
hlt
|
||||
```
|
||||
You should see a green `OKAY` on the screen. Some notes on this last step:
|
||||
|
||||
- As the CPU expects 64-bit instructions now, we use `bits 64`
|
||||
- We can now use the extended registers. Instead of the 32-bit `eax`, `ebx`, etc. we now have the 64-bit `rax`, `rbx`, …
|
||||
- and we can write these 64-bit registers directly to memory using `mov qword` (quad word)
|
||||
|
||||
_Congratulations_! You have successfully wrestled through this CPU configuration and compatibility mode mess :).
|
||||
|
||||
## What's next?
|
||||
It's time to finally leave assembly behind[^leave_assembly_behind] and switch to some higher level language. We won't use C or C++ (not even a single line). Instead we will use the relatively new [Rust] language. It's a systems language without garbage collection but with guaranteed memory safety. Through a real type system and many abstractions it feels like a high-level language but can still be low-level enough for OS development. The [next post] describes the Rust setup.
|
||||
|
||||
[^leave_assembly_behind]: Actually we will still need some assembly in the future, but I'll try to minimize it.
|
||||
|
||||
[Rust]: https://www.rust-lang.org/
|
||||
[next post]: {{% relref "2015-09-02-set-up-rust.md" %}}
|
||||
@@ -1,409 +0,0 @@
|
||||
+++
|
||||
title = "Set Up Rust"
|
||||
date = "2015-09-02"
|
||||
updated = "2015-05-29"
|
||||
aliases = [
|
||||
"/2015/09/02/setup-rust/",
|
||||
"/setup-rust.html",
|
||||
"/rust-os/setup-rust.html",
|
||||
]
|
||||
+++
|
||||
|
||||
In the previous posts we created a [minimal Multiboot kernel][multiboot post] and [switched to Long Mode][long mode post]. Now we can finally switch to [Rust] code. Rust is a high-level language without runtime. It allows us to not link the standard library and write bare metal code. Unfortunately the setup is not quite hassle-free yet.
|
||||
|
||||
[multiboot post]: {{% relref "2015-08-18-multiboot-kernel.md" %}}
|
||||
[long mode post]: {{% relref "2015-08-25-entering-longmode.md" %}}
|
||||
[Rust]: https://www.rust-lang.org/
|
||||
|
||||
<!--more--><aside id="toc"></aside>
|
||||
|
||||
This blog post tries to set up Rust step-by-step and point out the different problems. If you have any questions, problems, or suggestions please [file an issue] or create a comment at the bottom. The code from this post is in a [Github repository], too.
|
||||
|
||||
[file an issue]: https://github.com/phil-opp/blog_os/issues
|
||||
[Github repository]: https://github.com/phil-opp/blog_os/tree/set_up_rust
|
||||
|
||||
**Update**: We now use the `panic=abort` cargo option instead of `-Z no-landing-pads`. See [#170](https://github.com/phil-opp/blog_os/pull/170).
|
||||
|
||||
## Installing Rust
|
||||
We need a nightly compiler, as we will use many unstable features. To manage Rust installations I highly recommend [rustup]. It allows you to install nightly, beta, and stable compilers side-by-side and makes it easy to update them. To use a nightly compiler for the current directory, you can run `rustup override add nightly`.
|
||||
|
||||
[rustup]: https://www.rustup.rs/
|
||||
|
||||
The code from this post (and all following) is [automatically tested](https://travis-ci.org/phil-opp/blog_os) every day and should always work for the newest nightly. If it doesn't, please [file an issue](https://github.com/phil-opp/blog_os/issues).
|
||||
|
||||
## Creating a Cargo project
|
||||
[Cargo] is Rust's excellent package manager. Normally you would call `cargo new` when you want to create a new project folder. We can't use it because our folder already exists, so we need to do it manually. Fortunately we only need to add a cargo configuration file named `Cargo.toml`:
|
||||
|
||||
[Cargo]: http://doc.crates.io/guide.html
|
||||
|
||||
```toml
|
||||
[package]
|
||||
name = "blog_os"
|
||||
version = "0.1.0"
|
||||
authors = ["Philipp Oppermann <dev@phil-opp.com>"]
|
||||
|
||||
[lib]
|
||||
crate-type = ["staticlib"]
|
||||
```
|
||||
The `package` section contains required project metadata such as the [semantic crate version]. The `lib` section specifies that we want to build a static library, i.e. a library that contains all of its dependencies. This is required to link the Rust project with our kernel.
|
||||
|
||||
[semantic crate version]: http://doc.crates.io/manifest.html#the-package-section
|
||||
|
||||
Now we place our root source file in `src/lib.rs`:
|
||||
|
||||
```rust
|
||||
#![feature(lang_items)]
|
||||
#![no_std]
|
||||
|
||||
#[no_mangle]
|
||||
pub extern fn rust_main() {}
|
||||
|
||||
#[lang = "eh_personality"] extern fn eh_personality() {}
|
||||
#[lang = "panic_fmt"] extern fn panic_fmt() -> ! {loop{}}
|
||||
```
|
||||
Let's break it down:
|
||||
|
||||
- `#!` defines an [attribute] of the current module. Since we are at the root module, they apply to the crate itself.
|
||||
- The `feature` attribute is used to allow the specified _feature-gated_ attributes in this crate. You can't do that in a stable/beta compiler, so this is one reason we need a Rust nightly.
|
||||
- The `no_std` attribute prevents the automatic linking of the standard library. We can't use `std` because it relies on operating system features like files, system calls, and various device drivers. Remember that currently the only “feature” of our OS is printing `OKAY` :).
|
||||
- A `#` without a `!` afterwards defines an attribute for the _following_ item (a function in our case).
|
||||
- The `no_mangle` attribute disables the automatic [name mangling] that Rust uses to get unique function names. We want to do a `call rust_main` from our assembly code, so this function name must stay as it is.
|
||||
- We mark our main function as `extern` to make it compatible to the standard C [calling convention].
|
||||
- The `lang` attribute defines a Rust [language item].
|
||||
- The `eh_personality` function is used for Rust's [unwinding] on `panic!`. We can leave it empty since we don't have any unwinding support in our OS yet.
|
||||
- The `panic_fmt` function is the entry point on panic. Right now we can't do anything useful, so we just make sure that it doesn't return (required by the `!` return type).
|
||||
|
||||
[attribute]: https://doc.rust-lang.org/book/attributes.html
|
||||
[name mangling]: https://en.wikipedia.org/wiki/Name_mangling
|
||||
[calling convention]: https://en.wikipedia.org/wiki/Calling_convention
|
||||
[language item]: https://doc.rust-lang.org/book/lang-items.html
|
||||
[unwinding]: https://doc.rust-lang.org/std/rt/unwind/
|
||||
|
||||
## Building Rust
|
||||
We can now build it using `cargo build`. To make sure that we are building it for the x86_64 architecture, we can pass an explicit target:
|
||||
|
||||
```bash
|
||||
cargo build --target=x86_64-unknown-linux-gnu
|
||||
```
|
||||
It creates a static library at `target/x86_64-unknown-linux-gnu/debug/libblog_os.a`, which can be linked with our assembly kernel. If you're getting an error about a missing `core` crate, [look here][cross compile libcore].
|
||||
[cross compile libcore]: {{% relref "cross-compile-libcore.md" %}}
|
||||
|
||||
To build and link the Rust library on `make`, we extend our `Makefile` ([full file][github makefile]):
|
||||
|
||||
```make
|
||||
# ...
|
||||
target ?= $(arch)-unknown-linux-gnu
|
||||
rust_os := target/$(target)/debug/libblog_os.a
|
||||
# ...
|
||||
$(kernel): cargo $(rust_os) $(assembly_object_files) $(linker_script)
|
||||
@ld -n -T $(linker_script) -o $(kernel) \
|
||||
$(assembly_object_files) $(rust_os)
|
||||
|
||||
cargo:
|
||||
@cargo build --target $(target)
|
||||
```
|
||||
We added a new `cargo` target that just executes `cargo build` and modified the `$(kernel)` target to link the created static lib.
|
||||
|
||||
But now `cargo build` is executed on every `make`, even if no source file was changed. And the ISO is recreated on every `make iso`/`make run`, too. We could try to avoid this by adding dependencies on all rust source and cargo configuration files to the `cargo` target, but the ISO creation takes only half a second on my machine and most of the time we will have changed a Rust file when we run `make`. So we keep it simple for now and let cargo do the bookkeeping of changed files (it does it anyway).
|
||||
|
||||
[github makefile]: https://github.com/phil-opp/blog_os/blob/set_up_rust/Makefile
|
||||
|
||||
## Calling Rust
|
||||
Now we can call the main method in `long_mode_start`:
|
||||
|
||||
```nasm
|
||||
bits 64
|
||||
long_mode_start:
|
||||
; call the rust main
|
||||
extern rust_main ; new
|
||||
call rust_main ; new
|
||||
|
||||
; print `OKAY` to screen
|
||||
mov rax, 0x2f592f412f4b2f4f
|
||||
mov qword [0xb8000], rax
|
||||
hlt
|
||||
```
|
||||
By defining `rust_main` as `extern` we tell nasm that the function is defined in another file. As the linker takes care of linking them together, we'll get a linker error if we have a typo in the name or forget to mark the rust function as `pub extern`.
|
||||
|
||||
If we've done everything right, we should still see the green `OKAY` when executing `make run`. That means that we successfully called the Rust function and returned back to assembly.
|
||||
|
||||
## Fixing Linker Errors
|
||||
Now we can try some Rust code:
|
||||
|
||||
```rust
|
||||
pub extern fn rust_main() {
|
||||
let x = ["Hello", " ", "World", "!"];
|
||||
}
|
||||
```
|
||||
When we test it using `make run`, it fails with `undefined reference to 'memcpy'`. The `memcpy` function is one of the basic functions of the C library (`libc`). Usually the `libc` crate is linked to every Rust program together with the standard library, but we opted out through `#![no_std]`. We could try to fix this by adding the [libc crate] as `extern crate`. But `libc` is just a wrapper for the system `libc`, for example `glibc` on Linux, so this won't work for us. Instead we need to recreate the basic `libc` functions such as `memcpy`, `memmove`, `memset`, and `memcmp` in Rust.
|
||||
|
||||
[libc crate]: https://doc.rust-lang.org/nightly/libc/index.html
|
||||
|
||||
### rlibc
|
||||
Fortunately there already is a crate for that: [rlibc]. When we look at its [source code][rlibc source] we see that it contains no magic, just some [raw pointer] operations in a while loop. To add `rlibc` as a dependency we just need to add two lines to the `Cargo.toml`:
|
||||
|
||||
```toml
|
||||
...
|
||||
[dependencies]
|
||||
rlibc = "0.1.4"
|
||||
```
|
||||
and an `extern crate` definition in our `src/lib.rs`:
|
||||
|
||||
```rust
|
||||
...
|
||||
extern crate rlibc;
|
||||
|
||||
#[no_mangle]
|
||||
pub extern fn rust_main() {
|
||||
...
|
||||
```
|
||||
Now `make run` doesn't complain about `memcpy` anymore. Instead it will show a pile of new errors:
|
||||
|
||||
```
|
||||
target/debug/libblog_os.a(core-35017696.0.o):
|
||||
In function `ops::f32.Rem::rem::hfcbbcbe5711a6e6emxm':
|
||||
core.0.rs:(.text._ZN3ops7f32.Rem3rem20hfcbbcbe5711a6e6emxmE+0x1):
|
||||
undefined reference to `fmodf'
|
||||
target/debug/libblog_os.a(core-35017696.0.o):
|
||||
In function `ops::f64.Rem::rem::hbf225030671c7a35Txm':
|
||||
core.0.rs:(.text._ZN3ops7f64.Rem3rem20hbf225030671c7a35TxmE+0x1):
|
||||
undefined reference to `fmod'
|
||||
...
|
||||
```
|
||||
|
||||
[rlibc]: https://crates.io/crates/rlibc
|
||||
[rlibc source]: https://github.com/rust-lang/rlibc/blob/master/src/lib.rs
|
||||
[raw pointer]: https://doc.rust-lang.org/book/raw-pointers.html
|
||||
[crates.io]: https://crates.io
|
||||
|
||||
### --gc-sections
|
||||
The new errors are linker errors about missing `fmod` and `fmodf` functions. These functions are used for the modulo operation (`%`) on floating point numbers in [libcore]. The core library is added implicitly when using `#![no_std]` and provides basic standard library features like `Option` or `Iterator`. According to the documentation it is “dependency-free”. But it actually has some dependencies, for example on `fmod` and `fmodf`.
|
||||
|
||||
[libcore]: https://doc.rust-lang.org/core/
|
||||
|
||||
So how do we fix this problem? We don't use any floating point operations, so we could just provide our own implementations of `fmod` and `fmodf` that just do a `loop{}`. But there's a better way that doesn't fail silently when we use float modulo some day: We tell the linker to remove unused sections. That's generally a good idea as it reduces kernel size. And we don't have any references to `fmod` and `fmodf` anymore until we use floating point modulo. The magic linker flag is `--gc-sections`, which stands for “garbage collect sections”. Let's add it to the `$(kernel)` target in our `Makefile`:
|
||||
|
||||
```make
|
||||
$(kernel): cargo $(rust_os) $(assembly_object_files) $(linker_script)
|
||||
@ld -n --gc-sections -T $(linker_script) -o $(kernel) \
|
||||
$(assembly_object_files) $(rust_os)
|
||||
```
|
||||
Now we can do a `make run` again and… it doesn't boot anymore:
|
||||
|
||||
```
|
||||
GRUB error: no multiboot header found.
|
||||
```
|
||||
What happened? Well, the linker removed unused sections. And since we don't use the Multiboot section anywhere, `ld` removes it, too. So we need to tell the linker explicitly that it should keep this section. The `KEEP` command does exactly that, so we add it to the linker script (`linker.ld`):
|
||||
|
||||
```
|
||||
.boot :
|
||||
{
|
||||
/* ensure that the multiboot header is at the beginning */
|
||||
KEEP(*(.multiboot_header))
|
||||
}
|
||||
```
|
||||
Now everything should work again (the green `OKAY`). But there is another linking issue, which is triggered by some other example code.
|
||||
|
||||
### panic = "abort"
|
||||
|
||||
The following snippet still fails:
|
||||
|
||||
```rust
|
||||
...
|
||||
let test = (0..3).flat_map(|x| 0..x).zip(0..);
|
||||
```
|
||||
The error is a linker error again (hence the ugly error message):
|
||||
|
||||
```
|
||||
target/debug/libblog_os.a(blog_os.0.o):
|
||||
In function `blog_os::iter::Iterator::zip<core::iter::FlatMap<
|
||||
core::ops::Range<i32>, core::ops::Range<i32>, closure>,
|
||||
core::ops::RangeFrom<i32>>':
|
||||
/home/.../src/libcore/iter.rs:654:
|
||||
undefined reference to `_Unwind_Resume'
|
||||
```
|
||||
So the linker can't find a function named `_Unwind_Resume` that is referenced in `iter.rs:654` in libcore. This reference is not really there at [line 654 of libcore's `iter.rs`][iter.rs:654]. Instead, it is a compiler inserted _landing pad_, which is used for panic handling.
|
||||
|
||||
[iter.rs:654]: https://github.com/rust-lang/rust/blob/b0ca03923359afc8df92a802b7cc1476a72fb2d0/src/libcore/iter.rs#L654
|
||||
|
||||
By default, the destructors of all stack variables are run when a `panic` occurs. This is called _unwinding_ and allows parent threads to [recover from panics]. However, it requires a platform specific gcc library, which isn't available in our kernel.
|
||||
|
||||
[recover from panics]: https://doc.rust-lang.org/book/concurrency.html#panics
|
||||
|
||||
Fortunately, Rust allows us to disable unwinding. We just need to add some entries in our `Cargo.toml`:
|
||||
|
||||
```toml
|
||||
# The development profile, used for `cargo build`.
|
||||
[profile.dev]
|
||||
panic = "abort"
|
||||
|
||||
# The release profile, used for `cargo build --release`.
|
||||
[profile.release]
|
||||
panic = "abort"
|
||||
```
|
||||
|
||||
These [profile sections] specify options for `cargo build` and `cargo build --release`. By setting the `panic` option to `abort`, we disable all unwinding in our kernel.
|
||||
|
||||
[profile sections]: http://doc.crates.io/manifest.html#the-profile-sections
|
||||
|
||||
However, there are still references to `_Unwind_Resume` in the precompiled standard libraries. This might lead to linker errors when we use specific parts of `libcore`. To avoid this, we create a dummy `_Unwind_Resume` function that loops indefinitely[^fn-libcore-unwind]:
|
||||
|
||||
[^fn-libcore-unwind]: A better solution is to recompile `libcore` with `panic="abort"`. We will do this in a future post.
|
||||
|
||||
```rust
|
||||
// in src/lib.rs
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
#[no_mangle]
|
||||
pub extern "C" fn _Unwind_Resume() -> ! {
|
||||
loop {}
|
||||
}
|
||||
```
|
||||
|
||||
Now we fixed all linking issues and our kernel builds again. But instead of displaying `Hello World`, it constantly reboots itself when we start it.
|
||||
|
||||
## Debugging the Boot Loop
|
||||
Such a boot loop is most likely caused by some [CPU exception][exception table]. When these exceptions aren't handled, a [Triple Fault] occurs and the processor resets itself. We can look at generated CPU interrupts/exceptions using QEMU:
|
||||
|
||||
[exception table]: http://wiki.osdev.org/Exceptions
|
||||
[Triple Fault]: http://wiki.osdev.org/Triple_Fault
|
||||
|
||||
```
|
||||
> qemu-system-x86_64 -d int -no-reboot -cdrom build/os-x86_64.iso
|
||||
SMM: enter
|
||||
...
|
||||
SMM: after RSM
|
||||
...
|
||||
check_exception old: 0xffffffff new 0x6
|
||||
0: v=06 e=0000 i=0 cpl=0 IP=0008:000000000010018a pc=000000000010018a
|
||||
SP=0010:0000000000102f70 env->regs[R_EAX]=0000000080010010
|
||||
...
|
||||
check_exception old: 0xffffffff new 0xd
|
||||
1: v=0d e=0062 i=0 cpl=0 IP=0008:000000000010018a pc=000000000010018a
|
||||
SP=0010:0000000000102f70 env->regs[R_EAX]=0000000080010010
|
||||
...
|
||||
check_exception old: 0xd new 0xd
|
||||
2: v=08 e=0000 i=0 cpl=0 IP=0008:000000000010018a pc=000000000010018a
|
||||
SP=0010:0000000000102f70 env->regs[R_EAX]=0000000080010010
|
||||
...
|
||||
check_exception old: 0x8 new 0xd
|
||||
```
|
||||
Let me first explain the QEMU arguments: The `-d int` logs CPU interrupts to the console and the `-no-reboot` flag closes QEMU instead of constantly rebooting. But what does the cryptic output mean? I already omitted most of it as we don't need it here. Let's break down the rest:
|
||||
|
||||
- The `SMM: enter` and `SMM: after RSM` blocks are created before our OS boots, so we just ignore them.
|
||||
- The `check_exception old: 0xffffffff new 0x6` block is the interesting one. It says: “a new CPU exception with number `0x6` occurred”.
|
||||
- The last blocks indicate further exceptions. They were thrown because we didn't handle the `0x6` exception, so we're going to ignore them, too.
|
||||
|
||||
So let's look at the first exception: `old:0xffffffff` means that the CPU wasn't handling an interrupt when the exception occurred. The new exception has number `0x6`. By looking at an [exception table] we learn that `0x6` indicates an [Invalid Opcode] fault. So the last executed instruction was invalid. The register dump tells us that the current instruction was `0x10018a` (through `IP` (instruction pointer) or `pc` (program counter)). Therefore the instruction at `0x10018a` seems to be invalid. We can look at it using `objdump`:
|
||||
|
||||
[Invalid Opcode]: http://wiki.osdev.org/Exceptions#Invalid_Opcode
|
||||
|
||||
```
|
||||
> objdump -D build/kernel-x86_64.bin | grep "10018a:"
|
||||
10018a: 0f 10 05 c7 01 00 00 movups 0x1c7(%rip),%xmm0 ...
|
||||
```
|
||||
Through `objdump -D` we disassemble our whole kernel and `grep` picks the relevant line. The instruction at `0x10018a` seems to be a valid `movups` instruction. It's an [SSE] instruction that moves 128 bits between memory and SSE-registers (e.g. `xmm0`). But why the `Invalid Opcode` exception? The answer is hidden behind the [movups documentation][movups]: The section _Protected Mode Exceptions_ lists the conditions for the various exceptions. The short code of the `Invalid Opcode` is `#UD`. An `#UD` exception occurs:
|
||||
|
||||
> If an unmasked SIMD floating-point exception and OSXMMEXCPT in CR4 is 0. If EM in CR0 is set. If OSFXSR in CR4 is 0. If CPUID feature flag SSE is 0.
|
||||
|
||||
[SSE]: https://en.wikipedia.org/wiki/Streaming_SIMD_Extensions
|
||||
[movups]: http://www.c3se.chalmers.se/Common/VTUNE-9.1/doc/users_guide/mergedProjects/analyzer_ec/mergedProjects/reference_olh/mergedProjects/instructions/instruct32_hh/vc206.htm
|
||||
|
||||
The rough translation of this cryptic definition is: _If SSE isn't enabled_. So apparently Rust uses SSE instructions by default and we didn't enable SSE before. To fix this, we can either disable SSE instructions in the compiler or enable SSE in our kernel. We do the latter, as it's easier.
|
||||
|
||||
### Enabling SSE
|
||||
To enable SSE, assembly code is needed again. We want to add a function that tests if SSE is available and enables it then. Else we want to print an error message.
|
||||
|
||||
We add it to the `boot.asm` file:
|
||||
|
||||
```nasm
|
||||
; Check for SSE and enable it. If it's not supported throw error "a".
|
||||
set_up_SSE:
|
||||
; check for SSE
|
||||
mov eax, 0x1
|
||||
cpuid
|
||||
test edx, 1<<25
|
||||
jz .no_SSE
|
||||
|
||||
; enable SSE
|
||||
mov eax, cr0
|
||||
and ax, 0xFFFB ; clear coprocessor emulation CR0.EM
|
||||
or ax, 0x2 ; set coprocessor monitoring CR0.MP
|
||||
mov cr0, eax
|
||||
mov eax, cr4
|
||||
or ax, 3 << 9 ; set CR4.OSFXSR and CR4.OSXMMEXCPT at the same time
|
||||
mov cr4, eax
|
||||
|
||||
ret
|
||||
.no_SSE:
|
||||
mov al, "a"
|
||||
jmp error
|
||||
```
|
||||
The code is from the great [OSDev Wiki][osdev sse] again. Notice that it sets/unsets exactly the bits that can cause the `Invalid Opcode` exception.
|
||||
|
||||
When we insert a `call set_up_SSE` somewhere in the `start` function (for example after `call enable_paging`), our Rust code will finally work.
|
||||
|
||||
[osdev sse]: http://wiki.osdev.org/SSE#Checking_for_SSE
|
||||
|
||||
### “OS returned!”
|
||||
Now that we're editing assembly anyway, we should change the `OKAY` message to something more meaningful. My suggestion is a red `OS returned!`:
|
||||
|
||||
```nasm
|
||||
...
|
||||
call rust_main
|
||||
|
||||
.os_returned:
|
||||
; rust main returned, print `OS returned!`
|
||||
mov rax, 0x4f724f204f534f4f
|
||||
mov [0xb8000], rax
|
||||
mov rax, 0x4f724f754f744f65
|
||||
mov [0xb8008], rax
|
||||
mov rax, 0x4f214f644f654f6e
|
||||
mov [0xb8010], rax
|
||||
hlt
|
||||
```
|
||||
Ok, that's enough assembly for now. Let's switch back to Rust.
|
||||
|
||||
## Hello World!
|
||||
Finally, it's time for a `Hello World!` from Rust:
|
||||
|
||||
```rust
|
||||
pub extern fn rust_main() {
|
||||
// ATTENTION: we have a very small stack and no guard page
|
||||
|
||||
let hello = b"Hello World!";
|
||||
let color_byte = 0x1f; // white foreground, blue background
|
||||
|
||||
let mut hello_colored = [color_byte; 24];
|
||||
for (i, char_byte) in hello.into_iter().enumerate() {
|
||||
hello_colored[i*2] = *char_byte;
|
||||
}
|
||||
|
||||
// write `Hello World!` to the center of the VGA text buffer
|
||||
let buffer_ptr = (0xb8000 + 1988) as *mut _;
|
||||
unsafe { *buffer_ptr = hello_colored };
|
||||
|
||||
loop{}
|
||||
}
|
||||
```
|
||||
Some notes:
|
||||
|
||||
- The `b` prefix creates a [byte string], which is just an array of `u8`
|
||||
- [enumerate] is an `Iterator` method that adds the current index `i` to elements
|
||||
- `buffer_ptr` is a [raw pointer] that points to the center of the VGA text buffer
|
||||
- Rust doesn't know the VGA buffer and thus can't guarantee that writing to the `buffer_ptr` is safe (it could point to important data). So we need to tell Rust that we know what we are doing by using an [unsafe block].
|
||||
|
||||
[byte string]: https://doc.rust-lang.org/reference.html#characters-and-strings
|
||||
[enumerate]: https://doc.rust-lang.org/nightly/core/iter/trait.Iterator.html#method.enumerate
|
||||
[unsafe block]: https://doc.rust-lang.org/book/unsafe.html
|
||||
|
||||
### Stack Overflows
|
||||
Since we still use the small 64 byte [stack from the last post], we must be careful not to [overflow] it. Normally, Rust tries to avoid stack overflows through _guard pages_: The page below the stack isn't mapped and such a stack overflow triggers a page fault (instead of silently overwriting random memory). But we can't unmap the page below our stack right now since we currently use only a single big page. Fortunately the stack is located just above the page tables. So some important page table entry would probably get overwritten on stack overflow and then a page fault occurs, too.
|
||||
|
||||
[stack from the last post]: {{% relref "2015-08-25-entering-longmode.md#creating-a-stack" %}}
|
||||
[overflow]: https://en.wikipedia.org/wiki/Stack_overflow
|
||||
|
||||
## What's next?
|
||||
Until now we have written magic bits to some memory location whenever we wanted to print something to screen. In the [next post] we create an abstraction for the VGA text buffer that allows us to print strings in different colors and provides a simple interface.
|
||||
|
||||
[next post]: {{% relref "2015-10-23-printing-to-screen.md" %}}
|
||||
@@ -1,482 +0,0 @@
|
||||
+++
|
||||
title = "Printing to Screen"
|
||||
date = "2015-10-23"
|
||||
aliases = [
|
||||
"/2015/10/23/printing-to-screen/",
|
||||
"/rust-os/printing-to-screen.html",
|
||||
]
|
||||
+++
|
||||
|
||||
In the [previous post] we switched from assembly to [Rust], a systems programming language that provides great safety. But so far we are using unsafe features like [raw pointers] whenever we want to print to screen. In this post we will create a Rust module that provides a safe and easy-to-use interface for the VGA text buffer. It will support Rust's [formatting macros], too.
|
||||
|
||||
[previous post]: {{% relref "2015-09-02-set-up-rust.md" %}}
|
||||
[Rust]: https://www.rust-lang.org/
|
||||
[raw pointers]: https://doc.rust-lang.org/book/raw-pointers.html
|
||||
[formatting macros]: https://doc.rust-lang.org/std/fmt/#related-macros
|
||||
|
||||
<!--more--><aside id="toc"></aside>
|
||||
|
||||
This post uses recent unstable features, so you need an up-to-date nightly compiler. If you have any questions, problems, or suggestions please [file an issue] or create a comment at the bottom. The code from this post is also available on [Github][code repository].
|
||||
|
||||
[file an issue]: https://github.com/phil-opp/blog_os/issues
|
||||
[code repository]: https://github.com/phil-opp/blog_os/tree/printing_to_screen
|
||||
|
||||
## The VGA Text Buffer
|
||||
The text buffer starts at physical address `0xb8000` and contains the characters displayed on screen. It has 25 rows and 80 columns. Each screen character has the following format:
|
||||
|
||||
Bit(s) | Value
|
||||
------ | ----------------
|
||||
0-7 | ASCII code point
|
||||
8-11 | Foreground color
|
||||
12-14 | Background color
|
||||
15 | Blink
|
||||
|
||||
The following colors are available:
|
||||
|
||||
Number | Color | Number + Bright Bit | Bright Color
|
||||
------ | ---------- | ------------------- | -------------
|
||||
0x0 | Black | 0x8 | Dark Gray
|
||||
0x1 | Blue | 0x9 | Light Blue
|
||||
0x2 | Green | 0xa | Light Green
|
||||
0x3 | Cyan | 0xb | Light Cyan
|
||||
0x4 | Red | 0xc | Light Red
|
||||
0x5 | Magenta | 0xd | Pink
|
||||
0x6 | Brown | 0xe | Yellow
|
||||
0x7 | Light Gray | 0xf | White
|
||||
|
||||
Bit 4 is the _bright bit_, which turns for example blue into light blue. It is unavailable in background color as the bit is used to control if the text should blink. If you want to use a light background color (e.g. white) you have to disable blinking through a [BIOS function][disable blinking].
|
||||
|
||||
[disable blinking]: http://www.ctyme.com/intr/rb-0117.htm
|
||||
|
||||
## A basic Rust Module
|
||||
Now that we know how the VGA buffer works, we can create a Rust module to handle printing:
|
||||
|
||||
```rust
|
||||
// in src/lib.rs
|
||||
mod vga_buffer;
|
||||
```
|
||||
|
||||
The content of this module can live either in `src/vga_buffer.rs` or `src/vga_buffer/mod.rs`. The latter supports submodules while the former does not. But our module does not need any submodules so we create it as `src/vga_buffer.rs`.
|
||||
|
||||
All of the code below goes into our new module (unless specified otherwise).
|
||||
|
||||
### Colors
|
||||
First, we represent the different colors using an enum:
|
||||
|
||||
```rust
|
||||
#[allow(dead_code)]
|
||||
#[repr(u8)]
|
||||
pub enum Color {
|
||||
Black = 0,
|
||||
Blue = 1,
|
||||
Green = 2,
|
||||
Cyan = 3,
|
||||
Red = 4,
|
||||
Magenta = 5,
|
||||
Brown = 6,
|
||||
LightGray = 7,
|
||||
DarkGray = 8,
|
||||
LightBlue = 9,
|
||||
LightGreen = 10,
|
||||
LightCyan = 11,
|
||||
LightRed = 12,
|
||||
Pink = 13,
|
||||
Yellow = 14,
|
||||
White = 15,
|
||||
}
|
||||
```
|
||||
We use a [C-like enum] here to explicitly specify the number for each color. Because of the `repr(u8)` attribute each enum variant is stored as an `u8`. Actually 4 bits would be sufficient, but Rust doesn't have an `u4` type.
|
||||
|
||||
[C-like enum]: http://rustbyexample.com/custom_types/enum/c_like.html
|
||||
|
||||
Normally the compiler would issue a warning for each unused variant. By using the `#[allow(dead_code)]` attribute we disable these warnings for the `Color` enum.
|
||||
|
||||
To represent a full color code that specifies foreground and background color, we create a [newtype] on top of `u8`:
|
||||
|
||||
[newtype]: https://aturon.github.io/features/types/newtype.html
|
||||
|
||||
```rust
|
||||
struct ColorCode(u8);
|
||||
|
||||
impl ColorCode {
|
||||
const fn new(foreground: Color, background: Color) -> ColorCode {
|
||||
ColorCode((background as u8) << 4 | (foreground as u8))
|
||||
}
|
||||
}
|
||||
```
|
||||
The `ColorCode` contains the full color byte, containing foreground and background color. Blinking is enabled implicitly by using a bright background color (soon we will disable blinking anyway). The `new` function is a [const function] to allow it in static initializers. As `const` functions are unstable we need to add the `const_fn` feature in `src/lib.rs`.
|
||||
|
||||
[const function]: https://github.com/rust-lang/rfcs/blob/master/text/0911-const-fn.md
|
||||
|
||||
### The Text Buffer
|
||||
Now we can add structures to represent a screen character and the text buffer:
|
||||
|
||||
```rust
|
||||
#[repr(C)]
|
||||
struct ScreenChar {
|
||||
ascii_character: u8,
|
||||
color_code: ColorCode,
|
||||
}
|
||||
|
||||
const BUFFER_HEIGHT: usize = 25;
|
||||
const BUFFER_WIDTH: usize = 80;
|
||||
|
||||
struct Buffer {
|
||||
chars: [[ScreenChar; BUFFER_WIDTH]; BUFFER_HEIGHT],
|
||||
}
|
||||
```
|
||||
Since the field ordering in default structs is undefined in Rust, we need the [repr(C\)] attribute. It guarantees that the struct's fields are laid out exactly like in a C struct and thus guarantees the correct field ordering.
|
||||
|
||||
[repr(C\)]: https://doc.rust-lang.org/nightly/nomicon/other-reprs.html#reprc
|
||||
|
||||
To actually write to screen, we now create a writer type:
|
||||
|
||||
```rust
|
||||
use core::ptr::Unique;
|
||||
|
||||
pub struct Writer {
|
||||
column_position: usize,
|
||||
color_code: ColorCode,
|
||||
buffer: Unique<Buffer>,
|
||||
}
|
||||
```
|
||||
The writer will always write to the last line and shift lines up when a line is full (or on `\n`). The `column_position` field keeps track of the current position in the last row. The current foreground and background colors are specified by `color_code` and a pointer to the VGA buffer is stored in `buffer`. To make it possible to create a `static` Writer later, the `buffer` field stores an `Unique<Buffer>` instead of a plain `*mut Buffer`. [Unique] is a wrapper that implements Send/Sync and is thus usable as a `static`. Since it's unstable, you may need to add the `unique` feature to `lib.rs`:
|
||||
|
||||
[Unique]: https://doc.rust-lang.org/nightly/core/ptr/struct.Unique.html
|
||||
|
||||
```rust
|
||||
// in src/lib.rs
|
||||
#![feature(unique)]
|
||||
```
|
||||
|
||||
## Printing to Screen
|
||||
Now we can use the `Writer` to modify the buffer's characters. First we create a method to write a single ASCII byte (it doesn't compile yet):
|
||||
|
||||
```rust
|
||||
impl Writer {
|
||||
pub fn write_byte(&mut self, byte: u8) {
|
||||
match byte {
|
||||
b'\n' => self.new_line(),
|
||||
byte => {
|
||||
if self.column_position >= BUFFER_WIDTH {
|
||||
self.new_line();
|
||||
}
|
||||
|
||||
let row = BUFFER_HEIGHT - 1;
|
||||
let col = self.column_position;
|
||||
|
||||
self.buffer().chars[row][col] = ScreenChar {
|
||||
ascii_character: byte,
|
||||
color_code: self.color_code,
|
||||
};
|
||||
self.column_position += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn buffer(&mut self) -> &mut Buffer {
|
||||
unsafe{ self.buffer.get_mut() }
|
||||
}
|
||||
|
||||
fn new_line(&mut self) {/* TODO */}
|
||||
}
|
||||
```
|
||||
If the byte is the [newline] byte `\n`, the writer does not print anything. Instead it calls a `new_line` method, which we'll implement later. Other bytes get printed to the screen in the second match case.
|
||||
|
||||
[newline]: https://en.wikipedia.org/wiki/Newline
|
||||
|
||||
When printing a byte, the writer checks if the current line is full. In that case, a `new_line` call is required first to wrap the line. Then it writes a new `ScreenChar` to the buffer at the current position. Finally, the current column position is advanced.
|
||||
|
||||
The `buffer()` auxiliary method converts the raw pointer in the `buffer` field into a safe mutable buffer reference. The unsafe block is needed because the [get_mut()] method of `Unique` is unsafe. But our `buffer()` method itself isn't marked as unsafe, so it must not introduce any unsafety (e.g. cause segfaults). To guarantee that, it's very important that the `buffer` field always points to a valid `Buffer`. It's like a contract that we must uphold every time we create a `Writer`. To ensure that it's not possible to create an invalid `Writer` from outside of the module, the struct must have at least one private field and public creation functions are not allowed either.
|
||||
[get_mut()]: https://doc.rust-lang.org/nightly/core/ptr/struct.Unique.html#method.get_mut
|
||||
|
||||
### Cannot Move out of Borrowed Content
|
||||
When we try to compile it, we get the following error:
|
||||
|
||||
```
|
||||
error: cannot move out of borrowed content [E0507]
|
||||
color_code: self.color_code,
|
||||
^~~~
|
||||
```
|
||||
The reason is that Rust _moves_ values by default instead of copying them like other languages. And we cannot move `color_code` out of `self` because we only borrowed `self`. For more information check out the [ownership section] in the Rust book. To fix it, we can implement the [Copy trait] for the `ColorCode` type by adding `#[derive(Clone, Copy)]` to its struct.
|
||||
[ownership section]: https://doc.rust-lang.org/book/ownership.html
|
||||
[Copy trait]: https://doc.rust-lang.org/nightly/core/marker/trait.Copy.html
|
||||
|
||||
### Try it out!
|
||||
To write some characters to the screen, you can create a temporary function:
|
||||
|
||||
```rust
|
||||
pub fn print_something() {
|
||||
let mut writer = Writer {
|
||||
column_position: 0,
|
||||
color_code: ColorCode::new(Color::LightGreen, Color::Black),
|
||||
buffer: unsafe { Unique::new(0xb8000 as *mut _) },
|
||||
};
|
||||
|
||||
writer.write_byte(b'H');
|
||||
}
|
||||
```
|
||||
It just creates a new Writer that points to the VGA buffer at `0xb8000`. Then it writes the byte `b'H'` to it. The `b` prefix creates a [byte character], which represents an ASCII code point. When we call `vga_buffer::print_something` in main, a `H` should be printed in the _lower_ left corner of the screen in light green.
|
||||
|
||||
[byte character]: https://doc.rust-lang.org/reference.html#characters-and-strings
|
||||
|
||||
### Printing Strings
|
||||
|
||||
To print whole strings, we can convert them to bytes and print them one-by-one:
|
||||
|
||||
```rust
|
||||
// in `impl Writer`
|
||||
pub fn write_str(&mut self, s: &str) {
|
||||
for byte in s.bytes() {
|
||||
self.write_byte(byte)
|
||||
}
|
||||
}
|
||||
```
|
||||
You can try it yourself in the `print_something` function.
|
||||
|
||||
When you print strings with some special characters like `ä` or `λ`, you'll notice that they cause weird symbols on screen. That's because they are represented by multiple bytes in [UTF-8]. By converting them to bytes, we of course get strange results. But since the VGA buffer doesn't support UTF-8, it's not possible to display these characters anyway.
|
||||
|
||||
[core tracking issue]: https://github.com/rust-lang/rust/issues/27701
|
||||
[UTF-8]: http://www.fileformat.info/info/unicode/utf8.htm
|
||||
|
||||
### Support Formatting Macros
|
||||
It would be nice to support Rust's formatting macros, too. That way, we can easily print different types like integers or floats. To support them, we need to implement the [core::fmt::Write] trait. The only required method of this trait is `write_str` that looks quite similar to our `write_str` method. To implement the trait, we just need to move it into an `impl ::core::fmt::Write for Writer` block and add a return type:
|
||||
|
||||
```rust
|
||||
impl ::core::fmt::Write for Writer {
|
||||
fn write_str(&mut self, s: &str) -> ::core::fmt::Result {
|
||||
for byte in s.bytes() {
|
||||
self.write_byte(byte)
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
```
|
||||
The `Ok(())` is just an `Ok` Result containing the `()` type. We can drop the `pub` because trait methods are always public.
|
||||
|
||||
Now we can use Rust's built-in `write!`/`writeln!` formatting macros:
|
||||
|
||||
```rust
|
||||
// in the `print_something` function
|
||||
use core::fmt::Write;
|
||||
let mut writer = Writer {...};
|
||||
writer.write_byte(b'H');
|
||||
writer.write_str("ello! ");
|
||||
write!(writer, "The numbers are {} and {}", 42, 1.0/3.0);
|
||||
```
|
||||
Now you should see a `Hello! The numbers are 42 and 0.3333333333333333` in strange colors at the bottom of the screen.
|
||||
|
||||
[core::fmt::Write]: https://doc.rust-lang.org/nightly/core/fmt/trait.Write.html
|
||||
|
||||
### Newlines
|
||||
Right now, we just ignore newlines and characters that don't fit into the line anymore. Instead we want to move every character one line up (the top line gets deleted) and start at the beginning of the last line again. To do this, we add an implementation for the `new_line` method of `Writer`:
|
||||
|
||||
```rust
|
||||
// in `impl Writer`
|
||||
|
||||
fn new_line(&mut self) {
|
||||
for row in 0..(BUFFER_HEIGHT-1) {
|
||||
let buffer = self.buffer();
|
||||
buffer.chars[row] = buffer.chars[row + 1]
|
||||
}
|
||||
self.clear_row(BUFFER_HEIGHT-1);
|
||||
self.column_position = 0;
|
||||
}
|
||||
|
||||
fn clear_row(&mut self, row: usize) {/* TODO */}
|
||||
```
|
||||
We just move each line to the line above. Notice that the range notation (`..`) is exclusive of the upper bound. But when we try to compile it, we get a borrow checker error again:
|
||||
|
||||
```
|
||||
error: cannot move out of indexed content [E0507]
|
||||
buffer.chars[row] = buffer.chars[row + 1]
|
||||
^~~~~~~~~~~~~~~~~~~~~
|
||||
```
|
||||
It's because of Rust's move semantics again: We try to move out the row of `ScreenChar`s at `row + 1`. If Rust allowed that, the array would become invalid as it would contain some valid and some moved out values. Fortunately, the `ScreenChar` type meets all criteria for the [Copy trait], so we can fix the problem by adding `#[derive(Clone, Copy)]` to `ScreenChar`.
|
||||
|
||||
Now we only need to implement the `clear_row` method to finish the newline code:
|
||||
|
||||
```rust
|
||||
// in `impl Writer`
|
||||
fn clear_row(&mut self, row: usize) {
|
||||
let blank = ScreenChar {
|
||||
ascii_character: b' ',
|
||||
color_code: self.color_code,
|
||||
};
|
||||
self.buffer().chars[row] = [blank; BUFFER_WIDTH];
|
||||
}
|
||||
```
|
||||
|
||||
## Providing an Interface
|
||||
To provide a global writer that can be used as an interface from other modules, we can add a `static` writer:
|
||||
|
||||
```rust
|
||||
pub static WRITER: Writer = Writer {
|
||||
column_position: 0,
|
||||
color_code: ColorCode::new(Color::LightGreen, Color::Black),
|
||||
buffer: unsafe { Unique::new(0xb8000 as *mut _) },
|
||||
};
|
||||
```
|
||||
|
||||
But we can't use it to print anything! You can try it yourself in the `print_something` function. The reason is that we try to take a mutable reference (`&mut`) to an immutable `static` when calling `WRITER.write_byte`.
|
||||
|
||||
To resolve it, we could use a [mutable static]. But then every read and write to it would be unsafe since it could easily introduce data races and other bad things. Using `static mut` is highly discouraged, there are even proposals to [remove it][remove static mut].
|
||||
|
||||
[mutable static]: https://doc.rust-lang.org/book/const-and-static.html#mutability
|
||||
[remove static mut]: https://internals.rust-lang.org/t/pre-rfc-remove-static-mut/1437
|
||||
|
||||
But what are the alternatives? We could try to use a cell type like [RefCell] or even [UnsafeCell] to provide [interior mutability]. But these types aren't [Sync] \(with good reason), so we can't use them in statics.
|
||||
|
||||
[RefCell]: https://doc.rust-lang.org/nightly/core/cell/struct.RefCell.html
|
||||
[UnsafeCell]: https://doc.rust-lang.org/nightly/core/cell/struct.UnsafeCell.html
|
||||
[interior mutability]: https://doc.rust-lang.org/book/mutability.html#interior-vs.-exterior-mutability
|
||||
[Sync]: https://doc.rust-lang.org/nightly/core/marker/trait.Sync.html
|
||||
|
||||
To get synchronized interior mutability, users of the standard library can use [Mutex]. It provides mutual exclusion by blocking threads when the resource is already locked. But our basic kernel does not have any blocking support or even a concept of threads, so we can't use it either. However there is a really basic kind of mutex in computer science that requires no operating system features: the [spinlock]. Instead of blocking, the threads simply try to lock it again and again in a tight loop and thus burn CPU time until the mutex is free again.
|
||||
|
||||
[Mutex]: https://doc.rust-lang.org/nightly/std/sync/struct.Mutex.html
|
||||
[spinlock]: https://en.wikipedia.org/wiki/Spinlock
|
||||
|
||||
To use a spinning mutex, we can add the [spin crate] as a dependency:
|
||||
|
||||
[spin crate]: https://crates.io/crates/spin
|
||||
|
||||
```toml
|
||||
# in Cargo.toml
|
||||
[dependencies]
|
||||
rlibc = "0.1.4"
|
||||
spin = "0.3.4"
|
||||
```
|
||||
|
||||
```rust
|
||||
// in src/lib.rs
|
||||
extern crate spin;
|
||||
```
|
||||
|
||||
Then we can use the spinning Mutex to add interior mutability to our static writer:
|
||||
|
||||
```rust
|
||||
// in src/vga_buffer.rs again
|
||||
use spin::Mutex;
|
||||
...
|
||||
pub static WRITER: Mutex<Writer> = Mutex::new(Writer {
|
||||
column_position: 0,
|
||||
color_code: ColorCode::new(Color::LightGreen, Color::Black),
|
||||
buffer: unsafe { Unique::new(0xb8000 as *mut _) },
|
||||
});
|
||||
```
|
||||
[Mutex::new] is a const function, too, so it can be used in statics.
|
||||
|
||||
Now we can easily print from our main function:
|
||||
|
||||
[Mutex::new]: https://mvdnes.github.io/rust-docs/spinlock-rs/spin/struct.Mutex.html#method.new
|
||||
|
||||
```rust
|
||||
// in src/lib.rs
|
||||
pub extern fn rust_main() {
|
||||
use core::fmt::Write;
|
||||
vga_buffer::WRITER.lock().write_str("Hello again");
|
||||
write!(vga_buffer::WRITER.lock(), ", some numbers: {} {}", 42, 1.337);
|
||||
loop{}
|
||||
}
|
||||
```
|
||||
Note that we need to import the `Write` trait if we want to use its functions.
|
||||
|
||||
## A println macro
|
||||
Rust's [macro syntax] is a bit strange, so we won't try to write a macro from scratch. Instead we look at the source of the [`println!` macro] in the standard library:
|
||||
|
||||
[macro syntax]: https://doc.rust-lang.org/nightly/book/macros.html
|
||||
[`println!` macro]: https://doc.rust-lang.org/nightly/std/macro.println!.html
|
||||
|
||||
```rust
|
||||
macro_rules! println {
|
||||
($fmt:expr) => (print!(concat!($fmt, "\n")));
|
||||
($fmt:expr, $($arg:tt)*) => (print!(concat!($fmt, "\n"), $($arg)*));
|
||||
}
|
||||
```
|
||||
It just adds a `\n` and then invokes the [`print!` macro], which is defined as:
|
||||
|
||||
[`print!` macro]: https://doc.rust-lang.org/nightly/std/macro.print!.html
|
||||
|
||||
```rust
|
||||
macro_rules! print {
|
||||
($($arg:tt)*) => ($crate::io::_print(format_args!($($arg)*)));
|
||||
}
|
||||
```
|
||||
It calls the `_print` method in the `io` module of the current crate (`$crate`), which is `std`. The [`_print` function] in libstd is rather complicated, as it supports different `Stdout` devices.
|
||||
|
||||
[`_print` function]: https://doc.rust-lang.org/nightly/src/std/io/stdio.rs.html#578
|
||||
|
||||
To print to the VGA buffer, we just copy the `println!` macro and modify the `print!` macro to use our static `WRITER` instead of `_print`:
|
||||
|
||||
```rust
|
||||
// in src/vga_buffer.rs
|
||||
macro_rules! print {
|
||||
($($arg:tt)*) => ({
|
||||
use core::fmt::Write;
|
||||
let mut writer = $crate::vga_buffer::WRITER.lock();
|
||||
writer.write_fmt(format_args!($($arg)*)).unwrap();
|
||||
});
|
||||
}
|
||||
```
|
||||
Instead of a `_print` function, we call the `write_fmt` method of our static `Writer`. Since we're using a method from the `Write` trait, we need to import it first. The additional `unwrap()` at the end panics if printing isn't successful. But since we always return `Ok` in `write_str`, that should not happen.
|
||||
|
||||
Note the additional `{}` scope around the macro: I wrote `=> ({…})` instead of `=> (…)`. The additional `{}` prevents the `Write` trait from being silently imported into the calling scope when `print` is used.
|
||||
|
||||
### Clearing the screen
|
||||
We can now use `println!` to add a rather trivial function to clear the screen:
|
||||
|
||||
```rust
|
||||
// in src/vga_buffer.rs
|
||||
pub fn clear_screen() {
|
||||
for _ in 0..BUFFER_HEIGHT {
|
||||
println!("");
|
||||
}
|
||||
}
|
||||
```
|
||||
### Hello World using `println`
|
||||
To use `println` in `lib.rs`, we need to import the macros of the VGA buffer module first. Therefore we add a `#[macro_use]` attribute to the module declaration:
|
||||
|
||||
```rust
|
||||
// in src/lib.rs
|
||||
|
||||
#[macro_use]
|
||||
mod vga_buffer;
|
||||
|
||||
#[no_mangle]
|
||||
pub extern fn rust_main() {
|
||||
// ATTENTION: we have a very small stack and no guard page
|
||||
vga_buffer::clear_screen();
|
||||
println!("Hello World{}", "!");
|
||||
|
||||
loop{}
|
||||
}
|
||||
```
|
||||
Since we imported the macros at crate level, they are available in all modules and thus provide an easy and safe interface to the VGA buffer.
|
||||
|
||||
## What's next?
|
||||
In the next posts we will map the kernel pages correctly so that accessing `0x0` or writing to `.rodata` is not possible anymore. To obtain the loaded kernel sections we will read the Multiboot information structure. Then we will create a paging module and use it to switch to a new page table where the kernel sections are mapped correctly.
|
||||
|
||||
The [next post] describes the Multiboot information structure and creates a frame allocator using the information about memory areas.
|
||||
|
||||
[next post]: {{% relref "2015-11-15-allocating-frames.md" %}}
|
||||
|
||||
## Other Rust OS Projects
|
||||
Now that you know the very basics of OS development in Rust, you should also check out the following projects:
|
||||
|
||||
- [Rust Bare-Bones Kernel]: A basic kernel with roughly the same functionality as ours. Writes output to the serial port instead of the VGA buffer and maps the kernel to the [higher half] \(instead of our identity mapping).
|
||||
_Note_: You need to [cross compile binutils] to build it (or you create some symbolic links[^fn-symlink] if you're on x86_64).
|
||||
[Rust Bare-Bones Kernel]: https://github.com/thepowersgang/rust-barebones-kernel
|
||||
[higher half]: http://wiki.osdev.org/Higher_Half_Kernel
|
||||
[cross compile binutils]: {{% relref "cross-compile-binutils.md" %}}
|
||||
[^fn-symlink]: You will need to symlink `x86_64-none_elf-XXX` to `/usr/bin/XXX` where `XXX` is in {`as`, `ld`, `objcopy`, `objdump`, `strip`}. The `x86_64-none_elf-XXX` files must be in some folder that is in your `$PATH`. But then you can only build for your x86_64 host architecture, so use this hack only for testing.
|
||||
|
||||
- [RustOS]: More advanced kernel that supports allocation, keyboard inputs, and threads. It also has a scheduler and a basic network driver.
|
||||
[RustOS]: https://github.com/RustOS-Fork-Holding-Ground/RustOS
|
||||
|
||||
- ["Tifflin" Experimental Kernel]: Big kernel project by thepowersgang, that is actively developed and has over 650 commits. It has a separate userspace and supports multiple file systems, even a GUI is included. Needs a cross compiler.
|
||||
["Tifflin" Experimental Kernel]:https://github.com/thepowersgang/rust_os
|
||||
|
||||
- [Redox]: Probably the most complete Rust OS today. It has an active community and over 1000 Github stars. File systems, network, an audio player, a picture viewer, and much more. Just take a look at the [screenshots][redox screenshots].
|
||||
[Redox]: https://github.com/redox-os/redox
|
||||
[redox screenshots]: https://github.com/redox-os/redox#what-it-looks-like
|
||||
@@ -1,433 +0,0 @@
|
||||
+++
|
||||
title = "Allocating Frames"
|
||||
date = "2015-11-15"
|
||||
+++
|
||||
|
||||
In this post we create an allocator that provides free physical frames for a future paging module. To get the required information about available and used memory we use the Multiboot information structure. Additionally, we improve the `panic` handler to print the corresponding message and source line.
|
||||
|
||||
<!--more--><aside id="toc"></aside>
|
||||
|
||||
The full source code is available on [Github][source repo]. Feel free to open issues there if you have any problems or improvements. You can also leave a comment at the bottom.
|
||||
|
||||
[source repo]: https://github.com/phil-opp/blog_os/tree/allocating_frames
|
||||
|
||||
## Preparation
|
||||
We still have a really tiny stack of 64 bytes, which won't suffice for this post. So we increase it to 4096 bytes (one page) in `boot.asm`:
|
||||
|
||||
```asm
|
||||
section .bss
|
||||
...
|
||||
stack_bottom:
|
||||
resb 4096
|
||||
stack_top:
|
||||
```
|
||||
|
||||
## The Multiboot Information Structure
|
||||
When a Multiboot compliant bootloader loads a kernel, it passes a pointer to a boot information structure in the `ebx` register. We can use it to get information about available memory and loaded kernel sections.
|
||||
|
||||
First, we need to pass this pointer to our kernel as an argument to `rust_main`. To find out how arguments are passed to functions, we can look at the [calling convention of Linux]:
|
||||
|
||||
[calling convention of Linux]: https://en.wikipedia.org/wiki/X86_calling_conventions#System_V_AMD64_ABI
|
||||
|
||||
> The first six integer or pointer arguments are passed in registers RDI, RSI, RDX, RCX, R8, and R9
|
||||
|
||||
So to pass the pointer to our kernel, we need to move it to `rdi` before calling the kernel. Since we're not using the `rdi`/`edi` register in our bootstrap code, we can simply set the `edi` register right after booting (in `boot.asm`):
|
||||
|
||||
```nasm
|
||||
start:
|
||||
mov esp, stack_top
|
||||
mov edi, ebx ; Move Multiboot info pointer to edi
|
||||
```
|
||||
Now we can add the argument to our `rust_main`:
|
||||
|
||||
```rust
|
||||
pub extern fn rust_main(multiboot_information_address: usize) { ... }
|
||||
```
|
||||
|
||||
Instead of writing our own Multiboot module, we use the [multiboot2-elf64] crate. It gives us some basic information about mapped kernel sections and available memory. I just wrote it for this blog post since I could not find any other Multiboot 2 crate. It's really ugly and incomplete, but it does its job[^fn-multiboot-crate].
|
||||
|
||||
[multiboot2-elf64]: https://github.com/phil-opp/multiboot2-elf64
|
||||
[^fn-multiboot-crate]: All contributions are welcome! If you want to maintain it, please contact me!
|
||||
|
||||
So let's add a dependency on the git repository:
|
||||
|
||||
```toml
|
||||
# in Cargo.toml
|
||||
[dependencies.multiboot2]
|
||||
git = "https://github.com/phil-opp/multiboot2-elf64"
|
||||
```
|
||||
|
||||
```rust
|
||||
// in src/lib.rs
|
||||
extern crate multiboot2;
|
||||
```
|
||||
|
||||
Now we can use it to print available memory areas.
|
||||
|
||||
### Available Memory
|
||||
The boot information structure consists of various _tags_. See section 3.4 of the Multiboot specification ([PDF][multiboot specification]) for a complete list. The _memory map_ tag contains a list of all available RAM areas. Special areas such as the VGA text buffer at `0xb8000` are not available. Note that some of the available memory is already used by our kernel and by the multiboot information structure itself.
|
||||
|
||||
[multiboot specification]: http://nongnu.askapache.com/grub/phcoder/multiboot.pdf
|
||||
|
||||
To print all available memory areas, we can use the `multiboot2` crate in our `rust_main` as follows:
|
||||
|
||||
```rust
|
||||
let boot_info = unsafe{ multiboot2::load(multiboot_information_address) };
|
||||
let memory_map_tag = boot_info.memory_map_tag()
|
||||
.expect("Memory map tag required");
|
||||
|
||||
println!("memory areas:");
|
||||
for area in memory_map_tag.memory_areas() {
|
||||
println!(" start: 0x{:x}, length: 0x{:x}",
|
||||
area.base_addr, area.length);
|
||||
}
|
||||
```
|
||||
The `load` function is `unsafe` because it relies on a valid address. Since the memory tag is not required by the Multiboot specification, the `memory_map_tag()` function returns an `Option`. The `memory_areas()` function returns the desired memory area iterator.
|
||||
|
||||
The output looks like this:
|
||||
|
||||
```
|
||||
Hello World!
|
||||
memory areas:
|
||||
start: 0x0, length: 0x9fc00
|
||||
start: 0x100000, length: 0x7ee0000
|
||||
```
|
||||
So we have one area from `0x0` to `0x9fc00`, which is a bit below the 1MiB mark. The second, bigger area starts at 1MiB and contains the rest of available memory. The area from `0x9fc00` to 1MiB is not available since it contains for example the VGA text buffer at `0xb8000`. This is the reason for putting our kernel at 1MiB and not somewhere below.
|
||||
|
||||
If you give QEMU more than 4GiB of memory by passing `-m 5G`, you get another unusable area below the 4GiB mark. This memory is normally mapped to some hardware devices. See the [OSDev Wiki][Memory_map] for more information.
|
||||
|
||||
[Memory_map]: http://wiki.osdev.org/Memory_Map_(x86)
|
||||
|
||||
### Handling Panics
|
||||
We used `expect` in the code above, which will panic if there is no memory map tag. But our current panic handler just loops without printing any error message. Of course we could replace `expect` by a `match`, but we should fix the panic handler nonetheless:
|
||||
|
||||
```rust
|
||||
#[lang = "panic_fmt"]
|
||||
extern fn panic_fmt() -> ! {
|
||||
println!("PANIC");
|
||||
loop{}
|
||||
}
|
||||
```
|
||||
Now we get a `PANIC` message. But we can do even better. The `panic_fmt` function has actually some arguments:
|
||||
|
||||
```rust
|
||||
#[lang = "panic_fmt"]
|
||||
extern fn panic_fmt(fmt: core::fmt::Arguments, file: &str, line: u32) -> ! {
|
||||
println!("\n\nPANIC in {} at line {}:", file, line);
|
||||
println!(" {}", fmt);
|
||||
loop{}
|
||||
}
|
||||
```
|
||||
Be careful with these arguments as the compiler does not check the function signature for `lang_items`.
|
||||
|
||||
Now we get the panic message and the causing source line. You can try it by inserting a `panic` somewhere.
|
||||
|
||||
### Kernel ELF Sections
|
||||
To read and print the sections of our kernel ELF file, we can use the _Elf-sections_ tag:
|
||||
|
||||
```rust
|
||||
let elf_sections_tag = boot_info.elf_sections_tag()
|
||||
.expect("Elf-sections tag required");
|
||||
|
||||
println!("kernel sections:");
|
||||
for section in elf_sections_tag.sections() {
|
||||
println!(" addr: 0x{:x}, size: 0x{:x}, flags: 0x{:x}",
|
||||
section.addr, section.size, section.flags);
|
||||
}
|
||||
```
|
||||
This should print out the start address and size of all kernel sections. If the section is writable, the `0x1` bit is set in `flags`. The `0x4` bit marks an executable section and the `0x2` bit indicates that the section was loaded in memory. For example, the `.text` section is executable but not writable and the `.data` section is just the opposite.
|
||||
|
||||
But when we execute it, tons of really small sections are printed. We can use the `objdump -h build/kernel-x86_64.bin` command to list the sections with name. There seem to be over 200 sections and many of them start with `.text.*` or `.data.rel.ro.local.*`. This is because the Rust compiler puts e.g. each function in its own `.text` subsection. That way, unused functions are removed when the linker omits unused sections.
|
||||
|
||||
To merge these subsections, we need to update our linker script:
|
||||
|
||||
```
|
||||
ENTRY(start)
|
||||
|
||||
SECTIONS {
|
||||
. = 1M;
|
||||
|
||||
.boot :
|
||||
{
|
||||
KEEP(*(.multiboot_header))
|
||||
}
|
||||
|
||||
.text :
|
||||
{
|
||||
*(.text .text.*)
|
||||
}
|
||||
|
||||
.rodata : {
|
||||
*(.rodata .rodata.*)
|
||||
}
|
||||
|
||||
.data.rel.ro : {
|
||||
*(.data.rel.ro.local*) *(.data.rel.ro .data.rel.ro.*)
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
These lines are taken from the default linker script of `ld`, which can be obtained through `ld --verbose`. The `.text` _output_ section now contains all `.text.*` _input_ sections of the static library (and the same applies for the `.rodata` and `.data.rel.ro` sections).
|
||||
|
||||
Now there are only 12 sections left and we get a much more useful output:
|
||||
|
||||

|
||||
|
||||
If you like, you can compare this output to the `objdump -h build/kernel-x86_64.bin` output. You will see that the start addresses and sizes match exactly for each section. The sections with flags `0x0` are mostly debug sections, so they don't need to be loaded. And the last few sections of the QEMU output aren't in the `objdump` output because they are special sections such as string tables.
|
||||
|
||||
### Start and End of Kernel
|
||||
We can now use the ELF section tag to calculate the start and end address of our loaded kernel:
|
||||
|
||||
```rust
|
||||
let kernel_start = elf_sections_tag.sections().map(|s| s.addr)
|
||||
.min().unwrap();
|
||||
let kernel_end = elf_sections_tag.sections().map(|s| s.addr + s.size)
|
||||
.max().unwrap();
|
||||
```
|
||||
The other used memory area is the Multiboot Information structure:
|
||||
|
||||
```rust
|
||||
let multiboot_start = multiboot_information_address;
|
||||
let multiboot_end = multiboot_start + (boot_info.total_size as usize);
|
||||
```
|
||||
Printing these numbers gives us:
|
||||
|
||||
```
|
||||
kernel_start: 0x100000, kernel_end: 0x11a168
|
||||
multiboot_start: 0x11d400, multiboot_end: 0x11d9c8
|
||||
```
|
||||
So the kernel starts at 1MiB (as expected) and is about 105 KiB in size. The multiboot information structure was placed at `0x11d400` by GRUB and needs 1480 bytes. Of course your numbers could be a bit different due to different versions of Rust or GRUB (or some differences in the source code).
|
||||
|
||||
## A frame allocator
|
||||
When using paging, the physical memory is split into equally sized chunks (normally 4096 bytes). Such a chunk is called "physical page" or "frame". These frames can be mapped to any virtual page through page tables. For more information about paging take a peek at the [next post].
|
||||
|
||||
We will need a free frame in many cases. For example when we want to increase the size of our future kernel heap. Or when we create a new page table. Or when we add a new kernel thread and thus need to allocate a new stack. So we need some kind of allocator that keeps track of physical frames and gives us a free one when needed.
|
||||
|
||||
There are various ways to write such a frame allocator:
|
||||
|
||||
We could create some kind of linked list from the free frames. For example, each frame could begin with a pointer to the next free frame. Since the frames are free, this would not overwrite any data. Our allocator would just save the head of the list and could easily allocate and deallocate frames by updating pointers. Unfortunately, this approach has a problem: It requires reading and writing these free frames. So we would need to map all physical frames to some virtual address, at least temporarily. Another disadvantage is that we need to create this linked list at startup. That implies that we need to set over one million pointers at startup if the machine has 4GiB of RAM.
|
||||
|
||||
Another approach is to create some kind of data structure such as a [bitmap or a stack] to manage free frames. We could place it in the already identity mapped area right behind the kernel or multiboot structure. That way we would not need to (temporarily) map each free frame. But it has the same problem of the slow initial creation/filling. In fact, we will use this approach in a future post to manage frames that are freed again. But for the initial management of free frames, we use a different method.
|
||||
[bitmap or a stack]: http://wiki.osdev.org/Page_Frame_Allocation#Physical_Memory_Allocators
|
||||
|
||||
In the following, we will use Multiboot's memory map directly. The idea is to maintain a simple counter that starts at frame 0 and is increased constantly. If the current frame is available (part of an available area in the memory map) and not used by the kernel or the multiboot structure (we know their start and end addresses), we know that it's free and return it. Else, we increase the counter to the next possibly free frame. That way, we don't need to create a data structure when booting and the physical frames can remain unmapped. The only problem is that we cannot reasonably free frames again, but we will solve that problem in a future post (by adding an intermediate frame stack that saves freed frames).
|
||||
|
||||
<!--- TODO link future post -->
|
||||
|
||||
So let's start implementing our memory map based frame allocator.
|
||||
|
||||
### A Memory Module
|
||||
First we create a memory module with a `Frame` type (`src/memory/mod.rs`):
|
||||
|
||||
```rust
|
||||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct Frame {
|
||||
number: usize,
|
||||
}
|
||||
```
|
||||
(Don't forget to add the `mod memory` line to `src/lib.rs`.) Instead of e.g. the start address, we just store the frame number. We use `usize` here since the number of frames depends on the memory size. The long `derive` line makes frames printable and comparable.
|
||||
|
||||
_Update_: In a previous version, the `Clone` and `Copy` traits were derived, too. [This was removed][PR 52] to make the allocator interface safer.
|
||||
[PR 52]: https://github.com/phil-opp/blog_os/pull/52
|
||||
|
||||
To make it easy to get the corresponding frame for a physical address, we add a `containing_address` method:
|
||||
|
||||
```rust
|
||||
pub const PAGE_SIZE: usize = 4096;
|
||||
|
||||
impl Frame {
|
||||
fn containing_address(address: usize) -> Frame {
|
||||
Frame{ number: address / PAGE_SIZE }
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
We also add a `FrameAllocator` trait:
|
||||
|
||||
```rust
|
||||
pub trait FrameAllocator {
|
||||
fn allocate_frame(&mut self) -> Option<Frame>;
|
||||
fn deallocate_frame(&mut self, frame: Frame);
|
||||
}
|
||||
```
|
||||
This allows us to create another, more advanced frame allocator in the future.
|
||||
|
||||
### The Allocator
|
||||
Now we can put everything together and create the actual frame allocator. Therefore we create a `src/memory/area_frame_allocator.rs` submodule. The allocator struct looks like this:
|
||||
|
||||
```rust
|
||||
use memory::{Frame, FrameAllocator};
|
||||
use multiboot2::{MemoryAreaIter, MemoryArea};
|
||||
|
||||
pub struct AreaFrameAllocator {
|
||||
next_free_frame: Frame,
|
||||
current_area: Option<&'static MemoryArea>,
|
||||
areas: MemoryAreaIter,
|
||||
kernel_start: Frame,
|
||||
kernel_end: Frame,
|
||||
multiboot_start: Frame,
|
||||
multiboot_end: Frame,
|
||||
}
|
||||
```
|
||||
The `next_free_frame` field is a simple counter that is increased every time we return a frame. It's initialized to `0` and every frame below it counts as used. The `current_area` field holds the memory area that contains `next_free_frame`. If `next_free_frame` leaves this area, we will look for the next one in `areas`. When there are no areas left, all frames are used and `current_area` becomes `None`. The `{kernel, multiboot}_{start, end}` fields are used to avoid returning already used frames.
|
||||
|
||||
To implement the `FrameAllocator` trait, we need to implement the allocation and deallocation methods:
|
||||
|
||||
```rust
|
||||
impl FrameAllocator for AreaFrameAllocator {
|
||||
fn allocate_frame(&mut self) -> Option<Frame> {
|
||||
// TODO (see below)
|
||||
}
|
||||
|
||||
fn deallocate_frame(&mut self, frame: Frame) {
|
||||
// TODO (see below)
|
||||
}
|
||||
}
|
||||
```
|
||||
The `allocate_frame` method looks like this:
|
||||
|
||||
```rust
|
||||
// in `allocate_frame` in `impl FrameAllocator for AreaFrameAllocator`
|
||||
|
||||
if let Some(area) = self.current_area {
|
||||
// "Clone" the frame to return it if it's free. Frame doesn't
|
||||
// implement Clone, but we can construct an identical frame.
|
||||
let frame = Frame{ number: self.next_free_frame.number };
|
||||
|
||||
// the last frame of the current area
|
||||
let current_area_last_frame = {
|
||||
let address = area.base_addr + area.length - 1;
|
||||
Frame::containing_address(address as usize)
|
||||
};
|
||||
|
||||
if frame > current_area_last_frame {
|
||||
// all frames of current area are used, switch to next area
|
||||
self.choose_next_area();
|
||||
} else if frame >= self.kernel_start && frame <= self.kernel_end {
|
||||
// `frame` is used by the kernel
|
||||
self.next_free_frame = Frame {
|
||||
number: self.kernel_end.number + 1
|
||||
};
|
||||
} else if frame >= self.multiboot_start && frame <= self.multiboot_end {
|
||||
// `frame` is used by the multiboot information structure
|
||||
self.next_free_frame = Frame {
|
||||
number: self.multiboot_end.number + 1
|
||||
};
|
||||
} else {
|
||||
// frame is unused, increment `next_free_frame` and return it
|
||||
self.next_free_frame.number += 1;
|
||||
return Some(frame);
|
||||
}
|
||||
// `frame` was not valid, try it again with the updated `next_free_frame`
|
||||
self.allocate_frame()
|
||||
} else {
|
||||
None // no free frames left
|
||||
}
|
||||
```
|
||||
The `choose_next_area` method isn't part of the trait and thus goes into a new `impl AreaFrameAllocator` block:
|
||||
|
||||
```rust
|
||||
// in `impl AreaFrameAllocator`
|
||||
|
||||
fn choose_next_area(&mut self) {
|
||||
self.current_area = self.areas.clone().filter(|area| {
|
||||
let address = area.base_addr + area.length - 1;
|
||||
Frame::containing_address(address as usize) >= self.next_free_frame
|
||||
}).min_by_key(|area| area.base_addr);
|
||||
|
||||
if let Some(area) = self.current_area {
|
||||
let start_frame = Frame::containing_address(area.base_addr as usize);
|
||||
if self.next_free_frame < start_frame {
|
||||
self.next_free_frame = start_frame;
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
This function chooses the area with the minimal base address that still has free frames, i.e. `next_free_frame` is smaller than or equal to its last frame. Note that we need to clone the iterator because the [min_by_key] function consumes it. If there are no areas with free frames left, `min_by_key` automatically returns the desired `None`.
|
||||
|
||||
[min_by_key]: https://doc.rust-lang.org/nightly/core/iter/trait.Iterator.html#method.min_by_key
|
||||
|
||||
If the `next_free_frame` is below the new `current_area`, it needs to be updated to the area's start frame. Otherwise, the `allocate_frame` call could return an unavailable frame.
|
||||
|
||||
We don't have a data structure to store free frames, so we can't implement `deallocate_frame` reasonably. Thus we use the `unimplemented` macro, which just panics when the method is called:
|
||||
|
||||
```rust
|
||||
impl FrameAllocator for AreaFrameAllocator {
|
||||
fn allocate_frame(&mut self) -> Option<Frame> {
|
||||
// described above
|
||||
}
|
||||
|
||||
fn deallocate_frame(&mut self, _frame: Frame) {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Now we only need a constructor function to make the allocator usable:
|
||||
|
||||
```rust
|
||||
pub fn new(kernel_start: usize, kernel_end: usize,
|
||||
multiboot_start: usize, multiboot_end: usize,
|
||||
memory_areas: MemoryAreaIter) -> AreaFrameAllocator
|
||||
{
|
||||
let mut allocator = AreaFrameAllocator {
|
||||
next_free_frame: Frame::containing_address(0),
|
||||
current_area: None,
|
||||
areas: memory_areas,
|
||||
kernel_start: Frame::containing_address(kernel_start),
|
||||
kernel_end: Frame::containing_address(kernel_end),
|
||||
multiboot_start: Frame::containing_address(multiboot_start),
|
||||
multiboot_end: Frame::containing_address(multiboot_end),
|
||||
};
|
||||
allocator.choose_next_area();
|
||||
allocator
|
||||
}
|
||||
```
|
||||
Note that we call `choose_next_area` manually here because `allocate_frame` returns `None` as soon as `current_area` is `None`. So by calling `choose_next_area` we initialize it to the area with the minimal base address.
|
||||
|
||||
### Testing it
|
||||
In order to test it in main, we need to [re-export] the `AreaFrameAllocator` in the `memory` module. Then we can create a new allocator:
|
||||
|
||||
[re-export]: https://doc.rust-lang.org/book/crates-and-modules.html#re-exporting-with-pub-use
|
||||
|
||||
```rust
|
||||
let mut frame_allocator = memory::AreaFrameAllocator::new(
|
||||
kernel_start as usize, kernel_end as usize, multiboot_start,
|
||||
multiboot_end, memory_map_tag.memory_areas());
|
||||
```
|
||||
|
||||
Now we can test it by adding some frame allocations:
|
||||
|
||||
```rust
|
||||
println!("{:?}", frame_allocator.allocate_frame());
|
||||
```
|
||||
You will see that the frame number starts at `0` and increases steadily, but the kernel and Multiboot frames are left out (you need to allocate many frames to see this since the kernel starts at frame 256).
|
||||
|
||||
The following `for` loop allocates all frames and prints out the total number of allocated frames:
|
||||
|
||||
```rust
|
||||
for i in 0.. {
|
||||
if let None = frame_allocator.allocate_frame() {
|
||||
println!("allocated {} frames", i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
```
|
||||
You can try different amounts of memory by passing e.g. `-m 500M` to QEMU. To compare these numbers, [WolframAlpha] can be very helpful.
|
||||
|
||||
[WolframAlpha]: http://www.wolframalpha.com/input/?i=%2832605+*+4096%29+bytes+in+MiB
|
||||
|
||||
## Conclusion
|
||||
|
||||
Now we have a working frame allocator. It is a bit rudimentary and cannot free frames, but it also is very fast since it reuses the Multiboot memory map and does not need any costly initialization. A future post will build upon this allocator and add a stack-like data structure for freed frames.
|
||||
|
||||
## What's next?
|
||||
The [next post] will be about paging again. We will use the frame allocator to create a safe module that allows us to switch page tables and map pages. Then we will use this module and the information from the Elf-sections tag to remap the kernel correctly.
|
||||
|
||||
[next post]: {{% relref "2015-12-09-page-tables.md" %}}
|
||||
|
||||
## Recommended Posts
|
||||
Eric Kidd started the [Bare Metal Rust] series last week. Like this post, it builds upon the code from [Printing to Screen], but tries to support keyboard input instead of wrestling through memory management details.
|
||||
|
||||
[Bare Metal Rust]: http://www.randomhacks.net/bare-metal-rust/
|
||||
[Printing to Screen]: {{% relref "2015-10-23-printing-to-screen.md" %}}
|
||||
@@ -1,947 +0,0 @@
|
||||
+++
|
||||
title = "Page Tables"
|
||||
slug = "modifying-page-tables"
|
||||
date = "2015-12-09"
|
||||
+++
|
||||
|
||||
In this post we will create a paging module, which allows us to access and modify the 4-level page table. We will explore recursive page table mapping and use some Rust features to make it safe. Finally we will create functions to translate virtual addresses and to map and unmap pages.
|
||||
|
||||
<!--more--><aside id="toc"></aside>
|
||||
|
||||
You can find the source code and this post itself on [Github][source repository]. Please file an issue there if you have any problems or improvement suggestions. There is also a comment section at the end of this page. Note that this post requires a current Rust nightly.
|
||||
|
||||
[source repository]: https://github.com/phil-opp/blog_os/tree/page_tables
|
||||
|
||||
## Paging
|
||||
_Paging_ is a memory management scheme that separates virtual and physical memory. The address space is split into equal sized _pages_ and _page tables_ specify which virtual page points to which physical frame. For an extensive paging introduction take a look at the paging chapter ([PDF][paging chapter]) of the [Three Easy Pieces] OS book.
|
||||
|
||||
[paging chapter]: http://pages.cs.wisc.edu/~remzi/OSTEP/vm-paging.pdf
|
||||
[Three Easy Pieces]: http://pages.cs.wisc.edu/~remzi/OSTEP/
|
||||
|
||||
The x86 architecture uses a 4-level page table in 64-bit mode. A virtual address has the following structure:
|
||||
|
||||

|
||||
|
||||
The bits 48–63 are so-called _sign extension_ bits and must be copies of bit 47. The following 36 bits define the page table indexes (9 bits per table) and the last 12 bits specify the offset in the 4KiB page.
|
||||
|
||||
Each table has 2^9 = 512 entries and each entry is 8 bytes. Thus a page table fits exactly in one page (4 KiB).
|
||||
|
||||
To translate an address, the CPU reads the P4 address from the CR3 register. Then it uses the indexes to walk the tables:
|
||||
|
||||

|
||||
|
||||
The P4 entry points to a P3 table, where the next 9 bits of the address are used to select an entry. The P3 entry then points to a P2 table and the P2 entry points to a P1 table. The P1 entry, which is specified through bits 12–20, finally points to the physical frame.
|
||||
|
||||
## A Basic Paging Module
|
||||
Let's create a basic paging module in `memory/paging/mod.rs`:
|
||||
|
||||
```rust
|
||||
use memory::PAGE_SIZE; // needed later
|
||||
|
||||
const ENTRY_COUNT: usize = 512;
|
||||
|
||||
pub type PhysicalAddress = usize;
|
||||
pub type VirtualAddress = usize;
|
||||
|
||||
pub struct Page {
|
||||
number: usize,
|
||||
}
|
||||
```
|
||||
We import the `PAGE_SIZE` and define a constant for the number of entries per table. To make future function signatures more expressive, we can use the type aliases `PhysicalAddress` and `VirtualAddress`. The `Page` struct is similar to the `Frame` struct in the [previous post], but represents a virtual page instead of a physical frame.
|
||||
|
||||
[previous post]: {{% relref "2015-11-15-allocating-frames.md#a-memory-module" %}}
|
||||
|
||||
### Page Table Entries
|
||||
To model page table entries, we create a new `entry` submodule:
|
||||
|
||||
```rust
|
||||
use memory::Frame; // needed later
|
||||
|
||||
pub struct Entry(u64);
|
||||
|
||||
impl Entry {
|
||||
pub fn is_unused(&self) -> bool {
|
||||
self.0 == 0
|
||||
}
|
||||
|
||||
pub fn set_unused(&mut self) {
|
||||
self.0 = 0;
|
||||
}
|
||||
}
|
||||
```
|
||||
We define that an unused entry is completely 0. That allows us to distinguish unused entries from other non-present entries in the future. For example, we could define one of the available bits as the `swapped_out` bit for pages that are swapped to disk.
|
||||
|
||||
Next we will model the contained physical address and the various flags. Remember, entries have the following format:
|
||||
|
||||
Bit(s) | Name | Meaning
|
||||
--------------------- | ------ | ----------------------------------
|
||||
0 | present | the page is currently in memory
|
||||
1 | writable | it's allowed to write to this page
|
||||
2 | user accessible | if not set, only kernel mode code can access this page
|
||||
3 | write through caching | writes go directly to memory
|
||||
4 | disable cache | no cache is used for this page
|
||||
5 | accessed | the CPU sets this bit when this page is used
|
||||
6 | dirty | the CPU sets this bit when a write to this page occurs
|
||||
7 | huge page/null | must be 0 in P1 and P4, creates a 1GiB page in P3, creates a 2MiB page in P2
|
||||
8 | global | page isn't flushed from caches on address space switch (PGE bit of CR4 register must be set)
|
||||
9-11 | available | can be used freely by the OS
|
||||
12-51 | physical address | the page aligned 52bit physical address of the frame or the next page table
|
||||
52-62 | available | can be used freely by the OS
|
||||
63 | no execute | forbid executing code on this page (the NXE bit in the EFER register must be set)
|
||||
|
||||
To model the various flags, we will use the [bitflags] crate. To add it as a dependency, add the following to your `Cargo.toml`:
|
||||
|
||||
[bitflags]: https://github.com/rust-lang-nursery/bitflags
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
...
|
||||
bitflags = "0.7.0"
|
||||
```
|
||||
|
||||
To import the macro, we need to use `#[macro_use]` above the `extern crate` definition:
|
||||
|
||||
```rust
|
||||
// in src/lib.rs
|
||||
#[macro_use]
|
||||
extern crate bitflags;
|
||||
```
|
||||
|
||||
Now we can model the various flags:
|
||||
|
||||
```rust
|
||||
bitflags! {
|
||||
pub flags EntryFlags: u64 {
|
||||
const PRESENT = 1 << 0,
|
||||
const WRITABLE = 1 << 1,
|
||||
const USER_ACCESSIBLE = 1 << 2,
|
||||
const WRITE_THROUGH = 1 << 3,
|
||||
const NO_CACHE = 1 << 4,
|
||||
const ACCESSED = 1 << 5,
|
||||
const DIRTY = 1 << 6,
|
||||
const HUGE_PAGE = 1 << 7,
|
||||
const GLOBAL = 1 << 8,
|
||||
const NO_EXECUTE = 1 << 63,
|
||||
}
|
||||
}
|
||||
```
|
||||
To extract the flags from the entry we create an `Entry::flags` method that uses [from_bits_truncate]:
|
||||
|
||||
[from_bits_truncate]: https://doc.rust-lang.org/bitflags/bitflags/macro.bitflags!.html#methods
|
||||
|
||||
```rust
|
||||
pub fn flags(&self) -> EntryFlags {
|
||||
EntryFlags::from_bits_truncate(self.0)
|
||||
}
|
||||
```
|
||||
This allows us to check for flags through the `contains()` function. For example, `flags().contains(PRESENT | WRITABLE)` returns true if the entry contains _both_ flags.
|
||||
|
||||
To extract the physical address, we add a `pointed_frame` method:
|
||||
|
||||
```rust
|
||||
pub fn pointed_frame(&self) -> Option<Frame> {
|
||||
if self.flags().contains(PRESENT) {
|
||||
Some(Frame::containing_address(
|
||||
self.0 as usize & 0x000fffff_fffff000
|
||||
))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
```
|
||||
If the entry is present, we mask bits 12–51 and return the corresponding frame. If the entry is not present, it does not point to a valid frame so we return `None`.
|
||||
|
||||
To modify entries, we add a `set` method that updates the flags and the pointed frame:
|
||||
|
||||
```rust
|
||||
pub fn set(&mut self, frame: Frame, flags: EntryFlags) {
|
||||
assert!(frame.start_address() & !0x000fffff_fffff000 == 0);
|
||||
self.0 = (frame.start_address() as u64) | flags.bits();
|
||||
}
|
||||
```
|
||||
The start address of a frame should be page aligned and smaller than 2^52 (since x86 uses 52bit physical addresses). Since an invalid address could mess up the entry, we add an assertion. To actually set the entry, we just need to `or` the start address and the flag bits.
|
||||
|
||||
The missing `Frame::start_address` method is pretty simple:
|
||||
|
||||
```rust
|
||||
use self::paging::PhysicalAddress;
|
||||
|
||||
fn start_address(&self) -> PhysicalAddress {
|
||||
self.number * PAGE_SIZE
|
||||
}
|
||||
```
|
||||
We add it to the `impl Frame` block in `memory/mod.rs`.
|
||||
|
||||
### Page Tables
|
||||
To model page tables, we create a basic `Table` struct in a new `table` submodule:
|
||||
|
||||
```rust
|
||||
use memory::paging::entry::*;
|
||||
use memory::paging::ENTRY_COUNT;
|
||||
|
||||
pub struct Table {
|
||||
entries: [Entry; ENTRY_COUNT],
|
||||
}
|
||||
```
|
||||
It's just an array of 512 page table entries.
|
||||
|
||||
To make the `Table` indexable itself, we can implement the `Index` and `IndexMut` traits:
|
||||
|
||||
```rust
|
||||
use core::ops::{Index, IndexMut};
|
||||
|
||||
impl Index<usize> for Table {
|
||||
type Output = Entry;
|
||||
|
||||
fn index(&self, index: usize) -> &Entry {
|
||||
&self.entries[index]
|
||||
}
|
||||
}
|
||||
|
||||
impl IndexMut<usize> for Table {
|
||||
fn index_mut(&mut self, index: usize) -> &mut Entry {
|
||||
&mut self.entries[index]
|
||||
}
|
||||
}
|
||||
```
|
||||
Now it's possible to get the entry at index 42 through `some_table[42]`. Of course we could replace `usize` with `u32` or even `u16` here but it would cause more numerical conversions (`x as u16`).
|
||||
|
||||
Let's add a method that sets all entries to unused. We will need it when we create new page tables in the future. The method looks like this:
|
||||
|
||||
```rust
|
||||
pub fn zero(&mut self) {
|
||||
for entry in self.entries.iter_mut() {
|
||||
entry.set_unused();
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Now we can read page tables and retrieve the mapping information. We can also update them through the `IndexMut` trait and the `Entry::set` method. But how do we get references to the various page tables?
|
||||
|
||||
We could read the `CR3` register to get the physical address of the P4 table and read its entries to get the P3 addresses. The P3 entries then point to the P2 tables and so on. But this method only works for identity-mapped pages. In the future we will create new page tables, which aren't in the identity-mapped area anymore. Since we can't access them through their physical address, we need a way to map them to virtual addresses.
|
||||
|
||||
## Mapping Page Tables
|
||||
So how do we map the page tables themselves? We don't have that problem for the current P4, P3, and P2 table since they are part of the identity-mapped area, but we need a way to access future tables, too.
|
||||
|
||||
One solution is to identity map all page tables. That way we would not need to differentiate virtual and physical addresses and could easily access the tables. But it clutters the virtual address space and increases fragmentation. And it makes creating page tables much more complicated since we need a physical frame whose corresponding page isn't already used for something else.
|
||||
|
||||
An alternative solution is to map the page tables only temporarily. To read/write a page table, we would map it to some free virtual address until we're done. We could use a small pool of such virtual addresses and reuse them for various tables. This method occupies only few virtual addresses and thus is a good solution for 32-bit systems, which have small address spaces. But it makes things much more complicated since we need to temporarily map up to 4 tables to access a single page. And the temporary mapping requires modification of other page tables, which need to be mapped, too.
|
||||
|
||||
We will solve the problem in another way using a trick called _recursive mapping_.
|
||||
|
||||
### Recursive Mapping
|
||||
The trick is to map the P4 table recursively: The last entry doesn't point to a P3 table, but to the P4 table itself. We can use this entry to remove a translation level so that we land on a page table instead. For example, we can “loop” once to access a P1 table:
|
||||
|
||||

|
||||
|
||||
By selecting the 511th P4 entry, which points to the P4 table itself, the P4 table is used as the P3 table. Similarly, the P3 table is used as a P2 table and the P2 table is treated like a P1 table. Thus the P1 table becomes the target page and can be accessed through the offset.
|
||||
|
||||
It's also possible to access P2 tables by looping twice. And if we select the 511th entry three times, we can access and modify P3 tables:
|
||||
|
||||

|
||||
|
||||
So we just need to specify the desired P3 table in the address through the P1 index. By choosing the 511th entry multiple times, we stay on the P4 table until the address's P1 index becomes the actual P4 index.
|
||||
|
||||
To access the P4 table itself, we loop once more and thus never leave the frame:
|
||||
|
||||

|
||||
|
||||
So we can access and modify page tables of all levels by just setting one P4 entry once. Most work is done by the CPU, we just use the recursive entry to remove one or more translation levels. It may seem a bit strange at first, but it's a clean and simple solution once you wrapped your head around it.
|
||||
|
||||
By using recursive mapping, each page table is accessible through a unique virtual address. The math checks out, too: If all page tables are used, there is 1 P4 table, 511 P3 tables (the last entry is used for the recursive mapping), `511*512` P2 tables, and `511*512*512` P1 tables. So there are `134217728` page tables altogether. Each page table occupies 4KiB, so we need `134217728 * 4KiB = 512GiB` to store them. That's exactly the amount of memory that can be accessed through one P4 entry since `4KiB per page * 512 P1 entries * 512 P2 entries * 512 P3 entries = 512GiB`.
|
||||
|
||||
Of course recursive mapping has some disadvantages, too. It occupies a P4 entry and thus 512GiB of the virtual address space. But since we're in long mode and have a 48-bit address space, there are still 255.5TiB left. The bigger problem is that only the active table can be modified by default. To access another table, the recursive entry needs to be replaced temporarily. We will tackle this problem in the next post when we switch to a new page table.
|
||||
|
||||
### Implementation
|
||||
To map the P4 table recursively, we just need to point the 511th entry to the table itself. Of course we could do it in Rust, but it would require some fiddling with unsafe pointers. It's easier to just add some lines to our boot assembly:
|
||||
|
||||
```nasm
|
||||
mov eax, p4_table
|
||||
or eax, 0b11 ; present + writable
|
||||
mov [p4_table + 511 * 8], eax
|
||||
```
|
||||
I put it right after the `set_up_page_tables` label, but you can add it wherever you like.
|
||||
|
||||
Now we can use special virtual addresses to access the page tables. The P4 table is available at `0xfffffffffffff000`. Let's add a P4 constant to the `table` submodule:
|
||||
|
||||
```rust
|
||||
pub const P4: *mut Table = 0xffffffff_fffff000 as *mut _;
|
||||
```
|
||||
|
||||
Let's switch to the octal system, since it makes more sense for the other special addresses. The P4 address from above is equivalent to `0o177777_777_777_777_777_0000` in octal. You can see that it has index `777` in all tables and offset `0000`. The `177777` bits on the left are the sign extension bits, which are copies of bit 47. They are required because x86 only uses 48bit virtual addresses.
|
||||
|
||||
The other tables can be accessed through the following addresses:
|
||||
|
||||
Table | Address | Indexes
|
||||
----- | ------------------------------- | ----------------------------------
|
||||
P4 | `0o177777_777_777_777_777_0000` | –
|
||||
P3 | `0o177777_777_777_777_XXX_0000` | `XXX` is the P4 index
|
||||
P2 | `0o177777_777_777_XXX_YYY_0000` | like above, and `YYY` is the P3 index
|
||||
P1 | `0o177777_777_XXX_YYY_ZZZ_0000` | like above, and `ZZZ` is the P2 index
|
||||
|
||||
If we look closely, we can see that the P3 address is equal to `(P4 << 9) | XXX_0000`. And the P2 address is calculated through `(P3 << 9) | YYY_0000`. So to get the next address, we need to shift it 9 bits to the left and add the table index. As a formula:
|
||||
|
||||
```
|
||||
next_table_address = (table_address << 9) | (index << 12)
|
||||
```
|
||||
|
||||
### The `next_table` Methods
|
||||
Let's add the above formula as a `Table` method:
|
||||
|
||||
```rust
|
||||
fn next_table_address(&self, index: usize) -> Option<usize> {
|
||||
let entry_flags = self[index].flags();
|
||||
if entry_flags.contains(PRESENT) && !entry_flags.contains(HUGE_PAGE) {
|
||||
let table_address = self as *const _ as usize;
|
||||
Some((table_address << 9) | (index << 12))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
```
|
||||
The next table address is only valid if the corresponding entry is present and does not create a huge page. Then we can do some pointer casting to get the table address and use the formula to calculate the next address.
|
||||
|
||||
If the index is out of bounds, the function will panic since Rust checks array bounds. The panic is desired here since a wrong index should not be possible and indicates a bug.
|
||||
|
||||
To convert the address into references, we add two functions:
|
||||
|
||||
```rust
|
||||
pub fn next_table(&self, index: usize) -> Option<&Table> {
|
||||
self.next_table_address(index)
|
||||
.map(|address| unsafe { &*(address as *const _) })
|
||||
}
|
||||
|
||||
pub fn next_table_mut(&mut self, index: usize) -> Option<&mut Table> {
|
||||
self.next_table_address(index)
|
||||
.map(|address| unsafe { &mut *(address as *mut _) })
|
||||
}
|
||||
```
|
||||
We convert the address into raw pointers through `as` casts and then convert them into Rust references through `&mut *`. The latter is an `unsafe` operation since Rust can't guarantee that the raw pointer is valid.
|
||||
|
||||
Note that `self` stays borrowed as long as the returned reference is valid. This is because of Rust's [lifetime elision] rules. Basically, these rules say that the lifetime of an output reference is the same as the lifetime of the input reference by default. So the above function signatures are expanded to:
|
||||
|
||||
[lifetime elision]: https://doc.rust-lang.org/book/lifetimes.html#lifetime-elision
|
||||
|
||||
```rust
|
||||
pub fn next_table<'a>(&'a self, index: usize) -> Option<&'a Table> {...}
|
||||
|
||||
pub fn next_table_mut<'a>(&'a mut self, index: usize)
|
||||
-> Option<&'a mut Table>
|
||||
{...}
|
||||
```
|
||||
|
||||
Note the additional lifetime parameters, which are identical for input and output references. That's exactly what we want. It ensures that we can't modify tables as long as we have references to lower tables. For example, it would be very bad if we could unmap a P3 table if we still write to one of its P2 tables.
|
||||
|
||||
#### Safety
|
||||
Now we can start at the `P4` constant and use the `next_table` functions to access the lower tables. And we don't even need `unsafe` blocks to do it! Right now, your alarm bells should be ringing. Thanks to Rust, everything we've done before in this post was completely safe. But we just introduced two unsafe blocks to convince Rust that there are valid tables at the specified addresses. Can we really be sure?
|
||||
|
||||
First, these addresses are only valid if the P4 table is mapped recursively. Since the paging module will be the only module that modifies page tables, we can introduce an invariant for the module:
|
||||
|
||||
> _The 511th entry of the active P4 table must always be mapped to the active P4 table itself._
|
||||
|
||||
So if we switch to another P4 table at some time, it needs to be recursively mapped _before_ it becomes active. As long as we obey this invariant, we can safely use the special addresses. But even with this invariant, there is a big problem with the two methods:
|
||||
|
||||
_What happens if we call them on a P1 table?_
|
||||
|
||||
Well, they would calculate the address of the next table (which does not exist) and treat it as a page table. Either they construct an invalid address (if `XXX < 0o400`)[^fn-invalid-address] or access the mapped page itself. That way, we could easily corrupt memory or cause CPU exceptions by accident. So these two functions are not _safe_ in Rust terms. Thus we need to make them `unsafe` functions unless we find some clever solution.
|
||||
|
||||
[^fn-invalid-address]: If the `XXX` part of the address is smaller than `0o400`, its binary representation doesn't start with `1`. But the sign extension bits, which should be a copy of that bit, are `1` instead of `0`. Thus the address is not valid.
|
||||
|
||||
## Some Clever Solution
|
||||
We can use Rust's type system to statically guarantee that the `next_table` methods can only be called on P4, P3, and P2 tables, but not on a P1 table. The idea is to add a `Level` parameter to the `Table` type and implement the `next_table` methods only for level 4, 3, and 2.
|
||||
|
||||
To model the levels we use a trait and empty enums:
|
||||
|
||||
```rust
|
||||
pub trait TableLevel {}
|
||||
|
||||
pub enum Level4 {}
|
||||
pub enum Level3 {}
|
||||
pub enum Level2 {}
|
||||
pub enum Level1 {}
|
||||
|
||||
impl TableLevel for Level4 {}
|
||||
impl TableLevel for Level3 {}
|
||||
impl TableLevel for Level2 {}
|
||||
impl TableLevel for Level1 {}
|
||||
```
|
||||
An empty enum has size zero and disappears completely after compiling. Unlike an empty struct, it's not possible to instantiate an empty enum. Since we will use `TableLevel` and the table levels in exported types, they need to be public.
|
||||
|
||||
To differentiate the P1 table from the other tables, we introduce a `HierarchicalLevel` trait, which is a subtrait of `TableLevel`. But we implement it only for the levels 4, 3, and 2:
|
||||
|
||||
```rust
|
||||
pub trait HierarchicalLevel: TableLevel {}
|
||||
|
||||
impl HierarchicalLevel for Level4 {}
|
||||
impl HierarchicalLevel for Level3 {}
|
||||
impl HierarchicalLevel for Level2 {}
|
||||
```
|
||||
|
||||
Now we add the level parameter to the `Table` type:
|
||||
|
||||
```rust
|
||||
use core::marker::PhantomData;
|
||||
|
||||
pub struct Table<L: TableLevel> {
|
||||
entries: [Entry; ENTRY_COUNT],
|
||||
level: PhantomData<L>,
|
||||
}
|
||||
```
|
||||
We need to add a [PhantomData] field because unused type parameters are not allowed in Rust.
|
||||
|
||||
[PhantomData]: https://doc.rust-lang.org/core/marker/struct.PhantomData.html#unused-type-parameters
|
||||
|
||||
Since we changed the `Table` type, we need to update every use of it:
|
||||
|
||||
```rust
|
||||
pub const P4: *mut Table<Level4> = 0xffffffff_fffff000 as *mut _;
|
||||
...
|
||||
impl<L> Table<L> where L: TableLevel
|
||||
{
|
||||
pub fn zero(&mut self) {...}
|
||||
}
|
||||
|
||||
impl<L> Table<L> where L: HierarchicalLevel
|
||||
{
|
||||
pub fn next_table(&self, index: usize) -> Option<&Table<???>> {...}
|
||||
|
||||
pub fn next_table_mut(&mut self, index: usize) -> Option<&mut Table<???>>
|
||||
{...}
|
||||
|
||||
fn next_table_address(&self, index: usize) -> Option<usize> {...}
|
||||
}
|
||||
|
||||
impl<L> Index<usize> for Table<L> where L: TableLevel {...}
|
||||
|
||||
impl<L> IndexMut<usize> for Table<L> where L: TableLevel {...}
|
||||
```
|
||||
Now the `next_table` methods are only available for P4, P3, and P2 tables. But they have the incomplete return type `Table<???>` now. What should we fill in for the `???`?
|
||||
|
||||
For a P4 table we would like to return a `Table<Level3>`, for a P3 table a `Table<Level2>`, and for a P2 table a `Table<Level1>`. So we want to return a table of the _next level_.
|
||||
|
||||
We can define the next level by adding an associated type to the `HierarchicalLevel` trait:
|
||||
|
||||
```rust
|
||||
trait HierarchicalLevel: TableLevel {
|
||||
type NextLevel: TableLevel;
|
||||
}
|
||||
|
||||
impl HierarchicalLevel for Level4 {
|
||||
type NextLevel = Level3;
|
||||
}
|
||||
|
||||
impl HierarchicalLevel for Level3 {
|
||||
type NextLevel = Level2;
|
||||
}
|
||||
|
||||
impl HierarchicalLevel for Level2 {
|
||||
type NextLevel = Level1;
|
||||
}
|
||||
```
|
||||
|
||||
Now we can replace the `Table<???>` types with `Table<L::NextLevel>` types and our code works as intended. You can try it with a simple test function:
|
||||
|
||||
```rust
|
||||
fn test() {
|
||||
let p4 = unsafe { &*P4 };
|
||||
p4.next_table(42)
|
||||
.and_then(|p3| p3.next_table(1337))
|
||||
.and_then(|p2| p2.next_table(0xdeadbeaf))
|
||||
.and_then(|p1| p1.next_table(0xcafebabe))
|
||||
}
|
||||
```
|
||||
Most of the indexes are completely out of bounds, so it would panic if it's called. But we don't need to call it since it already fails at compile time:
|
||||
|
||||
```
|
||||
error: no method named `next_table` found for type
|
||||
`&memory::paging::table::Table<memory::paging::table::Level1>`
|
||||
in the current scope
|
||||
```
|
||||
Remember that this is bare metal kernel code. We just used type system magic to make low-level page table manipulations safer. Rust is just awesome!
|
||||
|
||||
## Translating Addresses
|
||||
Now let's do something useful with our new module. We will create a function that translates a virtual address to the corresponding physical address. We add it to the `paging/mod.rs` module:
|
||||
|
||||
```rust
|
||||
pub fn translate(virtual_address: VirtualAddress)
|
||||
-> Option<PhysicalAddress>
|
||||
{
|
||||
let offset = virtual_address % PAGE_SIZE;
|
||||
translate_page(Page::containing_address(virtual_address))
|
||||
.map(|frame| frame.number * PAGE_SIZE + offset)
|
||||
}
|
||||
```
|
||||
It uses two functions we haven't defined yet: `translate_page` and `Page::containing_address`. Let's start with the latter:
|
||||
|
||||
```rust
|
||||
pub fn containing_address(address: VirtualAddress) -> Page {
|
||||
assert!(address < 0x0000_8000_0000_0000 ||
|
||||
address >= 0xffff_8000_0000_0000,
|
||||
"invalid address: 0x{:x}", address);
|
||||
Page { number: address / PAGE_SIZE }
|
||||
}
|
||||
```
|
||||
The assertion is needed because there can be invalid addresses. Addresses on x86 are just 48 bits long and the other bits are just _sign extension_, i.e. a copy of the most significant bit. For example:
|
||||
|
||||
```
|
||||
invalid address: 0x0000_8000_0000_0000
|
||||
valid address: 0xffff_8000_0000_0000
|
||||
└── bit 47
|
||||
```
|
||||
So the address space is split into two halves: the _higher half_ containing addresses with sign extension and the _lower half_ containing addresses without. Everything in between is invalid.
|
||||
|
||||
Since we added `containing_address`, we add the inverse method as well (maybe we need it later):
|
||||
|
||||
```rust
|
||||
fn start_address(&self) -> usize {
|
||||
self.number * PAGE_SIZE
|
||||
}
|
||||
```
|
||||
|
||||
The other missing function, `translate_page`, looks like this:
|
||||
|
||||
```rust
|
||||
use memory::Frame;
|
||||
|
||||
fn translate_page(page: Page) -> Option<Frame> {
|
||||
use self::entry::HUGE_PAGE;
|
||||
|
||||
let p3 = unsafe { &*table::P4 }.next_table(page.p4_index());
|
||||
|
||||
let huge_page = || {
|
||||
// TODO
|
||||
};
|
||||
|
||||
p3.and_then(|p3| p3.next_table(page.p3_index()))
|
||||
.and_then(|p2| p2.next_table(page.p2_index()))
|
||||
.and_then(|p1| p1[page.p1_index()].pointed_frame())
|
||||
.or_else(huge_page)
|
||||
}
|
||||
```
|
||||
We use an unsafe block to convert the raw `P4` pointer to a reference. Then we use the [Option::and_then] function to go through the four table levels. If some entry along the way is `None`, we check if the page is a huge page through the (unimplemented) `huge_page` closure.
|
||||
|
||||
The `Page::p*_index` functions return the different table indexes. They look like this:
|
||||
|
||||
[Option::and_then]: https://doc.rust-lang.org/nightly/core/option/enum.Option.html#method.and_then
|
||||
|
||||
```rust
|
||||
fn p4_index(&self) -> usize {
|
||||
(self.number >> 27) & 0o777
|
||||
}
|
||||
fn p3_index(&self) -> usize {
|
||||
(self.number >> 18) & 0o777
|
||||
}
|
||||
fn p2_index(&self) -> usize {
|
||||
(self.number >> 9) & 0o777
|
||||
}
|
||||
fn p1_index(&self) -> usize {
|
||||
(self.number >> 0) & 0o777
|
||||
}
|
||||
```
|
||||
|
||||
### Safety
|
||||
We use an `unsafe` block to convert the raw `P4` pointer into a shared reference. It's safe because we don't create any `&mut` references to the table right now and don't switch the P4 table either. But as soon as we do something like that, we have to revisit this method.
|
||||
|
||||
### Huge Pages
|
||||
|
||||
The `huge_page` closure calculates the corresponding frame if huge pages are used. Its content looks like this:
|
||||
|
||||
```rust
|
||||
p3.and_then(|p3| {
|
||||
let p3_entry = &p3[page.p3_index()];
|
||||
// 1GiB page?
|
||||
if let Some(start_frame) = p3_entry.pointed_frame() {
|
||||
if p3_entry.flags().contains(HUGE_PAGE) {
|
||||
// address must be 1GiB aligned
|
||||
assert!(start_frame.number % (ENTRY_COUNT * ENTRY_COUNT) == 0);
|
||||
return Some(Frame {
|
||||
number: start_frame.number + page.p2_index() *
|
||||
ENTRY_COUNT + page.p1_index(),
|
||||
});
|
||||
}
|
||||
}
|
||||
if let Some(p2) = p3.next_table(page.p3_index()) {
|
||||
let p2_entry = &p2[page.p2_index()];
|
||||
// 2MiB page?
|
||||
if let Some(start_frame) = p2_entry.pointed_frame() {
|
||||
if p2_entry.flags().contains(HUGE_PAGE) {
|
||||
// address must be 2MiB aligned
|
||||
assert!(start_frame.number % ENTRY_COUNT == 0);
|
||||
return Some(Frame {
|
||||
number: start_frame.number + page.p1_index()
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
})
|
||||
```
|
||||
This function is much longer and more complex than the `translate_page` function itself. To avoid this complexity in the future, we will only work with standard 4KiB pages from now on.
|
||||
|
||||
## Mapping Pages
|
||||
Let's add a function that modifies the page tables to map a `Page` to a `Frame`:
|
||||
|
||||
```rust
|
||||
pub use self::entry::*;
|
||||
use memory::FrameAllocator;
|
||||
|
||||
pub fn map_to<A>(page: Page, frame: Frame, flags: EntryFlags,
|
||||
allocator: &mut A)
|
||||
where A: FrameAllocator
|
||||
{
|
||||
let p4 = unsafe { &mut *P4 };
|
||||
let mut p3 = p4.next_table_create(page.p4_index(), allocator);
|
||||
let mut p2 = p3.next_table_create(page.p3_index(), allocator);
|
||||
let mut p1 = p2.next_table_create(page.p2_index(), allocator);
|
||||
|
||||
assert!(p1[page.p1_index()].is_unused());
|
||||
p1[page.p1_index()].set(frame, flags | PRESENT);
|
||||
}
|
||||
```
|
||||
We add a reexport for all `entry` types since they are required to call the function. We assert that the page is unmapped and always set the present flag (since it wouldn't make sense to map a page without setting it).
|
||||
|
||||
The `Table::next_table_create` method doesn't exist yet. It should return the next table if it exists, or create a new one. Therefore we need the `FrameAllocator` from the [previous post] and the `Table::zero` method:
|
||||
|
||||
```rust
|
||||
use memory::FrameAllocator;
|
||||
|
||||
pub fn next_table_create<A>(&mut self,
|
||||
index: usize,
|
||||
allocator: &mut A)
|
||||
-> &mut Table<L::NextLevel>
|
||||
where A: FrameAllocator
|
||||
{
|
||||
if self.next_table(index).is_none() {
|
||||
assert!(!self.entries[index].flags().contains(HUGE_PAGE),
|
||||
"mapping code does not support huge pages");
|
||||
let frame = allocator.allocate_frame().expect("no frames available");
|
||||
self.entries[index].set(frame, PRESENT | WRITABLE);
|
||||
self.next_table_mut(index).unwrap().zero();
|
||||
}
|
||||
self.next_table_mut(index).unwrap()
|
||||
}
|
||||
```
|
||||
We can use `unwrap()` here since the next table definitely exists.
|
||||
|
||||
### Safety
|
||||
We used an `unsafe` block in `map_to` to convert the raw `P4` pointer to a `&mut` reference. That's bad. It's now possible that the `&mut` reference is not exclusive, which breaks Rust's guarantees. It's only a matter of time before we run into a data race. For example, imagine that one thread maps an entry to `frame_A` and another thread (on the same core) tries to map the same entry to `frame_B`.
|
||||
|
||||
The problem is that there's no clear _owner_ for the page tables. So let's define page table ownership!
|
||||
|
||||
### Page Table Ownership
|
||||
We define the following:
|
||||
|
||||
> A page table owns all of its subtables.
|
||||
|
||||
We already obey this rule: To get a reference to a table, we need to borrow it from its parent table through the `next_table` method. But who owns the P4 table?
|
||||
|
||||
> The recursively mapped P4 table is owned by an `ActivePageTable` struct.
|
||||
|
||||
We just defined some random owner for the P4 table. But it will solve our problems. And it will also provide the interface to other modules.
|
||||
|
||||
So let's create the struct:
|
||||
|
||||
```rust
|
||||
use self::table::{Table, Level4};
|
||||
use core::ptr::Unique;
|
||||
|
||||
pub struct ActivePageTable {
|
||||
p4: Unique<Table<Level4>>,
|
||||
}
|
||||
```
|
||||
We can't store the `Table<Level4>` directly because it needs to be at a special memory location (like the [VGA text buffer]). We could use a raw pointer or `&mut` instead of [Unique], but Unique indicates ownership better.
|
||||
|
||||
[VGA text buffer]: {{% relref "2015-10-23-printing-to-screen.md#the-text-buffer" %}}
|
||||
[Unique]: https://doc.rust-lang.org/nightly/core/ptr/struct.Unique.html
|
||||
|
||||
Because the `ActivePageTable` owns the unique recursively mapped P4 table, there must be only one `ActivePageTable` instance. Thus we make the constructor function unsafe:
|
||||
|
||||
```rust
|
||||
impl ActivePageTable {
|
||||
pub unsafe fn new() -> ActivePageTable {
|
||||
ActivePageTable {
|
||||
p4: Unique::new(table::P4),
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
We add some methods to get P4 references:
|
||||
|
||||
```rust
|
||||
fn p4(&self) -> &Table<Level4> {
|
||||
unsafe { self.p4.get() }
|
||||
}
|
||||
|
||||
fn p4_mut(&mut self) -> &mut Table<Level4> {
|
||||
unsafe { self.p4.get_mut() }
|
||||
}
|
||||
```
|
||||
|
||||
Since we will only create valid P4 pointers, the `unsafe` blocks are safe. However, we don't make these functions public since they can be used to make page tables invalid. Only the higher level functions (such as `translate` or `map_to`) should be usable from other modules.
|
||||
|
||||
Now we can make the `map_to` and `translate` functions safe by making them methods of `ActivePageTable`:
|
||||
|
||||
```rust
|
||||
impl ActivePageTable {
|
||||
pub unsafe fn new() -> ActivePageTable {...}
|
||||
|
||||
fn p4(&self) -> &Table<Level4> {...}
|
||||
|
||||
fn p4_mut(&mut self) -> &mut Table<Level4> {...}
|
||||
|
||||
pub fn translate(&self, virtual_address: VirtualAddress)
|
||||
-> Option<PhysicalAddress>
|
||||
{
|
||||
...
|
||||
self.translate_page(...).map(...)
|
||||
}
|
||||
|
||||
fn translate_page(&self, page: Page) -> Option<Frame> {
|
||||
let p3 = self.p4().next_table(...);
|
||||
...
|
||||
}
|
||||
|
||||
pub fn map_to<A>(&mut self,
|
||||
page: Page,
|
||||
frame: Frame,
|
||||
flags: EntryFlags,
|
||||
allocator: &mut A)
|
||||
where A: FrameAllocator
|
||||
{
|
||||
let mut p3 = self.p4_mut().next_table_create(...);
|
||||
...
|
||||
}
|
||||
}
|
||||
```
|
||||
Now the `p4()` and `p4_mut()` methods should be the only methods containing an `unsafe` block in the `paging/mod.rs` file.
|
||||
|
||||
### More Mapping Functions
|
||||
|
||||
For convenience, we add a `map` method that just picks a free frame for us:
|
||||
|
||||
```rust
|
||||
pub fn map<A>(&mut self, page: Page, flags: EntryFlags, allocator: &mut A)
|
||||
where A: FrameAllocator
|
||||
{
|
||||
let frame = allocator.allocate_frame().expect("out of memory");
|
||||
self.map_to(page, frame, flags, allocator)
|
||||
}
|
||||
```
|
||||
|
||||
We also add an `identity_map` function to make it easier to remap the kernel in the next post:
|
||||
|
||||
```rust
|
||||
pub fn identity_map<A>(&mut self,
|
||||
frame: Frame,
|
||||
flags: EntryFlags,
|
||||
allocator: &mut A)
|
||||
where A: FrameAllocator
|
||||
{
|
||||
let page = Page::containing_address(frame.start_address());
|
||||
self.map_to(page, frame, flags, allocator)
|
||||
}
|
||||
```
|
||||
|
||||
### Unmapping Pages
|
||||
To unmap a page, we set the corresponding P1 entry to unused:
|
||||
|
||||
```rust
|
||||
fn unmap<A>(&mut self, page: Page, allocator: &mut A)
|
||||
where A: FrameAllocator
|
||||
{
|
||||
assert!(self.translate(page.start_address()).is_some());
|
||||
|
||||
let p1 = self.p4_mut()
|
||||
.next_table_mut(page.p4_index())
|
||||
.and_then(|p3| p3.next_table_mut(page.p3_index()))
|
||||
.and_then(|p2| p2.next_table_mut(page.p2_index()))
|
||||
.expect("mapping code does not support huge pages");
|
||||
let frame = p1[page.p1_index()].pointed_frame().unwrap();
|
||||
p1[page.p1_index()].set_unused();
|
||||
// TODO free p(1,2,3) table if empty
|
||||
allocator.deallocate_frame(frame);
|
||||
}
|
||||
```
|
||||
The assertion ensures that the page is mapped. Thus the corresponding P1 table and frame must exist for a standard 4KiB page. We set the entry to unused and free the associated frame in the supplied frame allocator.
|
||||
|
||||
We can also free the P1, P2, or even P3 table when the last entry is freed. But checking the whole table on every `unmap` would be very expensive. So we leave the `TODO` in place until we find a good solution. I'm open for suggestions :).
|
||||
|
||||
_Spoiler_: There is an ugly bug in this function, which we will find in the next section.
|
||||
|
||||
## Testing and Bugfixing
|
||||
To test it, we add a `test_paging` function in `memory/paging/mod.rs`:
|
||||
|
||||
```rust
|
||||
pub fn test_paging<A>(allocator: &mut A)
|
||||
where A: FrameAllocator
|
||||
{
|
||||
let page_table = unsafe { ActivePageTable::new() };
|
||||
|
||||
// test it
|
||||
}
|
||||
```
|
||||
We borrow the frame allocator since we will need it for the mapping functions. To be able to call that function from main, we need to reexport it in `memory/mod.rs`:
|
||||
|
||||
```rust
|
||||
// in memory/mod.rs
|
||||
pub use self::paging::test_paging;
|
||||
|
||||
// lib.rs
|
||||
let mut frame_allocator = ...;
|
||||
memory::test_paging(&mut frame_allocator);
|
||||
```
|
||||
|
||||
### translate
|
||||
First, we translate some addresses:
|
||||
|
||||
```rust
|
||||
// address 0 is mapped
|
||||
println!("Some = {:?}", page_table.translate(0));
|
||||
// second P1 entry
|
||||
println!("Some = {:?}", page_table.translate(4096));
|
||||
// second P2 entry
|
||||
println!("Some = {:?}", page_table.translate(512 * 4096));
|
||||
// 300th P2 entry
|
||||
println!("Some = {:?}", page_table.translate(300 * 512 * 4096));
|
||||
// second P3 entry
|
||||
println!("None = {:?}", page_table.translate(512 * 512 * 4096));
|
||||
// last mapped byte
|
||||
println!("Some = {:?}", page_table.translate(512 * 512 * 4096 - 1));
|
||||
```
|
||||
Currently, the first GiB of the address space is identity-mapped. Thus all addresses in this area should translate to `Some(x)`, where `x` is the virtual address. Only the second-to-last address, `512 * 512 * 4096`, is not in that area and should resolve to `None`.
|
||||
|
||||
But the output shows two `None` lines:
|
||||
|
||||
```
|
||||
Some = Some(0)
|
||||
Some = Some(4096)
|
||||
Some = Some(2097152)
|
||||
Some = Some(629145600)
|
||||
None = None
|
||||
Some = None
|
||||
```
|
||||
The last line is wrong. But why?
|
||||
|
||||
In fact, all addresses above `344 * 512 * 4096` seem to get translated to `None`. But even worse, there are some wrong translations, too. For example, on my machine `357 * 512 * 4096` translates to roughly `255TiB`:
|
||||
|
||||
```
|
||||
Some(280735973961728)
|
||||
```
|
||||
Something is terribly wrong here. But it's not our code.
|
||||
|
||||
The reason for this bug is a silent stack overflow. Remember, our `.bss` section in the `boot.asm` file looks like this:
|
||||
|
||||
```nasm
|
||||
section .bss
|
||||
align 4096
|
||||
p4_table:
|
||||
resb 4096
|
||||
p3_table:
|
||||
resb 4096
|
||||
p2_table:
|
||||
resb 4096
|
||||
stack_bottom:
|
||||
resb 4096
|
||||
stack_top:
|
||||
```
|
||||
So a stack overflow overwrites the P2 table, starting at the last entry. But the CPU still uses the memory as page table entries. And if the stack bytes happen to have the present bit set, the entry seems to point to a frame and `translate` returns a (wrong) `Some`.
|
||||
|
||||
To fix it, we double the stack size to `4096 * 2`. Now the last byte gets translated to `Some(1073741823)` correctly. To avoid this kind of bug in the future, we need to add a guard page to the stack, which causes an exception on stack overflow. We will do that in the next post when we remap the kernel.
|
||||
|
||||
### map_to
|
||||
Let's test the `map_to` function:
|
||||
|
||||
```rust
|
||||
let addr = 42 * 512 * 512 * 4096; // 42th P3 entry
|
||||
let page = Page::containing_address(addr);
|
||||
let frame = allocator.allocate_frame().expect("no more frames");
|
||||
println!("None = {:?}, map to {:?}",
|
||||
page_table.translate(addr),
|
||||
frame);
|
||||
page_table.map_to(page, frame, EntryFlags::empty(), allocator);
|
||||
println!("Some = {:?}", page_table.translate(addr));
|
||||
println!("next free frame: {:?}", allocator.allocate_frame());
|
||||
```
|
||||
We just map some random page to a free frame. To be able to borrow the page table as `&mut`, we need to make it mutable.
|
||||
|
||||
You should see output similar to this:
|
||||
|
||||
```
|
||||
None = None, map to Frame { number: 0 }
|
||||
Some = Some(0)
|
||||
next free frame: Some(Frame { number: 3 })
|
||||
```
|
||||
It's frame 0 because it's the first frame returned by the frame allocator. Since we map the 42nd P3 entry, the mapping code needs to create a P2 and a P1 table. So the next free frame returned by the allocator is frame 3.
|
||||
|
||||
### unmap
|
||||
To test the `unmap` function, we unmap the test page so that it translates to `None` again:
|
||||
|
||||
```rust
|
||||
page_table.unmap(Page::containing_address(addr), allocator);
|
||||
println!("None = {:?}", page_table.translate(addr));
|
||||
```
|
||||
It causes a panic since we call the unimplemented `deallocate_frame` method in `unmap`. If we comment this call out, it works without problems. But there is some bug in this function nevertheless.
|
||||
|
||||
Let's read something from the mapped page (of course before we unmap it again):
|
||||
|
||||
```rust
|
||||
println!("{:#x}", unsafe {
|
||||
*(Page::containing_address(addr).start_address() as *const u64)
|
||||
});
|
||||
```
|
||||
Since we don't zero the mapped pages, the output is random. For me, it's `0xf000ff53f000ff53`.
|
||||
|
||||
If `unmap` worked correctly, reading it again after unmapping should cause a page fault. But it doesn't. Instead, it just prints the same number again. When we remove the first read, we get the desired page fault (i.e. QEMU reboots again and again). So this seems to be some cache issue.
|
||||
|
||||
An x86 processor has many different caches because always accessing the main memory would be very slow. Most of these caches are completely _transparent_. That means everything works exactly the same as without them; it's just much faster. But there is one cache that needs to be updated manually: the _translation lookaside buffer_.
|
||||
|
||||
The translation lookaside buffer, or TLB, caches the translation of virtual to physical addresses. It's filled automatically when a page is accessed. But it's not updated transparently when the mapping of a page changes. This is the reason that we still can access the page even though we unmapped it in the page table.
|
||||
|
||||
So to fix our `unmap` function, we need to remove the cached translation from the TLB. We can use Gerd Zellweger's [x86][x86 crate] crate to do this easily. To add it, we append the following to our `Cargo.toml`:
|
||||
|
||||
[x86 crate]: https://github.com/gz/rust-x86
|
||||
|
||||
```toml
|
||||
[dependencies.x86]
|
||||
version = "0.7.1"
|
||||
default-features = false
|
||||
```
|
||||
It has a `performance-counter` feature that allows reading the CPU specific [performance counters] but increases compile times. We don't need it right now, so we disable it using `default-features = false`.
|
||||
|
||||
[performance counters]: http://gz.github.io/rust-x86/x86/perfcnt/index.html
|
||||
|
||||
Now we can use it to fix `unmap`:
|
||||
|
||||
```rust
|
||||
...
|
||||
p1[page.p1_index()].set_unused();
|
||||
unsafe {
|
||||
::x86::tlb::flush(page.start_address());
|
||||
}
|
||||
// TODO free p(1,2,3) table if empty
|
||||
//allocator.deallocate_frame(frame);
|
||||
}
|
||||
```
|
||||
Now the desired page fault occurs even when we access the page before.
|
||||
|
||||
## Conclusion
|
||||
This post has become pretty long. So let's summarize what we've done:
|
||||
|
||||
- we created a paging module and modeled page tables plus entries
|
||||
- we mapped the P4 page recursively and created `next_table` methods
|
||||
- we used empty enums and associated types to make the `next_table` functions safe
|
||||
- we wrote a function to translate virtual to physical addresses
|
||||
- we created safe functions to map and unmap pages
|
||||
- and we fixed stack overflow and TLB related bugs
|
||||
|
||||
## What's next?
|
||||
In the [next post] we will extend this module and add a function to modify inactive page tables. Through that function, we will create a new page table hierarchy that maps the kernel correctly using 4KiB pages. Then we will switch to the new table to get a safer kernel environment.
|
||||
|
||||
[next post]: {{% relref "2016-01-01-remap-the-kernel.md" %}}
|
||||
|
||||
Afterwards, we will use this paging module to build a heap allocator. This will allow us to use allocation and collection types such as `Box` and `Vec`.
|
||||
|
||||
<small>Image sources: [^virtual_physical_translation_source]</small>
|
||||
|
||||
[^virtual_physical_translation_source]: Image sources: Modified versions of an image from [Wikipedia](https://commons.wikimedia.org/wiki/File:X86_Paging_64bit.svg). The modified files are licensed under the Creative Commons Attribution-Share Alike 3.0 Unported license.
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,840 +0,0 @@
|
||||
+++
|
||||
title = "Kernel Heap"
|
||||
date = "2016-04-11"
|
||||
+++
|
||||
|
||||
In the previous posts we have created a [frame allocator] and a [page table module]. Now we are ready to create a kernel heap and a memory allocator. Thus, we will unlock `Box`, `Vec`, `BTreeMap`, and the rest of the [alloc] and [collections] crates.
|
||||
|
||||
[frame allocator]: {{% relref "2015-11-15-allocating-frames.md" %}}
|
||||
[page table module]: {{% relref "2015-12-09-page-tables.md" %}}
|
||||
[alloc]: https://doc.rust-lang.org/nightly/alloc/index.html
|
||||
[collections]: https://doc.rust-lang.org/nightly/collections/index.html
|
||||
|
||||
<!--more--><aside id="toc"></aside>
|
||||
|
||||
As always, you can find the complete source code on [Github]. Please file [issues] for any problems, questions, or improvement suggestions. There is also a comment section at the end of this page.
|
||||
|
||||
[Github]: https://github.com/phil-opp/blog_os/tree/kernel_heap
|
||||
[issues]: https://github.com/phil-opp/blog_os/issues
|
||||
|
||||
## Introduction
|
||||
The _heap_ is the memory area for long-lived allocations. The programmer can access it by using types like [Box][Box rustbyexample] or [Vec]. Behind the scenes, the compiler manages that memory by inserting calls to some memory allocator. By default, Rust links to the [jemalloc] allocator (for binaries) or the system allocator (for libraries). However, both rely on [system calls] such as [sbrk] and are thus unusable in our kernel. So we need to create and link our own allocator.
|
||||
|
||||
[Box rustbyexample]: http://rustbyexample.com/std/box.html
|
||||
[Vec]: https://doc.rust-lang.org/book/vectors.html
|
||||
[jemalloc]: http://www.canonware.com/jemalloc/
|
||||
[system calls]: https://en.wikipedia.org/wiki/System_call
|
||||
[sbrk]: https://en.wikipedia.org/wiki/Sbrk
|
||||
|
||||
A good allocator is fast and reliable. It also effectively utilizes the available memory and keeps [fragmentation] low. Furthermore, it works well for concurrent applications and scales to any number of processors. It even optimizes the memory layout with respect to the CPU caches to improve [cache locality] and avoid [false sharing].
|
||||
|
||||
[cache locality]: http://docs.cray.com/books/S-2315-50/html-S-2315-50/qmeblljm.html
|
||||
[fragmentation]: https://en.wikipedia.org/wiki/Fragmentation_(computing)
|
||||
[false sharing]: http://mechanical-sympathy.blogspot.de/2011/07/false-sharing.html
|
||||
|
||||
These requirements make good allocators pretty complex. For example, [jemalloc] has over 30,000 lines of code. This complexity is out of scope for our kernel, so we will create a much simpler allocator. However, it should suffice for the foreseeable future, since we'll allocate only when it's absolutely necessary.
|
||||
|
||||
## A Bump Allocator
|
||||
|
||||
For our own allocator, we start simple. We create an allocator crate in a new `libs` subfolder:
|
||||
|
||||
``` shell
|
||||
> mkdir libs
|
||||
> cd libs
|
||||
> cargo new bump_allocator
|
||||
> cd bump_allocator
|
||||
```
|
||||
|
||||
Our allocator is very basic. It only keeps track of the next free address:
|
||||
|
||||
``` rust
|
||||
// in libs/bump_allocator/src/lib.rs
|
||||
|
||||
#![feature(const_fn)]
|
||||
|
||||
#[derive(Debug)]
|
||||
struct BumpAllocator {
|
||||
heap_start: usize,
|
||||
heap_size: usize,
|
||||
next: usize,
|
||||
}
|
||||
|
||||
impl BumpAllocator {
|
||||
/// Create a new allocator, which uses the memory in the
|
||||
/// range [heap_start, heap_start + heap_size).
|
||||
const fn new(heap_start: usize, heap_size: usize) -> BumpAllocator {
|
||||
BumpAllocator {
|
||||
heap_start: heap_start,
|
||||
heap_size: heap_size,
|
||||
next: heap_start,
|
||||
}
|
||||
}
|
||||
|
||||
/// Allocates a block of memory with the given size and alignment.
|
||||
fn allocate(&mut self, size: usize, align: usize) -> Option<*mut u8> {
|
||||
let alloc_start = align_up(self.next, align);
|
||||
let alloc_end = alloc_start + size;
|
||||
|
||||
if alloc_end <= self.heap_start + self.heap_size {
|
||||
self.next = alloc_end;
|
||||
Some(alloc_start as *mut u8)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The `heap_start` and `heap_size` fields just contain the start address and size of our kernel heap. The `next` field contains the next free address and is increased after every allocation. To `allocate` a memory block we align the `next` address using the `align_up` function (described below). Then we add up the desired `size` and make sure that we don't exceed the end of the heap. If everything goes well, we update the `next` address and return a pointer to the start address of the allocation. Else, we return `None`.
|
||||
|
||||
Note that we need to add a feature flag at the beginning of the file, because we've marked the `new` function as `const`. [Const functions] are unstable, so we need to add the `#![feature(const_fn)]` flag.
|
||||
|
||||
[Const functions]: https://github.com/rust-lang/rust/issues/24111
|
||||
|
||||
### Alignment
|
||||
In order to simplify alignment, we add `align_down` and `align_up` functions:
|
||||
|
||||
``` rust
|
||||
/// Align downwards. Returns the greatest x with alignment `align`
|
||||
/// so that x <= addr. The alignment must be a power of 2.
|
||||
pub fn align_down(addr: usize, align: usize) -> usize {
|
||||
if align.is_power_of_two() {
|
||||
addr & !(align - 1)
|
||||
} else if align == 0 {
|
||||
addr
|
||||
} else {
|
||||
panic!("`align` must be a power of 2");
|
||||
}
|
||||
}
|
||||
|
||||
/// Align upwards. Returns the smallest x with alignment `align`
|
||||
/// so that x >= addr. The alignment must be a power of 2.
|
||||
pub fn align_up(addr: usize, align: usize) -> usize {
|
||||
align_down(addr + align - 1, align)
|
||||
}
|
||||
```
|
||||
|
||||
Let's start with `align_down`: If the alignment is a valid power of two (i.e. in `{1,2,4,8,…}`), we use some bit-fiddling to return the aligned address. It works because every power of two has exactly one bit set in its binary representation. For example, the numbers `{1,2,4,8,…}` are `{1,10,100,1000,…}` in binary. By subtracting 1 we get `{0,01,011,0111,…}`. These binary numbers have a `1` at exactly the positions that need to be zeroed in `addr`. For example, the last 3 bits need to be zeroed for an alignment of 8.
|
||||
|
||||
To align `addr`, we create a [bitmask] from `align-1`. We want a `0` at the position of each `1`, so we invert it using `!`. After that, the binary numbers look like this: `{…11111,…11110,…11100,…11000,…}`. Finally, we zero the correct bits using a binary `AND`.
|
||||
|
||||
[bitmask]: https://en.wikipedia.org/wiki/Mask_(computing)
|
||||
|
||||
Aligning upwards is simple now. We just increase `addr` by `align-1` and call `align_down`. We add `align-1` instead of `align` because we would otherwise waste `align` bytes for already aligned addresses.
|
||||
|
||||
### Deallocate
|
||||
But how do we deallocate memory in our bump allocator? Well, we don't ;). We just leak all freed memory for now. Thus our allocator quickly runs out of memory in a real system. On the other hand, it's as fast as an allocator can get: It just increases a single variable when allocating and does nothing at all when deallocating. And RAM is cheap nowadays, right? :)
|
||||
|
||||
(Don't worry, we will introduce a better allocator later in this post.)
|
||||
|
||||
### Custom Allocators in Rust
|
||||
In order to use our crate as system allocator, we add some attributes at the beginning of the file:
|
||||
|
||||
``` rust
|
||||
// in libs/bump_allocator/src/lib.rs
|
||||
|
||||
#![feature(allocator)]
|
||||
|
||||
#![allocator]
|
||||
#![no_std]
|
||||
```
|
||||
The `#![allocator]` attribute tells the compiler that it should not link a default allocator when this crate is linked. The attribute is unstable and feature-gated, so we need to add `#![feature(allocator)]` as well. Allocator crates must not depend on [liballoc], because this would introduce a circular dependency. Thus, allocator crates can't use the standard library either (as it depends on `liballoc`). Therefore all allocator crates must be marked as `#![no_std]`.
|
||||
|
||||
[liballoc]: https://doc.rust-lang.org/nightly/alloc/index.html
|
||||
|
||||
According to [the book][custom-allocators], an allocator crate needs to implement the following five functions:
|
||||
|
||||
[custom-allocators]: https://doc.rust-lang.org/book/custom-allocators.html
|
||||
|
||||
``` rust
|
||||
#[no_mangle]
|
||||
pub extern fn __rust_allocate(size: usize, align: usize) -> *mut u8 {}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern fn __rust_usable_size(size: usize, align: usize) -> usize {}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern fn __rust_deallocate(ptr: *mut u8, size: usize, align: usize) {}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern fn __rust_reallocate(ptr: *mut u8, size: usize, new_size: usize,
|
||||
align: usize) -> *mut u8 {}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern fn __rust_reallocate_inplace(ptr: *mut u8, size: usize,
|
||||
new_size: usize, align: usize)
|
||||
-> usize {}
|
||||
```
|
||||
|
||||
These functions are highly unstable and the compiler does not check their types. So make sure that the type, number, and order of parameters are correct when you implement it.
|
||||
|
||||
Let's look at each function individually:
|
||||
|
||||
- The `__rust_allocate` function allocates a block of memory with the given size (in bytes) and alignment. _Alignment_ means that the start address of the allocation needs to be a multiple of the `align` parameter. This is required because some CPUs can only access e.g. 4 byte aligned addresses. The alignment is always a power of 2.
|
||||
- The `__rust_usable_size` returns the usable size of an allocation created with the specified size and alignment. The usable size is at least `size`, but might be larger if the allocator uses fixed block sizes. For example, a [buddy allocator] rounds the size of each allocation to the next power of two.
|
||||
- The `__rust_deallocate` function frees the memory block referenced by `ptr` again. The `size` and `align` parameters contain the values that were used to create the allocation. Thus the allocator knows exactly how much memory it needs to free. In contrast, the [free function] of C only has a single `ptr` argument. So a C allocator needs to [maintain information][c free info] about the size of each block itself. In Rust, the compiler maintains this information for us.
|
||||
- The `__rust_reallocate` function changes the size of the block referenced by `ptr` from `size` to `new_size`. If it's possible to do it in-place, the function resizes the block and returns `ptr` again. Else, it allocates a new block of `new_size` and copies the memory contents from the old block. Then it frees the old block and returns the pointer to the new block.
|
||||
- The `__rust_reallocate_inplace` function tries to change the size of the block referenced by `ptr` from `size` to `new_size` without relocating the memory block. If it succeeds, it returns `usable_size(new_size, align)`, else it returns `usable_size(size, align)`.
|
||||
|
||||
[buddy allocator]: https://en.wikipedia.org/wiki/Buddy_memory_allocation
|
||||
[free function]: http://www.cplusplus.com/reference/cstdlib/free/
|
||||
[c free info]: http://stackoverflow.com/questions/1518711/how-does-free-know-how-much-to-free
|
||||
|
||||
A more detailed documentation for these functions can be found in the [API docs for alloc::heap][alloc::heap]. Note that all of these functions and custom allocators in general are _unstable_ (as indicated by the `allocator` feature gate).
|
||||
|
||||
[alloc::heap]: https://doc.rust-lang.org/nightly/alloc/heap/
|
||||
|
||||
### Implementation
|
||||
Let's implement the allocation functions using our new allocator. First we need a way to access the allocator. The functions do not know anything about our allocator, so we can only access it through a `static`:
|
||||
|
||||
``` rust
|
||||
// in libs/bump_allocator/src/lib.rs
|
||||
|
||||
use spin::Mutex;
|
||||
|
||||
extern crate spin;
|
||||
|
||||
pub const HEAP_START: usize = 0o_000_001_000_000_0000;
|
||||
pub const HEAP_SIZE: usize = 100 * 1024; // 100 KiB
|
||||
|
||||
static BUMP_ALLOCATOR: Mutex<BumpAllocator> = Mutex::new(
|
||||
BumpAllocator::new(HEAP_START, HEAP_SIZE));
|
||||
```
|
||||
|
||||
We use `0o_000_001_000_000_0000` as heap start address, which is the address starting at the second `P3` entry. It doesn't really matter which address we choose here as long as it's unused. We use a heap size of 100 KiB, which should be large enough for the near future. The static allocator is protected by a spinlock since we need to be able to modify it. Our allocator crate is distinct from our main crate, so we need to add the `spin` dependency to its `Cargo.toml` as well. The easiest way is to run `cargo add spin` (using the [cargo-edit] crate).
|
||||
|
||||
[cargo-edit]: https://github.com/killercup/cargo-edit
|
||||
|
||||
Now we can easily implement the `__rust_allocate` and `__rust_deallocate` functions:
|
||||
|
||||
```rust
|
||||
// in libs/bump_allocator/src/lib.rs
|
||||
|
||||
#[no_mangle]
|
||||
pub extern fn __rust_allocate(size: usize, align: usize) -> *mut u8 {
|
||||
BUMP_ALLOCATOR.lock().allocate(size, align).expect("out of memory")
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern fn __rust_deallocate(_ptr: *mut u8, _size: usize,
|
||||
_align: usize)
|
||||
{
|
||||
// just leak it
|
||||
}
|
||||
```
|
||||
We use `expect` to panic in out of memory (OOM) situations. We could alternatively return a null pointer, which indicates an OOM situation to the Rust runtime. However, the runtime would react by aborting the process. On Linux, the abort function intentionally raises an [invalid opcode] exception, which would lead to a boot loop for our kernel. So panicking is a better solution for our kernel.
|
||||
|
||||
[invalid opcode]: http://wiki.osdev.org/Exceptions#Invalid_Opcode
|
||||
|
||||
We never allocate more memory than requested, so the `__rust_usable_size` function is simple:
|
||||
|
||||
```rust
|
||||
#[no_mangle]
|
||||
pub extern fn __rust_usable_size(size: usize, _align: usize) -> usize {
|
||||
size
|
||||
}
|
||||
```
|
||||
|
||||
In order to keep things simple, we don't support the `__rust_reallocate_inplace` function and always return the old size:
|
||||
|
||||
```rust
|
||||
#[no_mangle]
|
||||
pub extern fn __rust_reallocate_inplace(_ptr: *mut u8, size: usize,
|
||||
_new_size: usize, _align: usize) -> usize
|
||||
{
|
||||
size
|
||||
}
|
||||
```
|
||||
|
||||
Now only `__rust_reallocate` is left. It's a bit more difficult, since we need to copy the contents of the old allocation to the new allocation. However, we can just steal some code from the official [reallocate implementation for unix][unix realloc]:
|
||||
|
||||
[unix realloc]: https://github.com/rust-lang/rust/blob/c66d2380a810c9a2b3dbb4f93a830b101ee49cc2/src/liballoc_system/lib.rs#L98-L101
|
||||
|
||||
|
||||
``` rust
|
||||
#[no_mangle]
|
||||
pub extern fn __rust_reallocate(ptr: *mut u8, size: usize, new_size: usize,
|
||||
align: usize) -> *mut u8 {
|
||||
use core::{ptr, cmp};
|
||||
|
||||
// from: https://github.com/rust-lang/rust/blob/
|
||||
// c66d2380a810c9a2b3dbb4f93a830b101ee49cc2/
|
||||
// src/liballoc_system/lib.rs#L98-L101
|
||||
|
||||
let new_ptr = __rust_allocate(new_size, align);
|
||||
unsafe { ptr::copy(ptr, new_ptr, cmp::min(size, new_size)) };
|
||||
__rust_deallocate(ptr, size, align);
|
||||
new_ptr
|
||||
}
|
||||
```
|
||||
|
||||
That's it! We have successfully created a custom allocator. Now we're ready to test it.
|
||||
|
||||
## Box, Vec, and Friends
|
||||
|
||||
In order to use our new allocator we import it in our main project:
|
||||
|
||||
```rust
|
||||
// in src/lib.rs of our main project
|
||||
|
||||
extern crate bump_allocator;
|
||||
```
|
||||
|
||||
Additionally, we need to tell cargo where our `bump_allocator` crate lives:
|
||||
|
||||
``` toml
|
||||
# in Cargo.toml of our main project
|
||||
|
||||
[dependencies.bump_allocator]
|
||||
path = "libs/bump_allocator"
|
||||
```
|
||||
|
||||
Now we're able to import the `alloc` and `collections` crates in order to unlock `Box`, `Vec`, `BTreeMap`, and friends:
|
||||
|
||||
```rust
|
||||
// in src/lib.rs of our main project
|
||||
|
||||
#![feature(alloc, collections)]
|
||||
|
||||
extern crate bump_allocator;
|
||||
extern crate alloc;
|
||||
#[macro_use]
|
||||
extern crate collections;
|
||||
```
|
||||
The `collections` crate provides the [format!] and [vec!] macros, so we use `#[macro_use]` to import them.
|
||||
|
||||
[format!]: https://doc.rust-lang.org/nightly/collections/macro.format!.html
|
||||
[vec!]: https://doc.rust-lang.org/nightly/collections/macro.vec!.html
|
||||
|
||||
## Testing
|
||||
|
||||
Now we should be able to allocate memory on the heap. Let's try it in our `rust_main`:
|
||||
|
||||
```rust
|
||||
// in rust_main in src/lib.rs
|
||||
|
||||
use alloc::boxed::Box;
|
||||
let heap_test = Box::new(42);
|
||||
```
|
||||
|
||||
(If you're getting a linker error about `_Unwind_Resume`, try to use the [panic=abort cargo option].)
|
||||
|
||||
[panic=abort cargo option]: https://github.com/phil-opp/blog_os/pull/170
|
||||
|
||||
When we run it, a triple fault occurs and causes permanent rebooting. Let's try to debug it using QEMU and objdump as described [in the previous post][qemu debugging]:
|
||||
|
||||
[qemu debugging]: http://os.phil-opp.com/remap-the-kernel.html#debugging
|
||||
|
||||
```
|
||||
> qemu-system-x86_64 -d int -no-reboot -cdrom build/os-x86_64.iso
|
||||
…
|
||||
check_exception old: 0xffffffff new 0xe
|
||||
0: v=0e e=0002 i=0 cpl=0 IP=0008:0000000000102860 pc=0000000000102860
|
||||
SP=0010:0000000000116af0 CR2=0000000040000000
|
||||
…
|
||||
```
|
||||
Aha! It's a [page fault] (`v=0e`) and was caused by the code at `0x102860`. The code tried to write (`e=0002`) to address `0x40000000`. This address is `0o_000_001_000_000_0000` in octal, which is the `HEAP_START` address defined above. Of course it page-faults: We have forgotten to map the heap memory to some physical memory.
|
||||
|
||||
[page fault]: http://wiki.osdev.org/Exceptions#Page_Fault
|
||||
|
||||
### Some Refactoring
|
||||
In order to map the heap cleanly, we do a bit of refactoring first. We move all memory initialization from our `rust_main` to a new `memory::init` function. Now our `rust_main` looks like this:
|
||||
|
||||
```rust
|
||||
// in src/lib.rs
|
||||
|
||||
pub extern "C" fn rust_main(multiboot_information_address: usize) {
|
||||
// ATTENTION: we have a very small stack and no guard page
|
||||
vga_buffer::clear_screen();
|
||||
println!("Hello World{}", "!");
|
||||
|
||||
let boot_info = unsafe {
|
||||
multiboot2::load(multiboot_information_address)
|
||||
};
|
||||
enable_nxe_bit();
|
||||
enable_write_protect_bit();
|
||||
|
||||
// set up guard page and map the heap pages
|
||||
memory::init(boot_info);
|
||||
|
||||
use alloc::boxed::Box;
|
||||
let heap_test = Box::new(42);
|
||||
|
||||
println!("It did not crash!");
|
||||
|
||||
loop {}
|
||||
}
|
||||
```
|
||||
|
||||
The `memory::init` function looks like this:
|
||||
|
||||
```rust
|
||||
// in src/memory/mod.rs
|
||||
|
||||
use multiboot2::BootInformation;
|
||||
|
||||
pub fn init(boot_info: &BootInformation) {
|
||||
let memory_map_tag = boot_info.memory_map_tag().expect(
|
||||
"Memory map tag required");
|
||||
let elf_sections_tag = boot_info.elf_sections_tag().expect(
|
||||
"Elf sections tag required");
|
||||
|
||||
let kernel_start = elf_sections_tag.sections()
|
||||
.filter(|s| s.is_allocated()).map(|s| s.addr).min().unwrap();
|
||||
let kernel_end = elf_sections_tag.sections()
|
||||
.filter(|s| s.is_allocated()).map(|s| s.addr + s.size).max()
|
||||
.unwrap();
|
||||
|
||||
println!("kernel start: {:#x}, kernel end: {:#x}",
|
||||
kernel_start,
|
||||
kernel_end);
|
||||
println!("multiboot start: {:#x}, multiboot end: {:#x}",
|
||||
boot_info.start_address(),
|
||||
boot_info.end_address());
|
||||
|
||||
let mut frame_allocator = AreaFrameAllocator::new(
|
||||
kernel_start as usize, kernel_end as usize,
|
||||
boot_info.start_address(), boot_info.end_address(),
|
||||
memory_map_tag.memory_areas());
|
||||
|
||||
paging::remap_the_kernel(&mut frame_allocator, boot_info);
|
||||
}
|
||||
```
|
||||
|
||||
We've just moved the code to a new function. However, we've sneaked some improvements in:
|
||||
|
||||
- An additional `.filter(|s| s.is_allocated())` in the calculation of `kernel_start` and `kernel_end`. This ignores all sections that aren't loaded to memory (such as debug sections). Thus, the kernel end address is no longer artificially increased by such sections.
|
||||
- We use the `start_address()` and `end_address()` methods of `boot_info` instead of calculating the addresses manually.
|
||||
- We use the alternate `{:#x}` form when printing kernel/multiboot addresses. Before, we used `0x{:x}`, which leads to the same result. For a complete list of these “alternate” formatting forms, check out the [std::fmt documentation].
|
||||
|
||||
[std::fmt documentation]: https://doc.rust-lang.org/nightly/std/fmt/index.html#sign0
|
||||
|
||||
### Safety
|
||||
It is important that the `memory::init` function is called only once, because it creates a new frame allocator based on kernel and multiboot start/end. When we call it a second time, a new frame allocator is created that reassigns the same frames, even if they are already in use.
|
||||
|
||||
In the second call it would use an identical frame allocator to remap the kernel. The `remap_the_kernel` function would request a frame from the frame allocator to create a new page table. But the returned frame is already in use, since we used it to create our current page table in the first call. In order to initialize the new table, the function zeroes it. This is the point where everything breaks, since we zero our current page table. The CPU is unable to read the next instruction and throws a page fault.
|
||||
|
||||
So we need to ensure that `memory::init` can only be called once. We could mark it as `unsafe`, which would bring it in line with Rust's memory safety rules. However, that would just push the unsafety to the caller. The caller can still accidentally call the function twice, the only difference is that the mistake needs to happen inside `unsafe` blocks.
|
||||
|
||||
A better solution is to insert a check at the function's beginning, that panics if the function is called a second time. This approach has a small runtime cost, but we only call it once, so it's negligible. And we avoid two `unsafe` blocks (one at the calling site and one at the function itself), which is always good.
|
||||
|
||||
In order to make such checks easy, I created a small crate named [once]. To add it, we run `cargo add once` and add the following to our `src/lib.rs`:
|
||||
|
||||
[once]: https://crates.io/crates/once
|
||||
|
||||
```rust
|
||||
// in src/lib.rs
|
||||
|
||||
#[macro_use]
|
||||
extern crate once;
|
||||
```
|
||||
|
||||
The crate provides an [assert_has_not_been_called!] macro (sorry for the long name :D). We can use it to fix the safety problem easily:
|
||||
|
||||
[assert_has_not_been_called!]: https://phil-opp.rustdocs.org/once/macro.assert_has_not_been_called!.html
|
||||
|
||||
``` rust
|
||||
// in src/memory/mod.rs
|
||||
|
||||
pub fn init(boot_info: &BootInformation) {
|
||||
assert_has_not_been_called!("memory::init must be called only once");
|
||||
|
||||
let memory_map_tag = ...
|
||||
...
|
||||
}
|
||||
```
|
||||
That's it. Now our `memory::init` function can only be called once. The macro works by creating a static [AtomicBool] named `CALLED`, which is initialized to `false`. When the macro is invoked, it checks the value of `CALLED` and sets it to `true`. If the value was already `true` before, the macro panics.
|
||||
|
||||
[AtomicBool]: https://doc.rust-lang.org/nightly/core/sync/atomic/struct.AtomicBool.html
|
||||
|
||||
### Mapping the Heap
|
||||
Now we're ready to map the heap pages. In order to do it, we need access to the `ActivePageTable` or `Mapper` instance (see the [previous post]). Therefore we return it from the `paging::remap_the_kernel` function:
|
||||
|
||||
[previous post]: {{ page.previous.url }}
|
||||
|
||||
```rust
|
||||
// in src/memory/paging/mod.rs
|
||||
|
||||
pub fn remap_the_kernel<A>(allocator: &mut A, boot_info: &BootInformation)
|
||||
-> ActivePageTable // new
|
||||
where A: FrameAllocator
|
||||
{
|
||||
...
|
||||
println!("guard page at {:#x}", old_p4_page.start_address());
|
||||
|
||||
active_table // new
|
||||
}
|
||||
```
|
||||
|
||||
Now we have full page table access in the `memory::init` function. This allows us to map the heap pages to physical frames:
|
||||
|
||||
```rust
|
||||
// in src/memory/mod.rs
|
||||
|
||||
pub fn init(boot_info: &BootInformation) {
|
||||
...
|
||||
|
||||
let mut frame_allocator = ...;
|
||||
|
||||
// below is the new part
|
||||
|
||||
let mut active_table = paging::remap_the_kernel(&mut frame_allocator,
|
||||
boot_info);
|
||||
|
||||
use self::paging::Page;
|
||||
use bump_allocator::{HEAP_START, HEAP_SIZE};
|
||||
|
||||
let heap_start_page = Page::containing_address(HEAP_START);
|
||||
let heap_end_page = Page::containing_address(HEAP_START + HEAP_SIZE-1);
|
||||
|
||||
for page in Page::range_inclusive(heap_start_page, heap_end_page) {
|
||||
active_table.map(page, paging::WRITABLE, &mut frame_allocator);
|
||||
}
|
||||
```
|
||||
|
||||
The `Page::range_inclusive` function is just a copy of the `Frame::range_inclusive` function:
|
||||
|
||||
```rust
|
||||
// in src/memory/paging/mod.rs
|
||||
|
||||
#[derive(…, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct Page {...}
|
||||
|
||||
impl Page {
|
||||
...
|
||||
pub fn range_inclusive(start: Page, end: Page) -> PageIter {
|
||||
PageIter {
|
||||
start: start,
|
||||
end: end,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct PageIter {
|
||||
start: Page,
|
||||
end: Page,
|
||||
}
|
||||
|
||||
impl Iterator for PageIter {
|
||||
type Item = Page;
|
||||
|
||||
fn next(&mut self) -> Option<Page> {
|
||||
if self.start <= self.end {
|
||||
let page = self.start;
|
||||
self.start.number += 1;
|
||||
Some(page)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Now we map the whole heap to physical pages. This needs some time and might introduce a noticeable delay when we increase the heap size in the future. Another drawback is that we consume a large amount of physical frames even though we might not need the whole heap space. We will fix these problems in a future post by mapping the pages lazily.
|
||||
|
||||
### It works!
|
||||
|
||||
Now `Box` and `Vec` should work. For example:
|
||||
|
||||
```rust
|
||||
// in rust_main in src/lib.rs
|
||||
|
||||
use alloc::boxed::Box;
|
||||
let mut heap_test = Box::new(42);
|
||||
*heap_test -= 15;
|
||||
let heap_test2 = Box::new("hello");
|
||||
println!("{:?} {:?}", heap_test, heap_test2);
|
||||
|
||||
let mut vec_test = vec![1,2,3,4,5,6,7];
|
||||
vec_test[3] = 42;
|
||||
for i in &vec_test {
|
||||
print!("{} ", i);
|
||||
}
|
||||
```
|
||||
|
||||
We can also use all other types of the `alloc` and `collections` crates, including:
|
||||
|
||||
- the reference counted pointers [Rc] and [Arc]
|
||||
- the owned string type [String] and the [format!] macro
|
||||
- [Linked List]
|
||||
- the growable ring buffer [VecDeque]
|
||||
- [BinaryHeap]
|
||||
- [BTreeMap] and [BTreeSet]
|
||||
|
||||
[Rc]: https://doc.rust-lang.org/nightly/alloc/rc/
|
||||
[Arc]: https://doc.rust-lang.org/nightly/alloc/arc/
|
||||
[String]: https://doc.rust-lang.org/nightly/collections/string/struct.String.html
|
||||
[Linked List]: https://doc.rust-lang.org/nightly/collections/linked_list/struct.LinkedList.html
|
||||
[VecDeque]: https://doc.rust-lang.org/nightly/collections/vec_deque/struct.VecDeque.html
|
||||
[BinaryHeap]: https://doc.rust-lang.org/nightly/collections/binary_heap/struct.BinaryHeap.html
|
||||
[BTreeMap]: https://doc.rust-lang.org/nightly/collections/btree_map/struct.BTreeMap.html
|
||||
[BTreeSet]: https://doc.rust-lang.org/nightly/collections/btree_set/struct.BTreeSet.html
|
||||
|
||||
## A better Allocator
|
||||
Right now, we leak every freed memory block. Thus, we run out of memory quickly, for example, by creating a new `String` in each iteration of a loop:
|
||||
|
||||
```rust
|
||||
// in rust_main in src/lib.rs
|
||||
|
||||
for i in 0..10000 {
|
||||
format!("Some String");
|
||||
}
|
||||
```
|
||||
|
||||
To fix this, we need to create an allocator that keeps track of freed memory blocks and reuses them if possible. This introduces some challenges:
|
||||
|
||||
- We need to keep track of a possibly unlimited number of freed blocks. For example, an application could allocate `n` one-byte sized blocks and free every second block, which creates `n/2` freed blocks. We can't rely on any upper bound for the number of freed blocks since `n` could be arbitrarily large.
|
||||
- We can't use any of the collections from above, since they rely on allocations themselves. (It might be possible as soon as [RFC #1398] is [implemented][#32838], which allows user-defined allocators for specific collection instances.)
|
||||
- We need to merge adjacent freed blocks if possible. Otherwise, the freed memory is no longer usable for large allocations. We will discuss this point in more detail below.
|
||||
- Our allocator should search the set of freed blocks quickly and keep fragmentation low.
|
||||
|
||||
[RFC #1398]: https://github.com/rust-lang/rfcs/blob/master/text/1398-kinds-of-allocators.md
|
||||
[#32838]: https://github.com/rust-lang/rust/issues/32838
|
||||
|
||||
### Creating a List of freed Blocks
|
||||
|
||||
Where do we store the information about an unlimited number of freed blocks? We can't use any fixed size data structure since it could always be too small for some allocation sequences. So we need some kind of dynamically growing set.
|
||||
|
||||
One possible solution could be to use an array-like data structure that starts at some unused virtual address. If the array becomes full, we increase its size and map new physical frames as backing storage. This approach would require a large part of the virtual address space since the array could grow significantly. We would need to create a custom implementation of a growable array and manipulate the page tables when deallocating. It would also consume a possibly large number of physical frames as backing storage.
|
||||
|
||||
We will choose another solution with different tradeoffs. It's not clearly “better” than the approach above and has significant disadvantages itself. However, it has one big advantage: It does not need any additional physical or virtual memory at all. This makes it less complex since we don't need to manipulate any page tables. The idea is the following:
|
||||
|
||||
A freed memory block is not used anymore and no one needs the stored information. It is still mapped to a virtual address and backed by a physical page. So we just store the information about the freed block _in the block itself_. We keep a pointer to the first block and store a pointer to the next block in each block. Thus, we create a single linked list:
|
||||
|
||||

|
||||
|
||||
In the following, we call a freed block a _hole_. Each hole stores its size and a pointer to the next hole. If a hole is larger than needed, we leave the remaining memory unused. By storing a pointer to the first hole, we are able to traverse the complete list.
|
||||
|
||||
#### Initialization
|
||||
When the heap is created, all of its memory is unused. Thus, it forms a single large hole:
|
||||
|
||||

|
||||
|
||||
The optional pointer to the next hole is set to `None`.
|
||||
|
||||
#### Allocation
|
||||
In order to allocate a block of memory, we need to find a hole that satisfies the size and alignment requirements. If the found hole is larger than required, we split it into two smaller holes. For example, when we allocate a 24 byte block right after initialization, we split the single hole into a hole of size 24 and a hole with the remaining size:
|
||||
|
||||

|
||||
|
||||
Then we use the new 24 byte hole to perform the allocation:
|
||||
|
||||

|
||||
|
||||
To find a suitable hole, we can use several search strategies:
|
||||
|
||||
- **best fit**: Search the whole list and choose the _smallest_ hole that satisfies the requirements.
|
||||
- **worst fit**: Search the whole list and choose the _largest_ hole that satisfies the requirements.
|
||||
- **first fit**: Search the list from the beginning and choose the _first_ hole that satisfies the requirements.
|
||||
|
||||
Each strategy has its advantages and disadvantages. Best fit uses the smallest hole possible and leaves larger holes for large allocations. But splitting the smallest hole might create a tiny hole, which is too small for most allocations. In contrast, the worst fit strategy always chooses the largest hole. Thus, it does not create tiny holes, but it consumes the large block, which might be required for large allocations.
|
||||
|
||||
For our use case, the best fit strategy is better than worst fit. The reason is that we have a minimal hole size of 16 bytes, since each hole needs to be able to store a size (8 bytes) and a pointer to the next hole (8 bytes). Thus, even the best fit strategy leads to holes of usable size. Furthermore, we will need to allocate very large blocks occasionally (e.g. for [DMA] buffers).
|
||||
|
||||
[DMA]: https://en.wikipedia.org/wiki/Direct_memory_access
|
||||
|
||||
However, both best fit and worst fit have a significant problem: They need to scan the whole list for each allocation in order to find the optimal block. This leads to long allocation times if the list is long. The first fit strategy does not have this problem, as it returns as soon as it finds a suitable hole. It is fairly fast for small allocations and might only need to scan the whole list for large allocations.
|
||||
|
||||
#### Deallocation
|
||||
To deallocate a block of memory, we can just insert its corresponding hole somewhere into the list. However, we need to merge adjacent holes. Otherwise, we are unable to reuse the freed memory for larger allocations. For example:
|
||||
|
||||

|
||||
|
||||
In order to use these adjacent holes for a large allocation, we need to merge them to a single large hole first:
|
||||
|
||||

|
||||
|
||||
The easiest way to ensure that adjacent holes are always merged, is to keep the hole list sorted by address. Thus, we only need to check the predecessor and the successor in the list when we free a memory block. If they are adjacent to the freed block, we merge the corresponding holes. Else, we insert the freed block as a new hole at the correct position.
|
||||
|
||||
### Implementation
|
||||
The detailed implementation would go beyond the scope of this post, since it contains several hidden difficulties. For example:
|
||||
|
||||
- Several merge cases: Merge with the previous hole, merge with the next hole, merge with both holes.
|
||||
- We need to satisfy the alignment requirements, which requires additional splitting logic.
|
||||
- The minimal hole size of 16 bytes: We must not create smaller holes when splitting a hole.
|
||||
|
||||
I created the [linked_list_allocator] crate to handle all of these cases. It consists of a [Heap struct] that provides an `allocate_first_fit` and a `deallocate` method. If you are interested in the implementation details, check out the [source code][linked_list_allocator source].
|
||||
|
||||
[linked_list_allocator]: https://crates.io/crates/linked_list_allocator
|
||||
[Heap struct]: http://phil-opp.github.io/linked-list-allocator/linked_list_allocator/struct.Heap.html
|
||||
[linked_list_allocator source]: https://github.com/phil-opp/linked-list-allocator
|
||||
|
||||
So we just need to implement Rust's allocation modules and integrate it into our kernel. We start by creating a new `hole_list_allocator`[^1] crate inside the `libs` directory:
|
||||
|
||||
[^1]: The name `linked_list_allocator` is already taken, sorry :P.
|
||||
|
||||
```shell
|
||||
> cd libs
|
||||
> cargo new hole_list_allocator
|
||||
> cd hole_list_allocator
|
||||
```
|
||||
|
||||
We add the `allocator` and `no_std` attributes to `src/lib.rs` as described above:
|
||||
|
||||
```rust
|
||||
// in libs/hole_list_allocator/src/lib.rs
|
||||
|
||||
#![feature(allocator)]
|
||||
|
||||
#![allocator]
|
||||
#![no_std]
|
||||
```
|
||||
|
||||
We also add a static allocator protected by a spinlock, but this time we use the `Heap` type of the `linked_list_allocator` crate:
|
||||
|
||||
```rust
|
||||
// in libs/hole_list_allocator/src/lib.rs
|
||||
|
||||
#![feature(const_fn)]
|
||||
|
||||
use spin::Mutex;
|
||||
use linked_list_allocator::Heap;
|
||||
|
||||
extern crate spin;
|
||||
extern crate linked_list_allocator;
|
||||
|
||||
pub const HEAP_START: usize = 0o_000_001_000_000_0000;
|
||||
pub const HEAP_SIZE: usize = 100 * 1024; // 100 KiB
|
||||
|
||||
static HEAP: Mutex<Heap> = Mutex::new(Heap::new(HEAP_START, HEAP_SIZE));
|
||||
```
|
||||
Note that we use the same values for `HEAP_START` and `HEAP_SIZE` as in the `bump_allocator`.
|
||||
|
||||
We need to add the extern crates to our `Cargo.toml`:
|
||||
|
||||
``` shell
|
||||
> cargo add spin
|
||||
> cargo add linked_list_allocator
|
||||
```
|
||||
|
||||
However, we get an error when we try to compile it:
|
||||
|
||||
```
|
||||
error: function calls in statics are limited to constant functions,
|
||||
struct and enum constructors [E0015]
|
||||
static HEAP: Mutex<Heap> = Mutex::new(Heap::new(HEAP_START, HEAP_SIZE));
|
||||
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
```
|
||||
The reason is that the `Heap::new` function needs to initialize the first hole (as described [above](#initialization)). This can't be done at compile time, so the function can't be a `const` function. Therefore we can't use it to initialize a static.
|
||||
|
||||
There is an easy solution for crates with access to the standard library: [lazy_static]. It automatically initializes the static when it's used the first time. By default, it relies on the `std::sync::once` module and is thus unusable in our kernel. Fortunately it has a `spin_no_std` feature for `no_std` projects.
|
||||
|
||||
[lazy_static]: https://github.com/rust-lang-nursery/lazy-static.rs
|
||||
|
||||
So let's use the `lazy_static!` macro to fix our `hole_list_allocator`:
|
||||
|
||||
```toml
|
||||
# in libs/hole_list_allocator/Cargo.toml
|
||||
|
||||
[dependencies.lazy_static]
|
||||
version = "0.2.1"
|
||||
features = ["spin_no_std"]
|
||||
```
|
||||
|
||||
```rust
|
||||
// in libs/hole_list_allocator/src/lib.rs
|
||||
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
|
||||
lazy_static! {
|
||||
static ref HEAP: Mutex<Heap> = Mutex::new(unsafe {
|
||||
Heap::new(HEAP_START, HEAP_SIZE)
|
||||
});
|
||||
}
|
||||
```
|
||||
The `unsafe` block is required since `Heap::new` is `unsafe`. It's unsafe because it assumes that `HEAP_START` is a valid and unused address.
|
||||
|
||||
Now we can implement the allocation functions:
|
||||
|
||||
```rust
|
||||
// in libs/hole_list_allocator/src/lib.rs
|
||||
|
||||
#[no_mangle]
|
||||
pub extern fn __rust_allocate(size: usize, align: usize) -> *mut u8 {
|
||||
HEAP.lock().allocate_first_fit(size, align).expect("out of memory")
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern fn __rust_deallocate(ptr: *mut u8, size: usize, align: usize) {
|
||||
unsafe { HEAP.lock().deallocate(ptr, size, align) };
|
||||
}
|
||||
```
|
||||
|
||||
The remaining functions are implemented like above:
|
||||
|
||||
```rust
|
||||
// in libs/hole_list_allocator/src/lib.rs
|
||||
|
||||
#[no_mangle]
|
||||
pub extern fn __rust_usable_size(size: usize, _align: usize) -> usize {
|
||||
size
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern fn __rust_reallocate_inplace(_ptr: *mut u8, size: usize,
|
||||
_new_size: usize, _align: usize) -> usize
|
||||
{
|
||||
size
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern fn __rust_reallocate(ptr: *mut u8, size: usize, new_size: usize,
|
||||
align: usize) -> *mut u8 {
|
||||
use core::{ptr, cmp};
|
||||
|
||||
// from: https://github.com/rust-lang/rust/blob/
|
||||
// c66d2380a810c9a2b3dbb4f93a830b101ee49cc2/
|
||||
// src/liballoc_system/lib.rs#L98-L101
|
||||
|
||||
let new_ptr = __rust_allocate(new_size, align);
|
||||
unsafe { ptr::copy(ptr, new_ptr, cmp::min(size, new_size)) };
|
||||
__rust_deallocate(ptr, size, align);
|
||||
new_ptr
|
||||
}
|
||||
```
|
||||
|
||||
Now we just need to replace every use of `bump_allocator` with `hole_list_allocator` in our kernel:
|
||||
|
||||
```toml
|
||||
# in Cargo.toml
|
||||
|
||||
[dependencies.hole_list_allocator]
|
||||
path = "libs/hole_list_allocator"
|
||||
```
|
||||
|
||||
```diff
|
||||
in src/lib.rs:
|
||||
|
||||
-extern crate bump_allocator;
|
||||
+extern crate hole_list_allocator;
|
||||
|
||||
in memory::init in src/memory/mod.rs:
|
||||
|
||||
-use bump_allocator::{HEAP_START, HEAP_SIZE};
|
||||
+use hole_list_allocator::{HEAP_START, HEAP_SIZE};
|
||||
```
|
||||
|
||||
Our kernel uses the new allocator now, so we can deallocate memory without leaking it. The example from above should work now without causing an OOM situation:
|
||||
|
||||
```rust
|
||||
// in rust_main in src/lib.rs
|
||||
|
||||
for i in 0..10000 {
|
||||
format!("Some String");
|
||||
}
|
||||
```
|
||||
|
||||
### Performance
|
||||
The linked list based approach has some performance problems. Each allocation or deallocation might need to scan the complete list of holes in the worst case. However, I think it's good enough for now, since our heap will stay relatively small for the near future. When our allocator becomes a performance problem eventually, we can just replace it with a faster alternative.
|
||||
|
||||
## Summary
|
||||
Now we're able to use heap storage in our kernel without leaking memory. This allows us to effectively process dynamic data such as user supplied strings in the future. We can also use `Rc` and `Arc` to create types with shared ownership. And we have access to various data structures such as `Vec` or `LinkedList`, which will make our lives much easier. We even have some well tested and optimized [binary heap] and [B-tree] implementations!
|
||||
|
||||
[binary heap]:https://en.wikipedia.org/wiki/Binary_heap
|
||||
[B-tree]: https://en.wikipedia.org/wiki/B-tree
|
||||
|
||||
## What's next?
|
||||
This post concludes the section about memory management for now. We will revisit this topic eventually, but now it's time to explore other topics. The upcoming posts will be about CPU exceptions and interrupts. We will catch all page, double, and triple faults and create a driver to read keyboard input. The [next post] starts by setting up a so-called _Interrupt Descriptor Table_.
|
||||
|
||||
[next post]: {{% relref "2016-05-28-catching-exceptions.md" %}}
|
||||
@@ -1,663 +0,0 @@
|
||||
+++
|
||||
title = "Catching Exceptions"
|
||||
date = "2016-05-28"
|
||||
updated = "2016-06-25"
|
||||
+++
|
||||
|
||||
In this post, we start exploring exceptions. We set up an interrupt descriptor table and add handler functions. At the end of this post, our kernel will be able to catch divide-by-zero faults.
|
||||
|
||||
<!--more--><aside id="toc"></aside>
|
||||
|
||||
As always, the complete source code is on [Github]. Please file [issues] for any problems, questions, or improvement suggestions. There is also a comment section at the end of this page.
|
||||
|
||||
[Github]: https://github.com/phil-opp/blog_os/tree/catching_exceptions
|
||||
[issues]: https://github.com/phil-opp/blog_os/issues
|
||||
|
||||
**Update**: Due to a subtle [stack alignment bug], we no longer catch page faults in this post. Instead, we catch divide-by-zero errors.
|
||||
|
||||
[stack alignment bug]: https://github.com/phil-opp/blog_os/issues/184
|
||||
|
||||
## Exceptions
|
||||
An exception signals that something is wrong with the current instruction. For example, the CPU issues an exception if the current instruction tries to divide by 0. When an exception occurs, the CPU interrupts its current work and immediately calls a specific exception handler function, depending on the exception type.
|
||||
|
||||
We've already seen several types of exceptions in our kernel:
|
||||
|
||||
- **Invalid Opcode**: This exception occurs when the current instruction is invalid. For example, this exception occurred when we tried to use SSE instructions before enabling SSE. Without SSE, the CPU didn't know the `movups` and `movaps` instructions, so it throws an exception when it stumbles over them.
|
||||
- **Page Fault**: A page fault occurs on illegal memory accesses. For example, if the current instruction tries to read from an unmapped page or tries to write to a read-only page.
|
||||
- **Double Fault**: When an exception occurs, the CPU tries to call the corresponding handler function. If another exception occurs _while calling the exception handler_, the CPU raises a double fault exception. This exception also occurs when there is no handler function registered for an exception.
|
||||
- **Triple Fault**: If an exception occurs while the CPU tries to call the double fault handler function, it issues a fatal _triple fault_. We can't catch or handle a triple fault. Most processors react by resetting themselves and rebooting the operating system. This causes the bootloops we experienced in the previous posts.
|
||||
|
||||
For the full list of exceptions check out the [OSDev wiki][exceptions].
|
||||
|
||||
[exceptions]: http://wiki.osdev.org/Exceptions
|
||||
|
||||
### The Interrupt Descriptor Table
|
||||
In order to catch and handle exceptions, we have to set up a so-called _Interrupt Descriptor Table_ (IDT). In this table we can specify a handler function for each CPU exception. The hardware uses this table directly, so we need to follow a predefined format. Each entry must have the following 16-byte structure:
|
||||
|
||||
Type| Name | Description
|
||||
----|--------------------------|-----------------------------------
|
||||
u16 | Function Pointer [0:15] | The lower bits of the pointer to the handler function.
|
||||
u16 | GDT selector | Selector of a code segment in the GDT.
|
||||
u16 | Options | (see below)
|
||||
u16 | Function Pointer [16:31] | The middle bits of the pointer to the handler function.
|
||||
u32 | Function Pointer [32:63] | The remaining bits of the pointer to the handler function.
|
||||
u32 | Reserved |
|
||||
|
||||
The options field has the following format:
|
||||
|
||||
Bits | Name | Description
|
||||
------|-----------------------------------|-----------------------------------
|
||||
0-2 | Interrupt Stack Table Index | 0: Don't switch stacks, 1-7: Switch to the n-th stack in the Interrupt Stack Table when this handler is called.
|
||||
3-7 | Reserved |
|
||||
8 | 0: Interrupt Gate, 1: Trap Gate | If this bit is 0, interrupts are disabled when this handler is called.
|
||||
9-11 | must be one |
|
||||
12 | must be zero |
|
||||
13‑14 | Descriptor Privilege Level (DPL) | The minimal privilege level required for calling this handler.
|
||||
15 | Present |
|
||||
|
||||
Each exception has a predefined IDT index. For example the invalid opcode exception has table index 6 and the page fault exception has table index 14. Thus, the hardware can automatically load the corresponding IDT entry for each exception. The [Exception Table][exceptions] in the OSDev wiki shows the IDT indexes of all exceptions in the “Vector nr.” column.
|
||||
|
||||
When an exception occurs, the CPU roughly does the following:
|
||||
|
||||
1. Read the corresponding entry from the Interrupt Descriptor Table (IDT). For example, the CPU reads the 14-th entry when a page fault occurs.
|
||||
2. Check if the entry is present. Raise a double fault if not.
|
||||
3. Push some registers on the stack, including the instruction pointer and the [EFLAGS] register. (We will use these values in a future post.)
|
||||
4. Disable interrupts if the entry is an interrupt gate (bit 40 not set).
|
||||
5. Load the specified GDT selector into the CS segment.
|
||||
6. Jump to the specified handler function.
|
||||
|
||||
[EFLAGS]: https://en.wikipedia.org/wiki/FLAGS_register
|
||||
|
||||
## Handling Exceptions
|
||||
Let's try to catch and handle CPU exceptions. We start by creating a new `interrupts` module with an `idt` submodule:
|
||||
|
||||
``` rust
|
||||
// in src/lib.rs
|
||||
...
|
||||
mod interrupts;
|
||||
...
|
||||
```
|
||||
``` rust
|
||||
// src/interrupts/mod.rs
|
||||
|
||||
mod idt;
|
||||
```
|
||||
|
||||
Now we create types for the IDT and its entries:
|
||||
|
||||
```rust
|
||||
// src/interrupts/idt.rs
|
||||
|
||||
use x86::segmentation::{self, SegmentSelector};
|
||||
|
||||
pub struct Idt([Entry; 16]);
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
#[repr(C, packed)]
|
||||
pub struct Entry {
|
||||
pointer_low: u16,
|
||||
gdt_selector: SegmentSelector,
|
||||
options: EntryOptions,
|
||||
pointer_middle: u16,
|
||||
pointer_high: u32,
|
||||
reserved: u32,
|
||||
}
|
||||
```
|
||||
|
||||
The IDT is variable sized and can have up to 256 entries. We only need the first 16 entries in this post, so we define the table as `[Entry; 16]`. The remaining 240 handlers are treated as non-present by the CPU.
|
||||
|
||||
The `Entry` type is the translation of the above table to Rust. The `repr(C, packed)` attribute ensures that the compiler keeps the field ordering and does not add any padding between them. Instead of describing the `gdt_selector` as a plain `u16`, we use the `SegmentSelector` type of the `x86` crate. We also merge bits 32 to 47 into an `options` field, because Rust has no `u3` or `u1` type. The `EntryOptions` type is described below:
|
||||
|
||||
### Entry Options
|
||||
The `EntryOptions` type has the following skeleton:
|
||||
|
||||
``` rust
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct EntryOptions(u16);
|
||||
|
||||
impl EntryOptions {
|
||||
fn new() -> Self {...}
|
||||
|
||||
pub fn set_present(&mut self, present: bool) {...}
|
||||
|
||||
pub fn disable_interrupts(&mut self, disable: bool) {...}
|
||||
|
||||
pub fn set_privilege_level(&mut self, dpl: u16) {...}
|
||||
|
||||
pub fn set_stack_index(&mut self, index: u16) {...}
|
||||
}
|
||||
```
|
||||
|
||||
The implementations of these methods need to modify the correct bits of the `u16` without touching the other bits. For example, we would need the following bit-fiddling to set the stack index:
|
||||
|
||||
``` rust
|
||||
self.0 = (self.0 & 0xfff8) | stack_index;
|
||||
```
|
||||
|
||||
Or alternatively:
|
||||
|
||||
``` rust
|
||||
self.0 = (self.0 & (!0b111)) | stack_index;
|
||||
```
|
||||
|
||||
Or:
|
||||
|
||||
``` rust
|
||||
self.0 = ((self.0 >> 3) << 3) | stack_index;
|
||||
```
|
||||
|
||||
Well, none of these variants is really _readable_ and it's very easy to make mistakes somewhere. Therefore I created a `BitField` type with the following [Range]-based API:
|
||||
|
||||
[Range]: https://doc.rust-lang.org/nightly/core/ops/struct.Range.html
|
||||
|
||||
``` rust
|
||||
self.0.set_range(0..3, stack_index);
|
||||
```
|
||||
|
||||
I think it is much more readable, since we abstracted away all bit-masking details. The `BitField` type is contained in the [bit_field] crate. (It's pretty new, so it might still contain bugs.) To add it as dependency, we run `cargo add bit_field` and add `extern crate bit_field;` to our `src/lib.rs`.
|
||||
|
||||
[bit_field]: https://crates.io/crates/bit_field
|
||||
|
||||
Now we can use the crate to implement the methods of `EntryOptions`:
|
||||
|
||||
```rust
|
||||
// in src/interrupts/idt.rs
|
||||
|
||||
use bit_field::BitField;
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct EntryOptions(BitField<u16>);
|
||||
|
||||
impl EntryOptions {
|
||||
fn minimal() -> Self {
|
||||
let mut options = BitField::new(0);
|
||||
options.set_range(9..12, 0b111); // 'must-be-one' bits
|
||||
EntryOptions(options)
|
||||
}
|
||||
|
||||
fn new() -> Self {
|
||||
let mut options = Self::minimal();
|
||||
options.set_present(true).disable_interrupts(true);
|
||||
options
|
||||
}
|
||||
|
||||
pub fn set_present(&mut self, present: bool) -> &mut Self {
|
||||
self.0.set_bit(15, present);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn disable_interrupts(&mut self, disable: bool) -> &mut Self {
|
||||
self.0.set_bit(8, !disable);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn set_privilege_level(&mut self, dpl: u16) -> &mut Self {
|
||||
self.0.set_range(13..15, dpl);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn set_stack_index(&mut self, index: u16) -> &mut Self {
|
||||
self.0.set_range(0..3, index);
|
||||
self
|
||||
}
|
||||
}
|
||||
```
|
||||
Note that the ranges are _exclusive_ of the upper bound. The `minimal` function creates an `EntryOptions` type with only the “must-be-one” bits set. The `new` function, on the other hand, chooses reasonable defaults: It sets the present bit (why would you want to create a non-present entry?) and disables interrupts (normally we don't want our exception handlers to be interrupted). By returning the self pointer from the `set_*` methods, we allow easy method chaining such as `options.set_present(true).disable_interrupts(true)`.
|
||||
|
||||
### Creating IDT Entries
|
||||
Now we can add a function to create new IDT entries:
|
||||
|
||||
```rust
|
||||
impl Entry {
|
||||
fn new(gdt_selector: SegmentSelector, handler: HandlerFunc) -> Self {
|
||||
let pointer = handler as u64;
|
||||
Entry {
|
||||
gdt_selector: gdt_selector,
|
||||
pointer_low: pointer as u16,
|
||||
pointer_middle: (pointer >> 16) as u16,
|
||||
pointer_high: (pointer >> 32) as u32,
|
||||
options: EntryOptions::new(),
|
||||
reserved: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
We take a GDT selector and a handler function as arguments and create a new IDT entry for it. The `HandlerFunc` type is described below. It is a function pointer that can be converted to a `u64`. We choose the lower 16 bits for `pointer_low`, the next 16 bits for `pointer_middle` and the remaining 32 bits for `pointer_high`. For the options field we choose our default options, i.e. present and disabled interrupts.
|
||||
|
||||
### The Handler Function Type
|
||||
|
||||
The `HandlerFunc` type is a type alias for a function type:
|
||||
|
||||
``` rust
|
||||
pub type HandlerFunc = extern "C" fn() -> !;
|
||||
```
|
||||
It needs to be a function with a defined [calling convention], as it is called directly by the hardware. The C calling convention is the de facto standard in OS development, so we're using it, too. The function takes no arguments, since the hardware doesn't supply any arguments when jumping to the handler function.
|
||||
|
||||
[calling convention]: https://en.wikipedia.org/wiki/Calling_convention
|
||||
|
||||
It is important that the function is [diverging], i.e. it must never return. The reason is that the hardware doesn't _call_ the handler functions, it just _jumps_ to them after pushing some values to the stack. So our stack might look different:
|
||||
|
||||
[diverging]: https://doc.rust-lang.org/book/functions.html#diverging-functions
|
||||
|
||||

|
||||
|
||||
If our handler function returned normally, it would try to pop the return address from the stack. But it might get some completely different value then. For example, the CPU pushes an error code for some exceptions. Bad things would happen if we interpreted this error code as return address and jumped to it. Therefore interrupt handler functions must diverge[^fn-must-diverge].
|
||||
|
||||
[^fn-must-diverge]: Another reason is that we overwrite the current register values by executing the handler function. Thus, the interrupted function loses its state and can't proceed anyway.
|
||||
|
||||
### IDT methods
|
||||
Let's add a function to create new interrupt descriptor tables:
|
||||
|
||||
```rust
|
||||
impl Idt {
|
||||
pub fn new() -> Idt {
|
||||
Idt([Entry::missing(); 16])
|
||||
}
|
||||
}
|
||||
|
||||
impl Entry {
|
||||
fn missing() -> Self {
|
||||
Entry {
|
||||
gdt_selector: SegmentSelector::new(0),
|
||||
pointer_low: 0,
|
||||
pointer_middle: 0,
|
||||
pointer_high: 0,
|
||||
options: EntryOptions::minimal(),
|
||||
reserved: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
The `missing` function creates a non-present Entry. We could choose any values for the pointer and GDT selector fields as long as the present bit is not set.
|
||||
|
||||
However, a table with non-present entries is not very useful. So we create a `set_handler` method to add new handler functions:
|
||||
|
||||
```rust
|
||||
impl Idt {
|
||||
pub fn set_handler(&mut self, entry: u8, handler: HandlerFunc)
|
||||
-> &mut EntryOptions
|
||||
{
|
||||
self.0[entry as usize] = Entry::new(segmentation::cs(), handler);
|
||||
&mut self.0[entry as usize].options
|
||||
}
|
||||
}
|
||||
```
|
||||
The method overwrites the specified entry with the given handler function. We use the `segmentation::cs`[^fn-segmentation-cs] function of the [x86 crate] to get the current code segment selector. There's no need for different kernel code segments in long mode, so the current `cs` value should always be the right choice.
|
||||
|
||||
[x86 crate]: https://github.com/gz/rust-x86
|
||||
[^fn-segmentation-cs]: The `segmentation::cs` function was [added](https://github.com/gz/rust-x86/pull/12) in version 0.7.0, so you might need to update your `x86` version in your `Cargo.toml`.
|
||||
|
||||
By returning a mutable reference to the entry's options, we allow the caller to override the default settings. For example, the caller could add a non-present entry by executing: `idt.set_handler(11, handler_fn).set_present(false)`.
|
||||
|
||||
### Loading the IDT
|
||||
Now we're able to create new interrupt descriptor tables with registered handler functions. We just need a way to load an IDT, so that the CPU uses it. The x86 architecture uses a special register to store the active IDT and its length. In order to load a new IDT we need to update this register through the [lidt] instruction.
|
||||
|
||||
[lidt]: http://x86.renejeschke.de/html/file_module_x86_id_156.html
|
||||
|
||||
The `lidt` instruction expects a pointer to a special data structure, which specifies the start address of the IDT and its length:
|
||||
|
||||
|
||||
Type | Name | Description
|
||||
--------|---------|-----------------------------------
|
||||
u16 | Limit | The maximum addressable byte in the table. Equal to the table size in bytes minus 1.
|
||||
u64 | Offset | Virtual start address of the table.
|
||||
|
||||
This structure is already contained [in the x86 crate], so we don't need to create it ourselves. The same is true for the [lidt function]. So we just need to put the pieces together to create a `load` method:
|
||||
|
||||
[in the x86 crate]: http://gz.github.io/rust-x86/x86/dtables/struct.DescriptorTablePointer.html
|
||||
[lidt function]: http://gz.github.io/rust-x86/x86/dtables/fn.lidt.html
|
||||
|
||||
```rust
|
||||
impl Idt {
|
||||
pub fn load(&self) {
|
||||
use x86::dtables::{DescriptorTablePointer, lidt};
|
||||
use core::mem::size_of;
|
||||
|
||||
let ptr = DescriptorTablePointer {
|
||||
base: self as *const _ as u64,
|
||||
limit: (size_of::<Self>() - 1) as u16,
|
||||
};
|
||||
|
||||
unsafe { lidt(&ptr) };
|
||||
}
|
||||
}
|
||||
```
|
||||
The method does not need to modify the IDT, so it takes `self` by immutable reference. We convert this reference to a `u64` and calculate the table size using [mem::size_of]. The additional `-1` is needed because the limit field has to be the maximum addressable byte.
|
||||
|
||||
[mem::size_of]: https://doc.rust-lang.org/nightly/core/mem/fn.size_of.html
|
||||
|
||||
Then we pass a pointer to our `ptr` structure to the `lidt` function, which calls the `lidt` assembly instruction in order to reload the IDT register. We need an unsafe block here, because the `lidt` function assumes that the specified handler addresses are valid.
|
||||
|
||||
#### Safety
|
||||
But can we really guarantee that handler addresses are always valid? Let's see:
|
||||
|
||||
- The `Idt::new` function creates a new table populated with non-present entries. There's no way to set these entries to present from outside of this module, so this function is fine.
|
||||
- The `set_handler` method allows us to overwrite a specified entry and point it to some handler function. Rust's type system guarantees that function pointers are always valid (as long as no `unsafe` is involved), so this function is fine, too.
|
||||
|
||||
There are no other public functions in the `idt` module (except `load`), so it should be safe… right?
|
||||
|
||||
Wrong! Imagine the following scenario:
|
||||
|
||||
```rust
|
||||
pub fn init() {
|
||||
load_idt();
|
||||
cause_page_fault();
|
||||
}
|
||||
|
||||
fn load_idt() {
|
||||
let mut idt = idt::Idt::new();
|
||||
idt.set_handler(14, page_fault_handler);
|
||||
idt.load();
|
||||
}
|
||||
|
||||
fn cause_page_fault() {
|
||||
let x = [1,2,3,4,5,6,7,8,9];
|
||||
unsafe{ *(0xdeadbeaf as *mut u64) = x[4] };
|
||||
}
|
||||
```
|
||||
This won't work. If we're lucky, we get a triple fault and a boot loop. If we're unlucky, our kernel does strange things and fails at some completely unrelated place. So what's the problem here?
|
||||
|
||||
Well, we construct an IDT _on the stack_ and load it. It is perfectly valid until the end of the `load_idt` function. But as soon as the function returns, its stack frame can be reused by other functions. Thus, the IDT gets overwritten by the stack frame of the `cause_page_fault` function. So when the page fault occurs and the CPU tries to read the entry, it only sees some garbage values and issues a double fault, which escalates to a triple fault and a CPU reset.
|
||||
|
||||
Now imagine that the `cause_page_fault` function declared an array of pointers instead. If the present bit was coincidentally set, the CPU would jump to some random pointer and interpret random memory as code. This would be a clear violation of memory safety.
|
||||
|
||||
#### Fixing the load method
|
||||
So how do we fix it? We could make the load function itself `unsafe` and push the unsafety to the caller. However, there is a much better solution in this case. In order to see it, we formulate the requirement for the `load` method:
|
||||
|
||||
> The referenced IDT must be valid until a new IDT is loaded.
|
||||
|
||||
We can't know when the next IDT will be loaded. Maybe never. So in the worst case:
|
||||
|
||||
> The referenced IDT must be valid as long as our kernel runs.
|
||||
|
||||
This is exactly the definition of a [static lifetime]. So we can easily ensure that the IDT lives long enough by adding a `'static` requirement to the signature of the `load` function:
|
||||
|
||||
[static lifetime]: http://rustbyexample.com/scope/lifetime/static_lifetime.html
|
||||
|
||||
```rust
|
||||
pub fn load(&'static self) {...}
|
||||
// ^^^^^^^ ensure that the IDT reference has the 'static lifetime
|
||||
```
|
||||
|
||||
That's it! Now the Rust compiler ensures that the above error can't happen anymore:
|
||||
|
||||
```
|
||||
error: `idt` does not live long enough
|
||||
--> src/interrupts/mod.rs:78:5
|
||||
78 |> idt.load();
|
||||
|> ^^^
|
||||
note: reference must be valid for the static lifetime...
|
||||
note: ...but borrowed value is only valid for the block suffix following
|
||||
statement 0 at 75:34
|
||||
--> src/interrupts/mod.rs:75:35
|
||||
75 |> let mut idt = idt::Idt::new();
|
||||
|> ^
|
||||
```
|
||||
|
||||
### A static IDT
|
||||
So a valid IDT needs to have the `'static` lifetime. We can either create a `static` IDT or [deliberately leak a Box][into_raw]. We will most likely only need a single IDT for the foreseeable future, so let's try the `static` approach:
|
||||
|
||||
[into_raw]: https://doc.rust-lang.org/nightly/alloc/boxed/struct.Box.html#method.into_raw
|
||||
|
||||
```rust
|
||||
// in src/interrupts/mod.rs
|
||||
|
||||
static IDT: idt::Idt = {
|
||||
let mut idt = idt::Idt::new();
|
||||
|
||||
idt.set_handler(0, divide_by_zero_handler);
|
||||
|
||||
idt
|
||||
};
|
||||
|
||||
extern "C" fn divide_by_zero_handler() -> ! {
|
||||
println!("EXCEPTION: DIVIDE BY ZERO");
|
||||
loop {}
|
||||
}
|
||||
```
|
||||
We register a single handler function for a [divide by zero error] (index 0). Like the name says, this exception occurs when dividing a number by 0. Thus we have an easy way to test our new exception handler.
|
||||
|
||||
[divide by zero error]: http://wiki.osdev.org/Exceptions#Divide-by-zero_Error
|
||||
|
||||
However, it doesn't work this way:
|
||||
|
||||
```
|
||||
error: calls in statics are limited to constant functions, struct and enum
|
||||
constructors [E0015]
|
||||
...
|
||||
error: blocks in statics are limited to items and tail expressions [E0016]
|
||||
...
|
||||
error: references in statics may only refer to immutable values [E0017]
|
||||
...
|
||||
```
|
||||
The reason is that the Rust compiler is not able to evaluate the value of the `static` at compile time. Maybe it will work someday when `const` functions become more powerful. But until then, we have to find another solution.
|
||||
|
||||
#### Lazy Statics to the Rescue
|
||||
Fortunately the `lazy_static` macro exists. Instead of evaluating a `static` at compile time, the macro performs the initialization when the `static` is referenced the first time. Thus, we can do almost everything in the initialization block and are even able to read runtime values.
|
||||
|
||||
With `lazy_static`, we can define our IDT without problems:
|
||||
|
||||
```rust
|
||||
// in src/interrupts/mod.rs
|
||||
|
||||
lazy_static! {
|
||||
static ref IDT: idt::Idt = {
|
||||
let mut idt = idt::Idt::new();
|
||||
|
||||
idt.set_handler(0, divide_by_zero_handler);
|
||||
|
||||
idt
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
Now we're ready to load our IDT! Therefore we add an `interrupts::init` function:
|
||||
|
||||
```rust
|
||||
// in src/interrupts/mod.rs
|
||||
|
||||
pub fn init() {
|
||||
IDT.load();
|
||||
}
|
||||
```
|
||||
We don't need our `assert_has_not_been_called` macro here, since nothing bad happens when `init` is called twice. It just reloads the same IDT again.
|
||||
|
||||
## Testing it
|
||||
Now we should be able to catch divide-by-zero errors! Let's try it in our `rust_main`:
|
||||
|
||||
```rust
|
||||
// in src/lib.rs
|
||||
|
||||
pub extern "C" fn rust_main(...) {
|
||||
...
|
||||
memory::init(boot_info);
|
||||
|
||||
// initialize our IDT
|
||||
interrupts::init();
|
||||
|
||||
// provoke a divide-by-zero fault
|
||||
42 / 0;
|
||||
|
||||
println!("It did not crash!");
|
||||
loop {}
|
||||
}
|
||||
```
|
||||
When we run it, we get a runtime panic:
|
||||
|
||||
```
|
||||
PANIC in src/lib.rs at line 57:
|
||||
attempted to divide by zero
|
||||
```
|
||||
|
||||
That's not our exception handler. The reason is that Rust itself checks for a possible division by zero and panics in that case. So in order to raise a divide-by-zero error in the CPU, we need to bypass the Rust compiler somehow.
|
||||
|
||||
### Inline Assembly
|
||||
In order to cause a divide-by-zero exception, we need to execute a [div] or [idiv] assembly instruction with operand 0. We could write a small assembly function and call it from our Rust code. An easier way is to use Rust's [inline assembly] macro.
|
||||
|
||||
[div]: http://x86.renejeschke.de/html/file_module_x86_id_72.html
|
||||
[idiv]: http://x86.renejeschke.de/html/file_module_x86_id_137.html
|
||||
[inline assembly]: https://doc.rust-lang.org/book/inline-assembly.html
|
||||
|
||||
Inline assembly allows us to write raw x86 assembly within a Rust function. The feature is unstable, so we need to add `#![feature(asm)]` to our `src/lib.rs`. Then we're able to write a `divide_by_zero` function:
|
||||
|
||||
```rust
|
||||
fn divide_by_zero() {
|
||||
unsafe {
|
||||
asm!("mov dx, 0; div dx" ::: "ax", "dx" : "volatile", "intel")
|
||||
}
|
||||
}
|
||||
```
|
||||
Let's try to decode it:
|
||||
|
||||
- The `asm!` macro emits raw assembly instructions, so it's `unsafe` to use it.
|
||||
- We insert two assembly instructions here: `mov dx, 0` and `div dx`. The former loads a 0 into the `dx` register (a subset of `rdx`) and the latter divides the `ax` register by `dx`. (The `div` instruction always implicitly operates on the `ax` register).
|
||||
- The colons are separators. After the first `:` we could specify output operands and after the second `:` we could specify input operands. We need neither, so we leave these areas empty.
|
||||
- After the third colon, we specify the so-called _clobbers_. These tell the compiler that our assembly modifies the values of some registers. Otherwise, the compiler assumes that the registers preserve their value. In our case, we clobber `dx` (we load 0 to it) and `ax` (the `div` instruction places the result in it).
|
||||
- The last block (after the 4th colon) specifies some options. The `volatile` option tells the compiler: “This code has side effects. Do not delete it and do not move it elsewhere”. In our case, the “side effect” is the divide-by-zero exception. Finally, the `intel` option allows us to use the Intel assembly syntax instead of the default AT&T syntax.
|
||||
|
||||
Let's use our new `divide_by_zero` function to raise a CPU exception:
|
||||
|
||||
```rust
|
||||
// in src/lib.rs
|
||||
|
||||
pub extern "C" fn rust_main(...) {
|
||||
...
|
||||
|
||||
// provoke a divide-by-zero fault
|
||||
divide_by_zero();
|
||||
|
||||
println!("It did not crash!");
|
||||
loop {}
|
||||
}
|
||||
```
|
||||
|
||||
It works! We see an `EXCEPTION: DIVIDE BY ZERO` message at the bottom of our screen:
|
||||
|
||||

|
||||
|
||||
### Exceptions inside println
|
||||
What happens when the exception occurs in the body of a `println`? Let's try:
|
||||
|
||||
```rust
|
||||
pub extern "C" fn rust_main(...) {
|
||||
...
|
||||
interrupts::init();
|
||||
|
||||
// provoke a divide by zero fault inside println
|
||||
println!("{:?}", divide_by_zero());
|
||||
|
||||
println!("It did not crash!");
|
||||
loop {}
|
||||
}
|
||||
```
|
||||
Now the output ends on the `guard page` line. No `EXCEPTION` message and no `It did not crash` message either. What's happening?
|
||||
|
||||
#### Debugging
|
||||
Let's debug it using [GDB]. It is a console debugger and works with nearly everything, including QEMU. To make QEMU listen for a debugger connection, we start it with the `-s` flag:
|
||||
|
||||
[GDB]: https://www.gnu.org/software/gdb/
|
||||
|
||||
```Makefile
|
||||
# in `Makefile`
|
||||
|
||||
run: $(iso)
|
||||
@qemu-system-x86_64 -cdrom $(iso) -s
|
||||
```
|
||||
|
||||
Then we can launch GDB in another console window:
|
||||
|
||||
```
|
||||
> gdb build/kernel-x86_64.bin
|
||||
[some version, copyright, and usage information]
|
||||
Reading symbols from build/kernel-x86_64.bin...done.
|
||||
(gdb)
|
||||
```
|
||||
Now we can connect to our running QEMU instance on port `1234`:
|
||||
|
||||
```
|
||||
(gdb) target remote :1234
|
||||
Remote debugging using :1234
|
||||
0x00000000001031bd in spin::mutex::cpu_relax ()
|
||||
at /home/.../spin-0.3.5/src/mutex.rs:102
|
||||
102 unsafe { asm!("pause" :::: "volatile"); }
|
||||
```
|
||||
So we're locked in a function named `mutex::cpu_relax` inside the `spin` crate. Let's try a backtrace:
|
||||
|
||||
```
|
||||
(gdb) backtrace
|
||||
#0 0x00000000001031bd in spin::mutex::cpu_relax ()
|
||||
at /home/.../spin-0.3.5/src/mutex.rs:102
|
||||
#1 spin::mutex::{{impl}}::obtain_lock<blog_os::vga_buffer::Writer> (
|
||||
self=0x111230 <blog_os::vga_buffer::WRITER::h702c3f466147ac3b>)
|
||||
at /home/.../spin-0.3.5/src/mutex.rs:142
|
||||
#2 0x0000000000103143 in spin::mutex::{{impl}}::lock<blog_os::vga_buffer::
|
||||
Writer> (
|
||||
self=0x111230 <blog_os::vga_buffer::WRITER::h702c3f466147ac3b>)
|
||||
at /home/.../spin-0.3.5/src/mutex.rs:163
|
||||
#3 0x000000000010da59 in blog_os::interrupts::divide_by_zero_handler ()
|
||||
at src/vga_buffer.rs:31
|
||||
...
|
||||
```
|
||||
Pretty verbose… and very useful. Let's clean it up a bit:
|
||||
|
||||
- `spin::mutex::cpu_relax`
|
||||
- `spin::mutex::obtain_lock<vga_buffer::Writer>`
|
||||
- `spin::mutex::lock<vga_buffer::Writer>`
|
||||
- `blog_os::interrupts::divide_by_zero_handler`
|
||||
- ...
|
||||
|
||||
It's a _back_-trace, so it goes from the innermost function to the outermost function. We see that our divide-by-zero handler was called successfully. It then tried to write its error message. Therefore, it tried to `lock` the static `WRITER`, which in turn called `obtain_lock` and `cpu_relax`.
|
||||
|
||||
So our kernel tries to lock the output `WRITER`, which is already locked by the interrupted `println`. Thus, our exception handler waits forever and we don't see what error occurred. Yay, that's our first deadlock! :)
|
||||
|
||||
(As you see, GDB can be very useful sometimes. For more information about GDB check out our [Set Up GDB] page.)
|
||||
|
||||
[Set Up GDB]: {{% relref "set-up-gdb.md" %}}
|
||||
|
||||
## Printing Errors Reliably
|
||||
In order to guarantee that we always see error messages, we add a `print_error` function to our `vga_buffer` module:
|
||||
|
||||
```rust
|
||||
// in src/vga_buffer.rs
|
||||
|
||||
pub unsafe fn print_error(fmt: fmt::Arguments) {
|
||||
use core::fmt::Write;
|
||||
|
||||
let mut writer = Writer {
|
||||
column_position: 0,
|
||||
color_code: ColorCode::new(Color::Red, Color::Black),
|
||||
buffer: Unique::new(0xb8000 as *mut _),
|
||||
};
|
||||
writer.new_line();
|
||||
writer.write_fmt(fmt);
|
||||
}
|
||||
```
|
||||
|
||||
Instead of using the static `WRITER`, this function creates a new `Writer` on each invocation. Thereby it ignores the mutex and is always able to print to the screen without deadlocking. We print in red to highlight the error and add a newline to avoid overwriting unfinished lines.
|
||||
|
||||
### Safety
|
||||
This function clearly violates the invariants of the `vga_buffer` module, as it creates another `Unique` pointing to `0xb8000`. Thus, we deliberately introduce a data race on the VGA buffer. For this reason, the function is marked as `unsafe` and should only be used if absolutely necessary.
|
||||
|
||||
However, the situation is not _that_ bad. The VGA buffer only stores characters (no pointers) and we never rely on the buffer's values. So the function might cause mangled output, but should never be able to violate memory safety. Nevertheless, we will implement a better solution in a future post.
|
||||
|
||||
### Using print_error
|
||||
Let's use the new `print_error` function to print the divide-by-zero error:
|
||||
|
||||
```rust
|
||||
// in src/interrupts/mod.rs
|
||||
|
||||
use vga_buffer::print_error;
|
||||
|
||||
extern "C" fn divide_by_zero_handler() -> ! {
|
||||
unsafe { print_error(format_args!("EXCEPTION: DIVIDE BY ZERO")) };
|
||||
loop {}
|
||||
}
|
||||
```
|
||||
We use the built-in [format_args] macro to translate the error string to a `fmt::Arguments` type. Now we should always see the error message, even if the exception occurred inside `println`:
|
||||
|
||||
[format_args]: https://doc.rust-lang.org/nightly/std/macro.format_args!.html
|
||||
|
||||

|
||||
|
||||
## What's next?
|
||||
We've successfully caught our first exception! However, our `EXCEPTION: DIVIDE BY ZERO` message doesn't contain much information about the cause of the exception. The next post improves the situation by printing, among other things, the current stack pointer and the address of the causing instruction. We will also explore other exceptions such as page faults, for which the CPU pushes an _error code_ on the stack.
|
||||
@@ -1,235 +0,0 @@
|
||||
+++
|
||||
title = "Exception Diagnostics"
|
||||
date = "2016-06-15"
|
||||
+++
|
||||
|
||||
In the [previous post], we've set up an interrupt descriptor table in order to catch divide by zero faults. In this post, we will explore exceptions in more detail. Our goal is to print additional information when an exception occurs, for example the values of the instruction and stack pointer at that time. We will also add handler functions for page and double faults.
|
||||
|
||||
[previous post]: {{% relref "2016-05-28-catching-exceptions.md" %}}
|
||||
|
||||
<!--more-->
|
||||
|
||||
As always, the complete source code is on [Github]. Please file [issues] for any problems, questions, or improvement suggestions. There is also a comment section at the end of this page.
|
||||
|
||||
[Github]: https://github.com/phil-opp/blog_os/tree/TODO
|
||||
[issues]: https://github.com/phil-opp/blog_os/issues
|
||||
|
||||
## Exceptions in Detail
|
||||
An exception signals that something is wrong with the current instruction. So an exception is always caused by a specific assembly instruction. When an exception occurs, the CPU interrupts its current work and starts an internal exception routine.
|
||||
|
||||
This routine involves reading the interrupt descriptor table and invoking the registered handler function. But first, the CPU pushes several pieces of information onto the stack, which describe the current state and provide information about the cause of the exception:
|
||||
|
||||

|
||||
|
||||
The pushed information contains the instruction and stack pointer, the current CPU flags, and (for some exceptions) an error code, which gives information about the exception's cause. Let's look at the fields in detail:
|
||||
|
||||
- First, the CPU aligns the stack pointer on a 16-byte boundary. This allows us to use some SSE instructions, which expect such an alignment.
|
||||
- After that, the CPU pushes the stack segment descriptor (SS) and the old stack pointer (from before the alignment) onto the stack. This allows us to restore the previous stack pointer when we want to continue the interrupted program.
|
||||
- Then the CPU pushes the contents of the RFLAGS register. This register contains various state information of the interrupted program. For example, it indicates if interrupts were enabled and whether the last executed instruction returned zero.
|
||||
- Next the CPU pushes the instruction pointer and its code segment descriptor onto the stack. This tells us the address of the last executed instruction, which caused the exception.
|
||||
- Finally, the CPU pushes an error code for some exceptions. This error code only exists for some exceptions such as page faults or general protection faults and provides additional information. For example, it tells us whether a page fault was caused by a read or a write request.
|
||||
|
||||
## Printing the Exception Stack Frame
|
||||
Let's create a struct that represents the exception stack frame:
|
||||
|
||||
```rust
|
||||
// in src/interrupts/mod.rs
|
||||
|
||||
#[derive(Debug)]
|
||||
#[repr(C)]
|
||||
struct ExceptionStackFrame {
|
||||
instruction_pointer: u64,
|
||||
code_segment: u64,
|
||||
cpu_flags: u64,
|
||||
stack_pointer: u64,
|
||||
stack_segment: u64,
|
||||
}
|
||||
```
|
||||
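The divide-by-zero fault pushes no error code, so we leave it out. Note that the stack grows downwards in memory, so the value pushed last lies at the lowest address. Therefore we need to declare the fields in reverse order (compared to the order in which the CPU pushes them).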
|
||||
|
||||
Now we need a way to find the memory address of this stack frame. When we look at the above graphic again, we see that the start address of the exception stack frame is the new stack pointer. So we just need to read the value of `rsp` at the very beginning of our handler function:
|
||||
|
||||
```rust
|
||||
// in src/interrupts/mod.rs
|
||||
|
||||
extern "C" fn divide_by_zero_handler() -> ! {
|
||||
let stack_frame: *const ExceptionStackFrame;
|
||||
unsafe {
|
||||
asm!("mov $0, rsp" : "=r"(stack_frame) ::: "intel");
|
||||
print_error(format_args!("EXCEPTION: DIVIDE BY ZERO\n{:#?}",
|
||||
*stack_frame));
|
||||
};
|
||||
loop {}
|
||||
}
|
||||
```
|
||||
We're using [inline assembly] here to load the value from the `rsp` register into `stack_frame`. The syntax is a bit strange, therefore a quick explanation:
|
||||
|
||||
[inline assembly]: https://doc.rust-lang.org/book/inline-assembly.html
|
||||
|
||||
- The asm! macro emits raw assembly instructions. This is the only way to read raw register values in Rust.
|
||||
- We insert a single assembly instruction here: `mov $0, rsp`. It moves the value of `rsp` to some register (the `$0` is a placeholder which is filled by the compiler).
|
||||
- The colons are separators. The `asm!` macro expects output operands after the first colon. We're specifying our `stack_frame` variable as a single output operand here. The `=r` tells the compiler that it should use any register for the first placeholder `$0`.
|
||||
- We don't need any input operands or so-called [clobbers], so we leave the blocks after the second and third colon empty.
|
||||
- The last block (after the 4th colon) specifies options. The `intel` option tells the compiler that our code is in Intel assembly syntax (instead of the default AT&T syntax).
|
||||
|
||||
[clobbers]: https://doc.rust-lang.org/book/inline-assembly.html#clobbers
|
||||
|
||||
So we're loading the value of the stack pointer into `stack_frame` at the very beginning of our function. Thus we have a pointer to the exception stack frame in that variable and are able to pretty-print its `Debug` formatting through the `{:#?}` argument.
|
||||
|
||||
### Testing it
|
||||
Let's try it by executing `make run`:
|
||||
|
||||

|
||||
|
||||
Those values look very wrong. The instruction pointer is definitely not 1 and the code segment should be `0x8`. So what's going on here?
|
||||
|
||||
It seems like we somehow got the pointer wrong. The exception stack frame graphic and our inline assembly seem correct, so something must be modifying `rsp` before we load it into `stack_frame`.
|
||||
|
||||
Let's see what's happening by looking at the disassembly of our function:
|
||||
|
||||
```
|
||||
> objdump -d build/kernel-x86_64.bin | grep -A20 "divide_by_zero_handler"
|
||||
|
||||
[...]
|
||||
000000000010ced0 <_ZN7blog_os10interrupts22divide_by_zero_handler17h621c1e80480189e8E>:
|
||||
10ced0: 55 push %rbp
|
||||
10ced1: 48 89 e5 mov %rsp,%rbp
|
||||
10ced4: 48 81 ec b0 00 00 00 sub $0xb0,%rsp
|
||||
10cedb: 48 8d 45 98 lea -0x68(%rbp),%rax
|
||||
10cedf: 48 b9 1d 1d 1d 1d 1d movabs $0x1d1d1d1d1d1d1d1d,%rcx
|
||||
10cee6: 1d 1d 1d
|
||||
10cee9: 48 89 4d 98 mov %rcx,-0x68(%rbp)
|
||||
10ceed: 48 89 4d f8 mov %rcx,-0x8(%rbp)
|
||||
10cef1: 48 89 e1 mov %rsp,%rcx
|
||||
10cef4: 48 89 4d f8 mov %rcx,-0x8(%rbp)
|
||||
10cef8: ...
|
||||
[...]
|
||||
```
|
||||
Our `divide_by_zero_handler` starts at address `0x10ced0`. Let's look at the instruction at address `0x10cef1`:
|
||||
|
||||
```
|
||||
mov %rsp,%rcx
|
||||
```
|
||||
It's in AT&T syntax and contains `rcx` instead of our `$0` placeholder, but it is in fact our inline assembly instruction, which loads the stack pointer into the `stack_frame` variable. It moves `rsp` to `rcx` first, and then the next instruction at `0x10cef4` moves `rcx` to the variable on the stack.
|
||||
|
||||
We can clearly see the problem here: The compiler inserted various other instructions before our inline assembly. These instructions modify the stack pointer so that we don't read the original `rsp` value and get a wrong pointer. But why is the compiler doing this?
|
||||
|
||||
The reason is that we need some place on the stack to store things like variables. Therefore the compiler inserts a so-called function _prologue_ which prepares the stack and reserves space for all variables. In our case, the compiler subtracts from the stack pointer to make room for, among other things, our `stack_frame` variable. This prologue is the first thing in every function and comes before every other code. So in order to correctly load the exception frame pointer, we need some way to circumvent the automatic prologue generation.
|
||||
|
||||
### Naked Functions
|
||||
Fortunately there is a way to disable the prologue: [naked functions]. A naked function has no prologue and immediately starts with the first instruction of its body. However, most Rust code requires the prologue. Therefore naked functions should only contain inline assembly.
|
||||
|
||||
[naked functions]: https://github.com/rust-lang/rfcs/blob/master/text/1201-naked-fns.md
|
||||
|
||||
A naked function looks like this:
|
||||
|
||||
```rust
|
||||
#[naked]
|
||||
extern "C" fn naked_function_example() {
|
||||
unsafe {
|
||||
asm!("mov rax, 0x42" :::: "intel");
|
||||
};
|
||||
}
|
||||
```
|
||||
Naked functions are highly unstable, so we need to add `#![feature(naked_functions)]` to our `src/lib.rs`.
|
||||
|
||||
If you want to try it, insert it in `src/lib.rs` and call it from `rust_main`. When we inspect the disassembly, we see that the function prologue is missing:
|
||||
|
||||
```
|
||||
> objdump -d build/kernel-x86_64.bin | grep -A5 "naked_function_example"
|
||||
[...]
|
||||
000000000010df90 <_ZN7blog_os22naked_function_example17ha9f733dfe42b595dE>:
|
||||
10df90: 48 c7 c0 42 00 00 00 mov $0x42,%rax
|
||||
10df97: c3 retq
|
||||
10df98: 0f 1f 84 00 00 00 00 nopl 0x0(%rax,%rax,1)
|
||||
10df9f: 00
|
||||
```
|
||||
It contains just the specified inline assembly and a return instruction (you can ignore the junk values after the return statement). So let's try to use a naked function to retrieve the exception frame pointer.
|
||||
|
||||
### A Naked Exception Handler
|
||||
We can't use Rust code in naked functions, but we still want to use Rust in our exception handler. Therefore we split our handler function into two parts: a main exception handler in Rust and a small naked wrapper function, which just loads the exception frame pointer and then calls the main handler.
|
||||
|
||||
Our new two-stage exception handler looks like this:
|
||||
|
||||
```rust
|
||||
#[naked]
|
||||
extern "C" fn divide_by_zero_handler() -> ! {
|
||||
unsafe {
|
||||
asm!(/* load exception frame pointer and call main_handler */);
|
||||
}
|
||||
::core::intrinsics::unreachable();
|
||||
|
||||
extern "C" fn main_handler(stack_frame: *const ExceptionStackFrame) -> ! {
|
||||
unsafe {
|
||||
print_error(format_args!("EXCEPTION: DIVIDE BY ZERO\n{:#?}",
|
||||
*stack_frame));
|
||||
}
|
||||
loop {}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
TODO:
|
||||
|
||||
- unreachable
|
||||
- pointer as argument
|
||||
- inner function
|
||||
|
||||
-----
|
||||
|
||||
## Failure on real Hardware
|
||||
|
||||
- reproduce using `-enable-kvm`
|
||||
- debugging using `loop {}` and gdb
|
||||
- frame pointer and thus stack pointer alignment wrong
|
||||
- requirements system v
|
||||
- stack frame high level (xx bytes)
|
||||
- hacky workaround (`push 0`)
|
||||
- `extern "C" fn() -> !` not the correct handler function type
|
||||
- assembly stub required to ensure correct stack alignment
|
||||
- naked functions for handlers with and without error code (`push 0`, `call`)
|
||||
|
||||
## Exception Stack Frame
|
||||
In order to read values such as the error code or the address of the interrupted instruction, we need to know how the CPU modifies the stack when an exception occurs:
|
||||
|
||||
|
||||
When an exception occurs, the CPU:
|
||||
|
||||
1. Aligns the stack pointer on a 16-byte boundary.
|
||||
2. Pushes the stack segment descriptor (SS) and the old stack pointer (from before the alignment) onto the stack. The SS value is padded to 8 bytes.
|
||||
3. Pushes the 64-bit RFLAGS register onto the stack.
|
||||
4. Pushes the previous CS register and RIP register onto the stack. The CS value is padded to 8 bytes.
|
||||
5. If the interrupt vector number has an error code associated with it, pushes the error code onto the stack. The error code is padded with four bytes to form a quadword.
|
||||
6. Loads the offset field from the gate descriptor into the target RIP. The interrupt handler begins execution when control is transferred to the instruction referenced by the new RIP.
|
||||
|
||||
```rust
|
||||
#[repr(C)]
|
||||
struct ExceptionStackFrame {
|
||||
stack_segment: u64,
|
||||
stack_pointer: u64,
|
||||
cpu_flags: u64,
|
||||
code_segment: u64,
|
||||
instruction_pointer: u64,
|
||||
}
|
||||
```
|
||||
|
||||
## What's next?
|
||||
Now TODO. However, some page faults still cause a triple fault and a bootloop. For example, try the following code:
|
||||
|
||||
```rust
|
||||
pub extern "C" fn rust_main(...) {
|
||||
...
|
||||
interrupts::init();
|
||||
|
||||
// provoke a kernel stack overflow, which hits the guard page
|
||||
fn recursive() {
|
||||
recursive();
|
||||
}
|
||||
recursive();
|
||||
|
||||
println!("It did not crash!");
|
||||
loop {}
|
||||
}
|
||||
```
|
||||
|
||||
The next post will explore and fix this triple fault by creating a double fault handler. After that, we should never again experience a triple fault in our kernel.
|
||||
@@ -1,3 +1,12 @@
|
||||
// Copyright 2016 Philipp Oppermann. See the README.md
|
||||
// file at the top-level directory of this distribution.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
#![feature(const_fn)]
|
||||
#![feature(allocator)]
|
||||
|
||||
@@ -35,7 +44,7 @@ impl BumpAllocator {
|
||||
/// Allocates a block of memory with the given size and alignment.
|
||||
fn allocate(&mut self, size: usize, align: usize) -> Option<*mut u8> {
|
||||
let alloc_start = align_up(self.next, align);
|
||||
let alloc_end = alloc_start + size;
|
||||
let alloc_end = alloc_start.saturating_add(size);
|
||||
|
||||
if alloc_end <= self.heap_start + self.heap_size {
|
||||
self.next = alloc_end;
|
||||
|
||||
@@ -1,3 +1,12 @@
|
||||
// Copyright 2016 Philipp Oppermann. See the README.md
|
||||
// file at the top-level directory of this distribution.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
#![feature(allocator)]
|
||||
#![feature(const_fn)]
|
||||
|
||||
|
||||
@@ -1,22 +0,0 @@
|
||||
#!/bin/sh
|
||||
# Copyright 2014 The Rust Project Developers. See the COPYRIGHT
|
||||
# file at the top-level directory of this distribution and at
|
||||
# http://rust-lang.org/COPYRIGHT.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
# option. This file may not be copied, modified, or distributed
|
||||
# except according to those terms.
|
||||
|
||||
# Exit if anything fails
|
||||
set -e
|
||||
|
||||
my_dir=`dirname $0`
|
||||
|
||||
# Copy internal script to a temporary untracked file because an untracked
|
||||
# file is kept by git when switching branches (that way we can update tags
|
||||
# where this script doesn't exist).
|
||||
cp "$my_dir/cherry_pick_to_tags_internal.sh" "$my_dir/cherry_pick_to_tags_internal_tmp.sh"
|
||||
sh "$my_dir/cherry_pick_to_tags_internal_tmp.sh" $*
|
||||
rm "$my_dir/cherry_pick_to_tags_internal_tmp.sh"
|
||||
@@ -1,44 +0,0 @@
|
||||
#!/bin/sh
|
||||
# Copyright 2014 The Rust Project Developers. See the COPYRIGHT
|
||||
# file at the top-level directory of this distribution and at
|
||||
# http://rust-lang.org/COPYRIGHT.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
# option. This file may not be copied, modified, or distributed
|
||||
# except according to those terms.
|
||||
|
||||
# Exit if anything fails
|
||||
set -e
|
||||
|
||||
if [ "$#" -lt 2 ]; then
|
||||
echo "Usage: $0 COMMIT_HASH TAGS" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
commit="$1"
|
||||
|
||||
echo "current branch $branch_name"
|
||||
echo "commit hash $commit"
|
||||
|
||||
# update tags
|
||||
git fetch --tags
|
||||
|
||||
shift
|
||||
for tag in "$@"; do
|
||||
echo "UPDATING TAG $tag"
|
||||
{
|
||||
git co "$tag"
|
||||
|
||||
# cherry pick commit and update tag
|
||||
git cherry-pick -x "$commit"
|
||||
git tag -f "$tag" HEAD
|
||||
|
||||
# switch back to previous branch
|
||||
git co -
|
||||
|
||||
# push the updated tag
|
||||
git push origin "$tag" --force
|
||||
} >/dev/null
|
||||
done
|
||||
@@ -1,19 +0,0 @@
|
||||
#!/bin/sh
|
||||
|
||||
set -e
|
||||
|
||||
# build rust project
|
||||
make
|
||||
|
||||
# clone hugo branch, which contains the blog template
|
||||
git clone --branch=hugo https://github.com/phil-opp/blog_os.git hugo
|
||||
cd hugo
|
||||
|
||||
# download hugo
|
||||
wget https://github.com/spf13/hugo/releases/download/v0.15/hugo_0.15_linux_amd64.tar.gz
|
||||
tar xf hugo_0.15_linux_amd64.tar.gz
|
||||
|
||||
# build the blog
|
||||
hugo_0.15_linux_amd64/hugo_0.15_linux_amd64
|
||||
|
||||
cd ..
|
||||
@@ -1,24 +0,0 @@
|
||||
#!/bin/sh
|
||||
|
||||
set -e
|
||||
|
||||
[ "$TRAVIS_BRANCH" = master ]
|
||||
[ "$TRAVIS_PULL_REQUEST" = false ]
|
||||
|
||||
body='{
|
||||
"request": {
|
||||
"branch":"hugo",
|
||||
"config": {
|
||||
"env": {
|
||||
"matrix": ["UPDATE_COMMIT='"$TRAVIS_COMMIT"'"]
|
||||
}
|
||||
}
|
||||
}}'
|
||||
|
||||
curl -s -X POST \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Accept: application/json" \
|
||||
-H "Travis-API-Version: 3" \
|
||||
-H "Authorization: token $TRAVIS_TOKEN" \
|
||||
-d "$body" \
|
||||
https://api.travis-ci.org/repo/phil-opp%2Fblog_os/requests
|
||||
@@ -1,4 +1,4 @@
|
||||
; Copyright 2015 Philipp Oppermann. See the README.md
|
||||
; Copyright 2016 Philipp Oppermann. See the README.md
|
||||
; file at the top-level directory of this distribution.
|
||||
;
|
||||
; Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
@@ -29,12 +29,6 @@ start:
|
||||
; load the 64-bit GDT
|
||||
lgdt [gdt64.pointer]
|
||||
|
||||
; update selectors
|
||||
mov ax, gdt64.data
|
||||
mov ss, ax
|
||||
mov ds, ax
|
||||
mov es, ax
|
||||
|
||||
jmp gdt64.code:long_mode_start
|
||||
|
||||
set_up_page_tables:
|
||||
@@ -202,9 +196,7 @@ section .rodata
|
||||
gdt64:
|
||||
dq 0 ; zero entry
|
||||
.code: equ $ - gdt64 ; new
|
||||
dq (1<<44) | (1<<47) | (1<<41) | (1<<43) | (1<<53) ; code segment
|
||||
.data: equ $ - gdt64 ; new
|
||||
dq (1<<44) | (1<<47) | (1<<41) ; data segment
|
||||
dq (1<<44) | (1<<47) | (1<<43) | (1<<53) ; code segment
|
||||
.pointer:
|
||||
dw $ - gdt64 - 1
|
||||
dq gdt64
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# Copyright 2015 Philipp Oppermann. See the README.md
|
||||
# Copyright 2016 Philipp Oppermann. See the README.md
|
||||
# file at the top-level directory of this distribution.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright 2015 Philipp Oppermann. See the README.md
|
||||
Copyright 2016 Philipp Oppermann. See the README.md
|
||||
file at the top-level directory of this distribution.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; Copyright 2015 Philipp Oppermann. See the README.md
|
||||
; Copyright 2016 Philipp Oppermann. See the README.md
|
||||
; file at the top-level directory of this distribution.
|
||||
;
|
||||
; Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
@@ -13,6 +13,14 @@ extern rust_main
|
||||
section .text
|
||||
bits 64
|
||||
long_mode_start:
|
||||
; load 0 into all data segment registers
|
||||
mov ax, 0
|
||||
mov ss, ax
|
||||
mov ds, ax
|
||||
mov es, ax
|
||||
mov fs, ax
|
||||
mov gs, ax
|
||||
|
||||
; call rust main (with multiboot pointer in rdi)
|
||||
call rust_main
|
||||
.os_returned:
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; Copyright 2015 Philipp Oppermann. See the README.md
|
||||
; Copyright 2016 Philipp Oppermann. See the README.md
|
||||
; file at the top-level directory of this distribution.
|
||||
;
|
||||
; Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
|
||||
@@ -1,4 +1,14 @@
|
||||
use x86::segmentation::{self, SegmentSelector};
|
||||
// Copyright 2016 Philipp Oppermann. See the README.md
|
||||
// file at the top-level directory of this distribution.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
use x86::shared::segmentation::{self, SegmentSelector};
|
||||
use x86::shared::PrivilegeLevel;
|
||||
|
||||
pub struct Idt([Entry; 16]);
|
||||
|
||||
@@ -13,11 +23,11 @@ impl Idt {
|
||||
}
|
||||
|
||||
pub fn load(&'static self) {
|
||||
use x86::dtables::{DescriptorTablePointer, lidt};
|
||||
use x86::shared::dtables::{DescriptorTablePointer, lidt};
|
||||
use core::mem::size_of;
|
||||
|
||||
let ptr = DescriptorTablePointer {
|
||||
base: self as *const _ as u64,
|
||||
base: self as *const _ as *const ::x86::bits64::irq::IdtEntry,
|
||||
limit: (size_of::<Self>() - 1) as u16,
|
||||
};
|
||||
|
||||
@@ -53,7 +63,7 @@ impl Entry {
|
||||
|
||||
fn missing() -> Self {
|
||||
Entry {
|
||||
gdt_selector: SegmentSelector::new(0),
|
||||
gdt_selector: SegmentSelector::new(0, PrivilegeLevel::Ring0),
|
||||
pointer_low: 0,
|
||||
pointer_middle: 0,
|
||||
pointer_high: 0,
|
||||
@@ -66,12 +76,12 @@ impl Entry {
|
||||
use bit_field::BitField;
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct EntryOptions(BitField<u16>);
|
||||
pub struct EntryOptions(u16);
|
||||
|
||||
impl EntryOptions {
|
||||
fn minimal() -> Self {
|
||||
let mut options = BitField::new(0);
|
||||
options.set_range(9..12, 0b111); // 'must-be-one' bits
|
||||
let mut options = 0;
|
||||
options.set_bits(9..12, 0b111); // 'must-be-one' bits
|
||||
EntryOptions(options)
|
||||
}
|
||||
|
||||
@@ -91,13 +101,15 @@ impl EntryOptions {
|
||||
self
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn set_privilege_level(&mut self, dpl: u16) -> &mut Self {
|
||||
self.0.set_range(13..15, dpl);
|
||||
self.0.set_bits(13..15, dpl);
|
||||
self
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn set_stack_index(&mut self, index: u16) -> &mut Self {
|
||||
self.0.set_range(0..3, index);
|
||||
self.0.set_bits(0..3, index);
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,10 +1,58 @@
|
||||
// Copyright 2016 Philipp Oppermann. See the README.md
|
||||
// file at the top-level directory of this distribution.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
mod idt;
|
||||
|
||||
macro_rules! handler {
|
||||
($name: ident) => {{
|
||||
#[naked]
|
||||
extern "C" fn wrapper() -> ! {
|
||||
unsafe {
|
||||
asm!("mov rdi, rsp
|
||||
sub rsp, 8 // align the stack pointer
|
||||
call $0"
|
||||
:: "i"($name as extern "C" fn(
|
||||
&ExceptionStackFrame) -> !)
|
||||
: "rdi" : "intel");
|
||||
::core::intrinsics::unreachable();
|
||||
}
|
||||
}
|
||||
wrapper
|
||||
}}
|
||||
}
|
||||
|
||||
macro_rules! handler_with_error_code {
|
||||
($name: ident) => {{
|
||||
#[naked]
|
||||
extern "C" fn wrapper() -> ! {
|
||||
unsafe {
|
||||
asm!("pop rsi // pop error code into rsi
|
||||
mov rdi, rsp
|
||||
sub rsp, 8 // align the stack pointer
|
||||
call $0"
|
||||
:: "i"($name as extern "C" fn(
|
||||
&ExceptionStackFrame, u64) -> !)
|
||||
: "rdi","rsi" : "intel");
|
||||
::core::intrinsics::unreachable();
|
||||
}
|
||||
}
|
||||
wrapper
|
||||
}}
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref IDT: idt::Idt = {
|
||||
let mut idt = idt::Idt::new();
|
||||
|
||||
idt.set_handler(0, divide_by_zero_handler);
|
||||
idt.set_handler(0, handler!(divide_by_zero_handler));
|
||||
idt.set_handler(6, handler!(invalid_opcode_handler));
|
||||
idt.set_handler(14, handler_with_error_code!(page_fault_handler));
|
||||
|
||||
idt
|
||||
};
|
||||
@@ -14,9 +62,44 @@ pub fn init() {
|
||||
IDT.load();
|
||||
}
|
||||
|
||||
use vga_buffer::print_error;
|
||||
#[derive(Debug)]
|
||||
#[repr(C)]
|
||||
struct ExceptionStackFrame {
|
||||
instruction_pointer: u64,
|
||||
code_segment: u64,
|
||||
cpu_flags: u64,
|
||||
stack_pointer: u64,
|
||||
stack_segment: u64,
|
||||
}
|
||||
|
||||
extern "C" fn divide_by_zero_handler() -> ! {
|
||||
unsafe { print_error(format_args!("EXCEPTION: DIVIDE BY ZERO")) };
|
||||
extern "C" fn divide_by_zero_handler(stack_frame: &ExceptionStackFrame) -> ! {
|
||||
println!("\nEXCEPTION: DIVIDE BY ZERO\n{:#?}", stack_frame);
|
||||
loop {}
|
||||
}
|
||||
|
||||
extern "C" fn invalid_opcode_handler(stack_frame: &ExceptionStackFrame) -> ! {
|
||||
println!("\nEXCEPTION: INVALID OPCODE at {:#x}\n{:#?}",
|
||||
stack_frame.instruction_pointer,
|
||||
stack_frame);
|
||||
loop {}
|
||||
}
|
||||
|
||||
bitflags! {
|
||||
flags PageFaultErrorCode: u64 {
|
||||
const PROTECTION_VIOLATION = 1 << 0,
|
||||
const CAUSED_BY_WRITE = 1 << 1,
|
||||
const USER_MODE = 1 << 2,
|
||||
const MALFORMED_TABLE = 1 << 3,
|
||||
const INSTRUCTION_FETCH = 1 << 4,
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" fn page_fault_handler(stack_frame: &ExceptionStackFrame, error_code: u64) -> ! {
|
||||
use x86::shared::control_regs;
|
||||
println!("\nEXCEPTION: PAGE FAULT while accessing {:#x}\nerror code: \
|
||||
{:?}\n{:#?}",
|
||||
unsafe { control_regs::cr2() },
|
||||
PageFaultErrorCode::from_bits(error_code).unwrap(),
|
||||
stack_frame);
|
||||
loop {}
|
||||
}
|
||||
|
||||
22
src/lib.rs
22
src/lib.rs
@@ -1,4 +1,4 @@
|
||||
// Copyright 2015 Philipp Oppermann. See the README.md
|
||||
// Copyright 2016 Philipp Oppermann. See the README.md
|
||||
// file at the top-level directory of this distribution.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
@@ -11,9 +11,12 @@
|
||||
#![feature(const_fn, unique)]
|
||||
#![feature(alloc, collections)]
|
||||
#![feature(asm)]
|
||||
#![feature(naked_functions)]
|
||||
#![feature(core_intrinsics)]
|
||||
#![no_std]
|
||||
|
||||
extern crate rlibc;
|
||||
extern crate volatile;
|
||||
extern crate spin;
|
||||
extern crate multiboot2;
|
||||
#[macro_use]
|
||||
@@ -50,19 +53,16 @@ pub extern "C" fn rust_main(multiboot_information_address: usize) {
|
||||
// initialize our IDT
|
||||
interrupts::init();
|
||||
|
||||
fn divide_by_zero() {
|
||||
unsafe { asm!("mov dx, 0; div dx" ::: "ax", "dx" : "volatile", "intel") }
|
||||
}
|
||||
// provoke a page fault
|
||||
unsafe { *(0xdeadbeaf as *mut u64) = 42 };
|
||||
|
||||
// provoke a divide by zero fault inside println
|
||||
println!("{:?}", divide_by_zero());
|
||||
|
||||
println!("It did not crash!");
|
||||
loop {}
|
||||
}
|
||||
|
||||
fn enable_nxe_bit() {
|
||||
use x86::msr::{IA32_EFER, rdmsr, wrmsr};
|
||||
use x86::shared::msr::{IA32_EFER, rdmsr, wrmsr};
|
||||
|
||||
let nxe_bit = 1 << 11;
|
||||
unsafe {
|
||||
@@ -72,10 +72,9 @@ fn enable_nxe_bit() {
|
||||
}
|
||||
|
||||
fn enable_write_protect_bit() {
|
||||
use x86::controlregs::{cr0, cr0_write};
|
||||
use x86::shared::control_regs::{cr0, cr0_write, CR0_WRITE_PROTECT};
|
||||
|
||||
let wp_bit = 1 << 16;
|
||||
unsafe { cr0_write(cr0() | wp_bit) };
|
||||
unsafe { cr0_write(cr0() | CR0_WRITE_PROTECT) };
|
||||
}
|
||||
|
||||
#[cfg(not(test))]
|
||||
@@ -84,7 +83,8 @@ extern "C" fn eh_personality() {}
|
||||
|
||||
#[cfg(not(test))]
|
||||
#[lang = "panic_fmt"]
|
||||
extern "C" fn panic_fmt(fmt: core::fmt::Arguments, file: &str, line: u32) -> ! {
|
||||
#[no_mangle]
|
||||
pub extern "C" fn panic_fmt(fmt: core::fmt::Arguments, file: &'static str, line: u32) -> ! {
|
||||
println!("\n\nPANIC in {} at line {}:", file, line);
|
||||
println!(" {}", fmt);
|
||||
loop {}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright 2015 Philipp Oppermann. See the README.md
|
||||
// Copyright 2016 Philipp Oppermann. See the README.md
|
||||
// file at the top-level directory of this distribution.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright 2015 Philipp Oppermann. See the README.md
|
||||
// Copyright 2016 Philipp Oppermann. See the README.md
|
||||
// file at the top-level directory of this distribution.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright 2015 Philipp Oppermann. See the README.md
|
||||
// Copyright 2016 Philipp Oppermann. See the README.md
|
||||
// file at the top-level directory of this distribution.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright 2015 Philipp Oppermann. See the README.md
|
||||
// Copyright 2016 Philipp Oppermann. See the README.md
|
||||
// file at the top-level directory of this distribution.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
@@ -111,7 +111,7 @@ impl Mapper {
|
||||
.expect("mapping code does not support huge pages");
|
||||
let frame = p1[page.p1_index()].pointed_frame().unwrap();
|
||||
p1[page.p1_index()].set_unused();
|
||||
unsafe { ::x86::tlb::flush(page.start_address()) };
|
||||
unsafe { ::x86::shared::tlb::flush(page.start_address()) };
|
||||
// TODO free p(1,2,3) table if empty
|
||||
// allocator.deallocate_frame(frame);
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright 2015 Philipp Oppermann. See the README.md
|
||||
// Copyright 2016 Philipp Oppermann. See the README.md
|
||||
// file at the top-level directory of this distribution.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
@@ -110,11 +110,11 @@ impl ActivePageTable {
|
||||
f: F)
|
||||
where F: FnOnce(&mut Mapper)
|
||||
{
|
||||
use x86::{controlregs, tlb};
|
||||
use x86::shared::{control_regs, tlb};
|
||||
let flush_tlb = || unsafe { tlb::flush_all() };
|
||||
|
||||
{
|
||||
let backup = Frame::containing_address(unsafe { controlregs::cr3() } as usize);
|
||||
let backup = Frame::containing_address(unsafe { control_regs::cr3() } as usize);
|
||||
|
||||
// map temporary_page to current p4 table
|
||||
let p4_table = temporary_page.map_table_frame(backup.clone(), self);
|
||||
@@ -135,13 +135,13 @@ impl ActivePageTable {
|
||||
}
|
||||
|
||||
pub fn switch(&mut self, new_table: InactivePageTable) -> InactivePageTable {
|
||||
use x86::controlregs;
|
||||
use x86::shared::control_regs;
|
||||
|
||||
let old_table = InactivePageTable {
|
||||
p4_frame: Frame::containing_address(unsafe { controlregs::cr3() } as usize),
|
||||
p4_frame: Frame::containing_address(unsafe { control_regs::cr3() } as usize),
|
||||
};
|
||||
unsafe {
|
||||
controlregs::cr3_write(new_table.p4_frame.start_address() as u64);
|
||||
control_regs::cr3_write(new_table.p4_frame.start_address());
|
||||
}
|
||||
old_table
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright 2015 Philipp Oppermann. See the README.md
|
||||
// Copyright 2016 Philipp Oppermann. See the README.md
|
||||
// file at the top-level directory of this distribution.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright 2015 Philipp Oppermann. See the README.md
|
||||
// Copyright 2016 Philipp Oppermann. See the README.md
|
||||
// file at the top-level directory of this distribution.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright 2015 Philipp Oppermann. See the README.md
|
||||
// Copyright 2016 Philipp Oppermann. See the README.md
|
||||
// file at the top-level directory of this distribution.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
@@ -10,6 +10,7 @@
|
||||
use core::ptr::Unique;
|
||||
use core::fmt;
|
||||
use spin::Mutex;
|
||||
use volatile::Volatile;
|
||||
|
||||
const BUFFER_HEIGHT: usize = 25;
|
||||
const BUFFER_WIDTH: usize = 80;
|
||||
@@ -27,31 +28,23 @@ macro_rules! println {
|
||||
|
||||
macro_rules! print {
|
||||
($($arg:tt)*) => ({
|
||||
use core::fmt::Write;
|
||||
$crate::vga_buffer::WRITER.lock().write_fmt(format_args!($($arg)*)).unwrap();
|
||||
$crate::vga_buffer::print(format_args!($($arg)*));
|
||||
});
|
||||
}
|
||||
|
||||
pub fn print(args: fmt::Arguments) {
|
||||
use core::fmt::Write;
|
||||
WRITER.lock().write_fmt(args).unwrap();
|
||||
}
|
||||
|
||||
pub fn clear_screen() {
|
||||
for _ in 0..BUFFER_HEIGHT {
|
||||
println!("");
|
||||
}
|
||||
}
|
||||
|
||||
pub unsafe fn print_error(fmt: fmt::Arguments) {
|
||||
use core::fmt::Write;
|
||||
|
||||
let mut writer = Writer {
|
||||
column_position: 0,
|
||||
color_code: ColorCode::new(Color::Red, Color::Black),
|
||||
buffer: Unique::new(0xb8000 as *mut _),
|
||||
};
|
||||
writer.new_line();
|
||||
writer.write_fmt(fmt);
|
||||
}
|
||||
|
||||
|
||||
#[allow(dead_code)]
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
#[repr(u8)]
|
||||
pub enum Color {
|
||||
Black = 0,
|
||||
@@ -89,10 +82,12 @@ impl Writer {
|
||||
let row = BUFFER_HEIGHT - 1;
|
||||
let col = self.column_position;
|
||||
|
||||
self.buffer().chars[row][col] = ScreenChar {
|
||||
let color_code = self.color_code;
|
||||
|
||||
self.buffer().chars[row][col].write(ScreenChar {
|
||||
ascii_character: byte,
|
||||
color_code: self.color_code,
|
||||
};
|
||||
color_code: color_code,
|
||||
});
|
||||
self.column_position += 1;
|
||||
}
|
||||
}
|
||||
@@ -103,9 +98,12 @@ impl Writer {
|
||||
}
|
||||
|
||||
fn new_line(&mut self) {
|
||||
for row in 0..(BUFFER_HEIGHT - 1) {
|
||||
for row in 1..BUFFER_HEIGHT {
|
||||
for col in 0..BUFFER_WIDTH {
|
||||
let buffer = self.buffer();
|
||||
buffer.chars[row] = buffer.chars[row + 1]
|
||||
let character = buffer.chars[row][col].read();
|
||||
buffer.chars[row - 1][col].write(character);
|
||||
}
|
||||
}
|
||||
self.clear_row(BUFFER_HEIGHT - 1);
|
||||
self.column_position = 0;
|
||||
@@ -116,7 +114,9 @@ impl Writer {
|
||||
ascii_character: b' ',
|
||||
color_code: self.color_code,
|
||||
};
|
||||
self.buffer().chars[row] = [blank; BUFFER_WIDTH];
|
||||
for col in 0..BUFFER_WIDTH {
|
||||
self.buffer().chars[row][col].write(blank);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -129,7 +129,7 @@ impl fmt::Write for Writer {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
struct ColorCode(u8);
|
||||
|
||||
impl ColorCode {
|
||||
@@ -138,7 +138,7 @@ impl ColorCode {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
#[repr(C)]
|
||||
struct ScreenChar {
|
||||
ascii_character: u8,
|
||||
@@ -146,5 +146,5 @@ struct ScreenChar {
|
||||
}
|
||||
|
||||
struct Buffer {
|
||||
chars: [[ScreenChar; BUFFER_WIDTH]; BUFFER_HEIGHT],
|
||||
chars: [[Volatile<ScreenChar>; BUFFER_WIDTH]; BUFFER_HEIGHT],
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user