diff --git a/.appveyor.yml b/.appveyor.yml deleted file mode 100644 index 23538d63..00000000 --- a/.appveyor.yml +++ /dev/null @@ -1,10 +0,0 @@ -branches: - only: - - staging - - trying - - /post-.*/ - -build: false - -test_script: - - echo "Nothing to do for master branch" diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index cab13b7b..00000000 --- a/.travis.yml +++ /dev/null @@ -1,13 +0,0 @@ -language: minimal - -branches: - only: - # This is where pull requests from "bors r+" are built. - - staging - # This is where pull requests from "bors try" are built. - - trying - # Build post braches - - /^post-.*$/ - -script: - - echo "Nothing to do for master branch" diff --git a/LICENSE-MIT b/LICENSE-MIT index 2286d30b..de62280d 100644 --- a/LICENSE-MIT +++ b/LICENSE-MIT @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2015 Philipp Oppermann +Copyright (c) 2019 Philipp Oppermann Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: diff --git a/README.md b/README.md index 08c6199a..cca40b6a 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ The code for each post lives in a separate git branch. This makes it possible to **The code for the latest post is available [here][latest-post].** -[latest-post]: https://github.com/phil-opp/blog_os/tree/post-10 +[latest-post]: https://github.com/phil-opp/blog_os/tree/post-11 You can find the branch for each post by following the `(source code)` link in the [post list](#posts) below. The branches are named `post-XX` where `XX` is the post number, for example `post-03` for the _VGA Text Mode_ post or `post-07` for the _Hardware Interrupts_ post. For build instructions, see the Readme of the respective branch. @@ -56,6 +56,8 @@ The goal of this project is to provide step-by-step tutorials in individual blog ([source code](https://github.com/phil-opp/blog_os/tree/post-09)) - [Heap Allocation](https://os.phil-opp.com/heap-allocation/) ([source code](https://github.com/phil-opp/blog_os/tree/post-10)) +- [Allocator Designs](https://os.phil-opp.com/allocator-designs/) + ([source code](https://github.com/phil-opp/blog_os/tree/post-11)) ## First Edition Posts @@ -104,4 +106,17 @@ The current version of the blog is already the second edition. The first edition ([source code](https://github.com/phil-opp/blog_os/tree/returning_from_exceptions)) ## License -The source code is dual-licensed under MIT or the Apache License (Version 2.0). This excludes the `blog` directory. + +This project, with exception of the `blog/content` folder, is licensed under either of + +- Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or + http://www.apache.org/licenses/LICENSE-2.0) +- MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) + +at your option. + +For licensing of the `blog/content` folder, see the [`blog/content/README.md`](blog/content/README.md). + +### Contribution + +Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. 
diff --git a/azure-pipelines.yml b/azure-pipelines.yml deleted file mode 100644 index 2098d3ca..00000000 --- a/azure-pipelines.yml +++ /dev/null @@ -1,48 +0,0 @@ -# Documentation: https://aka.ms/yaml - -trigger: - branches: - include: - - '*' - exclude: - - 'staging.tmp' - -pool: - vmImage: ubuntu-16.04 - -steps: -- bash: | - echo "Hello world from $AGENT_NAME running on $AGENT_OS" - echo "Reason: $BUILD_REASON" - echo "Requested for: $BUILD_REQUESTEDFOR" - displayName: 'Build Info' - continueOnError: true - -- bash: curl -sL https://github.com/getzola/zola/releases/download/v0.9.0/zola-v0.9.0-x86_64-unknown-linux-gnu.tar.gz | tar zxv - displayName: "Download Zola" - -- script: python -m pip install --upgrade pip setuptools wheel - displayName: 'Install Python Tools' - -- script: python -m pip install --user -r requirements.txt - displayName: 'Install Python Libraries' - workingDirectory: "blog" - -- script: python before_build.py - displayName: "Run before_build.py script" - workingDirectory: "blog" - -- script: ../zola build - displayName: "Build Site" - workingDirectory: "blog" - -- task: PublishPipelineArtifact@0 - inputs: - artifactName: 'generated_site' - targetPath: 'blog/public' - -- script: curl -L https://git.io/misspell | bash - displayName: "Install misspell" - -- script: bin/misspell -error blog/content - displayName: "Check for common typos" diff --git a/blog/before_build.py b/blog/before_build.py index b779889e..2f6e60e5 100644 --- a/blog/before_build.py +++ b/blog/before_build.py @@ -21,18 +21,22 @@ def format_number(number): with io.open("templates/auto/recent-updates.html", 'w', encoding='utf8') as recent_updates: recent_updates.truncate() - recent_updates.write(u"") repo = g.get_repo("phil-opp/blog_os") diff --git a/blog/config.toml b/blog/config.toml index e7f374f9..20d1c980 100644 --- a/blog/config.toml +++ b/blog/config.toml @@ -1,10 +1,13 @@ title = "Writing an OS in Rust" base_url = "https://os.phil-opp.com" +description = "This blog series creates a small operating system in the Rust programming language. Each post is a small tutorial and includes all needed code." highlight_code = true highlight_theme = "visual-studio-dark" generate_rss = true +ignored_content = ["*/README.md"] + [extra] subtitle = "Philipp Oppermann's blog" author = { name = "Philipp Oppermann" } diff --git a/blog/content/LICENSE-CC-BY-NC b/blog/content/LICENSE-CC-BY-NC new file mode 100644 index 00000000..165e4763 --- /dev/null +++ b/blog/content/LICENSE-CC-BY-NC @@ -0,0 +1,96 @@ +Creative Commons Attribution-NonCommercial 4.0 International Public License + +By exercising the Licensed Rights (defined below), You accept and agree to be bound by the terms and conditions of this Creative Commons Attribution-NonCommercial 4.0 International Public License ("Public License"). To the extent this Public License may be interpreted as a contract, You are granted the Licensed Rights in consideration of Your acceptance of these terms and conditions, and the Licensor grants You such rights in consideration of benefits the Licensor receives from making the Licensed Material available under these terms and conditions. + +Section 1 – Definitions. + + Adapted Material means material subject to Copyright and Similar Rights that is derived from or based upon the Licensed Material and in which the Licensed Material is translated, altered, arranged, transformed, or otherwise modified in a manner requiring permission under the Copyright and Similar Rights held by the Licensor. 
For purposes of this Public License, where the Licensed Material is a musical work, performance, or sound recording, Adapted Material is always produced where the Licensed Material is synched in timed relation with a moving image. + Adapter's License means the license You apply to Your Copyright and Similar Rights in Your contributions to Adapted Material in accordance with the terms and conditions of this Public License. + Copyright and Similar Rights means copyright and/or similar rights closely related to copyright including, without limitation, performance, broadcast, sound recording, and Sui Generis Database Rights, without regard to how the rights are labeled or categorized. For purposes of this Public License, the rights specified in Section 2(b)(1)-(2) are not Copyright and Similar Rights. + Effective Technological Measures means those measures that, in the absence of proper authority, may not be circumvented under laws fulfilling obligations under Article 11 of the WIPO Copyright Treaty adopted on December 20, 1996, and/or similar international agreements. + Exceptions and Limitations means fair use, fair dealing, and/or any other exception or limitation to Copyright and Similar Rights that applies to Your use of the Licensed Material. + Licensed Material means the artistic or literary work, database, or other material to which the Licensor applied this Public License. + Licensed Rights means the rights granted to You subject to the terms and conditions of this Public License, which are limited to all Copyright and Similar Rights that apply to Your use of the Licensed Material and that the Licensor has authority to license. + Licensor means the individual(s) or entity(ies) granting rights under this Public License. + NonCommercial means not primarily intended for or directed towards commercial advantage or monetary compensation. For purposes of this Public License, the exchange of the Licensed Material for other material subject to Copyright and Similar Rights by digital file-sharing or similar means is NonCommercial provided there is no payment of monetary compensation in connection with the exchange. + Share means to provide material to the public by any means or process that requires permission under the Licensed Rights, such as reproduction, public display, public performance, distribution, dissemination, communication, or importation, and to make material available to the public including in ways that members of the public may access the material from a place and at a time individually chosen by them. + Sui Generis Database Rights means rights other than copyright resulting from Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, as amended and/or succeeded, as well as other essentially equivalent rights anywhere in the world. + You means the individual or entity exercising the Licensed Rights under this Public License. Your has a corresponding meaning. + +Section 2 – Scope. + + License grant. + Subject to the terms and conditions of this Public License, the Licensor hereby grants You a worldwide, royalty-free, non-sublicensable, non-exclusive, irrevocable license to exercise the Licensed Rights in the Licensed Material to: + reproduce and Share the Licensed Material, in whole or in part, for NonCommercial purposes only; and + produce, reproduce, and Share Adapted Material for NonCommercial purposes only. + Exceptions and Limitations. 
For the avoidance of doubt, where Exceptions and Limitations apply to Your use, this Public License does not apply, and You do not need to comply with its terms and conditions. + Term. The term of this Public License is specified in Section 6(a). + Media and formats; technical modifications allowed. The Licensor authorizes You to exercise the Licensed Rights in all media and formats whether now known or hereafter created, and to make technical modifications necessary to do so. The Licensor waives and/or agrees not to assert any right or authority to forbid You from making technical modifications necessary to exercise the Licensed Rights, including technical modifications necessary to circumvent Effective Technological Measures. For purposes of this Public License, simply making modifications authorized by this Section 2(a)(4) never produces Adapted Material. + Downstream recipients. + Offer from the Licensor – Licensed Material. Every recipient of the Licensed Material automatically receives an offer from the Licensor to exercise the Licensed Rights under the terms and conditions of this Public License. + No downstream restrictions. You may not offer or impose any additional or different terms or conditions on, or apply any Effective Technological Measures to, the Licensed Material if doing so restricts exercise of the Licensed Rights by any recipient of the Licensed Material. + No endorsement. Nothing in this Public License constitutes or may be construed as permission to assert or imply that You are, or that Your use of the Licensed Material is, connected with, or sponsored, endorsed, or granted official status by, the Licensor or others designated to receive attribution as provided in Section 3(a)(1)(A)(i). + + Other rights. + Moral rights, such as the right of integrity, are not licensed under this Public License, nor are publicity, privacy, and/or other similar personality rights; however, to the extent possible, the Licensor waives and/or agrees not to assert any such rights held by the Licensor to the limited extent necessary to allow You to exercise the Licensed Rights, but not otherwise. + Patent and trademark rights are not licensed under this Public License. + To the extent possible, the Licensor waives any right to collect royalties from You for the exercise of the Licensed Rights, whether directly or through a collecting society under any voluntary or waivable statutory or compulsory licensing scheme. In all other cases the Licensor expressly reserves any right to collect such royalties, including when the Licensed Material is used other than for NonCommercial purposes. + +Section 3 – License Conditions. + +Your exercise of the Licensed Rights is expressly made subject to the following conditions. + + Attribution. 
+ + If You Share the Licensed Material (including in modified form), You must: + retain the following if it is supplied by the Licensor with the Licensed Material: + identification of the creator(s) of the Licensed Material and any others designated to receive attribution, in any reasonable manner requested by the Licensor (including by pseudonym if designated); + a copyright notice; + a notice that refers to this Public License; + a notice that refers to the disclaimer of warranties; + a URI or hyperlink to the Licensed Material to the extent reasonably practicable; + indicate if You modified the Licensed Material and retain an indication of any previous modifications; and + indicate the Licensed Material is licensed under this Public License, and include the text of, or the URI or hyperlink to, this Public License. + You may satisfy the conditions in Section 3(a)(1) in any reasonable manner based on the medium, means, and context in which You Share the Licensed Material. For example, it may be reasonable to satisfy the conditions by providing a URI or hyperlink to a resource that includes the required information. + If requested by the Licensor, You must remove any of the information required by Section 3(a)(1)(A) to the extent reasonably practicable. + If You Share Adapted Material You produce, the Adapter's License You apply must not prevent recipients of the Adapted Material from complying with this Public License. + +Section 4 – Sui Generis Database Rights. + +Where the Licensed Rights include Sui Generis Database Rights that apply to Your use of the Licensed Material: + + for the avoidance of doubt, Section 2(a)(1) grants You the right to extract, reuse, reproduce, and Share all or a substantial portion of the contents of the database for NonCommercial purposes only; + if You include all or a substantial portion of the database contents in a database in which You have Sui Generis Database Rights, then the database in which You have Sui Generis Database Rights (but not its individual contents) is Adapted Material; and + You must comply with the conditions in Section 3(a) if You Share all or a substantial portion of the contents of the database. + +For the avoidance of doubt, this Section 4 supplements and does not replace Your obligations under this Public License where the Licensed Rights include other Copyright and Similar Rights. + +Section 5 – Disclaimer of Warranties and Limitation of Liability. + + Unless otherwise separately undertaken by the Licensor, to the extent possible, the Licensor offers the Licensed Material as-is and as-available, and makes no representations or warranties of any kind concerning the Licensed Material, whether express, implied, statutory, or other. This includes, without limitation, warranties of title, merchantability, fitness for a particular purpose, non-infringement, absence of latent or other defects, accuracy, or the presence or absence of errors, whether or not known or discoverable. Where disclaimers of warranties are not allowed in full or in part, this disclaimer may not apply to You. + To the extent possible, in no event will the Licensor be liable to You on any legal theory (including, without limitation, negligence) or otherwise for any direct, special, indirect, incidental, consequential, punitive, exemplary, or other losses, costs, expenses, or damages arising out of this Public License or use of the Licensed Material, even if the Licensor has been advised of the possibility of such losses, costs, expenses, or damages. 
Where a limitation of liability is not allowed in full or in part, this limitation may not apply to You. + + The disclaimer of warranties and limitation of liability provided above shall be interpreted in a manner that, to the extent possible, most closely approximates an absolute disclaimer and waiver of all liability. + +Section 6 – Term and Termination. + + This Public License applies for the term of the Copyright and Similar Rights licensed here. However, if You fail to comply with this Public License, then Your rights under this Public License terminate automatically. + + Where Your right to use the Licensed Material has terminated under Section 6(a), it reinstates: + automatically as of the date the violation is cured, provided it is cured within 30 days of Your discovery of the violation; or + upon express reinstatement by the Licensor. + For the avoidance of doubt, this Section 6(b) does not affect any right the Licensor may have to seek remedies for Your violations of this Public License. + For the avoidance of doubt, the Licensor may also offer the Licensed Material under separate terms or conditions or stop distributing the Licensed Material at any time; however, doing so will not terminate this Public License. + Sections 1, 5, 6, 7, and 8 survive termination of this Public License. + +Section 7 – Other Terms and Conditions. + + The Licensor shall not be bound by any additional or different terms or conditions communicated by You unless expressly agreed. + Any arrangements, understandings, or agreements regarding the Licensed Material not stated herein are separate from and independent of the terms and conditions of this Public License. + +Section 8 – Interpretation. + + For the avoidance of doubt, this Public License does not, and shall not be interpreted to, reduce, limit, restrict, or impose conditions on any use of the Licensed Material that could lawfully be made without permission under this Public License. + To the extent possible, if any provision of this Public License is deemed unenforceable, it shall be automatically reformed to the minimum extent necessary to make it enforceable. If the provision cannot be reformed, it shall be severed from this Public License without affecting the enforceability of the remaining terms and conditions. + No term or condition of this Public License will be waived and no failure to comply consented to unless expressly agreed to by the Licensor. + Nothing in this Public License constitutes or may be interpreted as a limitation upon, or waiver of, any privileges and immunities that apply to the Licensor or You, including from the legal processes of any jurisdiction or authority. diff --git a/blog/content/README.md b/blog/content/README.md new file mode 100644 index 00000000..df08330f --- /dev/null +++ b/blog/content/README.md @@ -0,0 +1,19 @@ +# Blog Content + +This folder contains the content for the _"Writing an OS in Rust"_ blog. + +## License + +This folder is licensed under a Creative Commons Attribution-NonCommercial 4.0 International License, available in [LICENSE-CC-BY-NC](LICENSE-CC-BY-NC) or under . + +All _code examples_ between markdown code blocks denoted by three backticks (\`\`\`) are additionally licensed under either of + +- Apache License, Version 2.0 ([LICENSE-APACHE](../../LICENSE-APACHE) or + http://www.apache.org/licenses/LICENSE-2.0) +- MIT license ([LICENSE-MIT](../../LICENSE-MIT) or http://opensource.org/licenses/MIT) + +at your option. 
+ +### Contribution + +Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you shall be licensed as above, without any additional terms or conditions. diff --git a/blog/content/pages/contact.md b/blog/content/pages/contact.md index fb0e007e..34c952f0 100644 --- a/blog/content/pages/contact.md +++ b/blog/content/pages/contact.md @@ -8,4 +8,4 @@ Philipp Oppermann contact@phil-opp.com -Dr.Gustav-Knodel-Str. 7b, 76344 Eggenstein, Germany +Gerwigstraße 17, 76131 Karlsruhe, Germany diff --git a/blog/content/second-edition/extra/building-on-android/index.md b/blog/content/second-edition/extra/building-on-android/index.md index 7d0d362a..9bdf0ef5 100644 --- a/blog/content/second-edition/extra/building-on-android/index.md +++ b/blog/content/second-edition/extra/building-on-android/index.md @@ -6,6 +6,8 @@ weight = 3 I finally managed to get `blog_os` building on my Android phone using [termux](https://termux.com/). This post explains the necessary steps to set it up. + + Screenshot of the compilation output from android diff --git a/blog/content/second-edition/posts/01-freestanding-rust-binary/index.md b/blog/content/second-edition/posts/01-freestanding-rust-binary/index.md index 5564e899..c704e57c 100644 --- a/blog/content/second-edition/posts/01-freestanding-rust-binary/index.md +++ b/blog/content/second-edition/posts/01-freestanding-rust-binary/index.md @@ -4,6 +4,8 @@ weight = 1 path = "freestanding-rust-binary" date = 2018-02-10 +[extra] +chapter = "Bare Bones" +++ The first step in creating our own operating system kernel is to create a Rust executable that does not link the standard library. This makes it possible to run Rust code on the [bare metal] without an underlying operating system. @@ -145,10 +147,11 @@ Language items are special functions and types that are required internally by t [`Copy`]: https://doc.rust-lang.org/nightly/core/marker/trait.Copy.html [copy code]: https://github.com/rust-lang/rust/blob/485397e49a02a3b7ff77c17e4a3f16c653925cb3/src/libcore/marker.rs#L296-L299 -Providing own implementations of language items would be possible, but this should only be done as a last resort. The reason is that language items are highly unstable implementation details and not even type checked (so the compiler doesn't even check if a function has the right argument types). Fortunately, there is a more stable way to fix the above language item error. +While providing custom implementations of language items is possible, it should only be done as a last resort. The reason is that language items are highly unstable implementation details and not even type checked (so the compiler doesn't even check if a function has the right argument types). Fortunately, there is a more stable way to fix the above language item error. -The `eh_personality` language item marks a function that is used for implementing [stack unwinding]. By default, Rust uses unwinding to run the destructors of all live stack variables in case of a [panic]. This ensures that all used memory is freed and allows the parent thread to catch the panic and continue execution. Unwinding, however, is a complicated process and requires some OS specific libraries (e.g. [libunwind] on Linux or [structured exception handling] on Windows), so we don't want to use it for our operating system. +The [`eh_personality` language item] marks a function that is used for implementing [stack unwinding]. 
By default, Rust uses unwinding to run the destructors of all live stack variables in case of a [panic]. This ensures that all used memory is freed and allows the parent thread to catch the panic and continue execution. Unwinding, however, is a complicated process and requires some OS specific libraries (e.g. [libunwind] on Linux or [structured exception handling] on Windows), so we don't want to use it for our operating system. +[`eh_personality` language item]: https://github.com/rust-lang/rust/blob/edb368491551a77d77a48446d4ee88b35490c565/src/libpanic_unwind/gcc.rs#L11-L45 [stack unwinding]: http://www.bogotobogo.com/cplusplus/stackunwinding.php [libunwind]: http://www.nongnu.org/libunwind/ [structured exception handling]: https://msdn.microsoft.com/en-us/library/windows/desktop/ms680657(v=vs.85).aspx @@ -507,7 +510,7 @@ cargo rustc -- -C link-args="/ENTRY:_start /SUBSYSTEM:console" cargo rustc -- -C link-args="-e __start -static -nostartfiles" ``` -Note that this is just a minimal example of a freestanding Rust binary. This binary expects various things, for example that a stack is initialized when the `_start` function is called. **So it probably for any real use of such a binary, more steps are required**. +Note that this is just a minimal example of a freestanding Rust binary. This binary expects various things, for example that a stack is initialized when the `_start` function is called. **So for any real use of such a binary, more steps are required**. ## What's next? diff --git a/blog/content/second-edition/extra/disable-red-zone/index.md b/blog/content/second-edition/posts/02-minimal-rust-kernel/disable-red-zone/index.md similarity index 96% rename from blog/content/second-edition/extra/disable-red-zone/index.md rename to blog/content/second-edition/posts/02-minimal-rust-kernel/disable-red-zone/index.md index f8740dde..212ccb97 100644 --- a/blog/content/second-edition/extra/disable-red-zone/index.md +++ b/blog/content/second-edition/posts/02-minimal-rust-kernel/disable-red-zone/index.md @@ -2,7 +2,7 @@ title = "Disable the Red Zone" weight = 1 path = "red-zone" - +template = "second-edition/extra.html" +++ The [red zone] is an optimization of the [System V ABI] that allows functions to temporarily use the 128 bytes below its stack frame without adjusting the stack pointer: @@ -10,6 +10,8 @@ The [red zone] is an optimization of the [System V ABI] that allows functions to [red zone]: http://eli.thegreenplace.net/2011/09/06/stack-frame-layout-on-x86-64#the-red-zone [System V ABI]: http://wiki.osdev.org/System_V_ABI + + ![stack frame with red zone](red-zone.svg) The image shows the stack frame of a function with `n` local variables. On function entry, the stack pointer is adjusted to make room on the stack for the return address and the local variables. 
diff --git a/blog/content/second-edition/extra/disable-red-zone/red-zone-overwrite.svg b/blog/content/second-edition/posts/02-minimal-rust-kernel/disable-red-zone/red-zone-overwrite.svg similarity index 100% rename from blog/content/second-edition/extra/disable-red-zone/red-zone-overwrite.svg rename to blog/content/second-edition/posts/02-minimal-rust-kernel/disable-red-zone/red-zone-overwrite.svg diff --git a/blog/content/second-edition/extra/disable-red-zone/red-zone.svg b/blog/content/second-edition/posts/02-minimal-rust-kernel/disable-red-zone/red-zone.svg similarity index 100% rename from blog/content/second-edition/extra/disable-red-zone/red-zone.svg rename to blog/content/second-edition/posts/02-minimal-rust-kernel/disable-red-zone/red-zone.svg diff --git a/blog/content/second-edition/extra/disable-simd/index.md b/blog/content/second-edition/posts/02-minimal-rust-kernel/disable-simd/index.md similarity index 98% rename from blog/content/second-edition/extra/disable-simd/index.md rename to blog/content/second-edition/posts/02-minimal-rust-kernel/disable-simd/index.md index 6810ddf6..6fe2f8d8 100644 --- a/blog/content/second-edition/extra/disable-simd/index.md +++ b/blog/content/second-edition/posts/02-minimal-rust-kernel/disable-simd/index.md @@ -2,13 +2,15 @@ title = "Disable SIMD" weight = 2 path = "disable-simd" - +template = "second-edition/extra.html" +++ [Single Instruction Multiple Data (SIMD)] instructions are able to perform an operation (e.g. addition) simultaneously on multiple data words, which can speed up programs significantly. The `x86_64` architecture supports various SIMD standards: [Single Instruction Multiple Data (SIMD)]: https://en.wikipedia.org/wiki/SIMD + + - [MMX]: The _Multi Media Extension_ instruction set was introduced in 1997 and defines eight 64 bit registers called `mm0` through `mm7`. These registers are just aliases for the registers of the [x87 floating point unit]. - [SSE]: The _Streaming SIMD Extensions_ instruction set was introduced in 1999. Instead of re-using the floating point registers, it adds a completely new register set. The sixteen new registers are called `xmm0` through `xmm15` and are 128 bits each. - [AVX]: The _Advanced Vector Extensions_ are extensions that further increase the size of the multimedia registers. The new registers are called `ymm0` through `ymm15` and are 256 bits each. They extend the `xmm` registers, so e.g. `xmm0` is the lower half of `ymm0`. diff --git a/blog/content/second-edition/posts/02-minimal-rust-kernel/index.md b/blog/content/second-edition/posts/02-minimal-rust-kernel/index.md index 1f77204c..26ce8b18 100644 --- a/blog/content/second-edition/posts/02-minimal-rust-kernel/index.md +++ b/blog/content/second-edition/posts/02-minimal-rust-kernel/index.md @@ -4,6 +4,8 @@ weight = 2 path = "minimal-rust-kernel" date = 2018-02-10 +[extra] +chapter = "Bare Bones" +++ In this post we create a minimal 64-bit Rust kernel for the x86 architecture. We build upon the [freestanding Rust binary] from the previous post to create a bootable disk image, that prints something to the screen. @@ -169,7 +171,7 @@ This setting specifies that the target doesn't support [stack unwinding] on pani We're writing a kernel, so we'll need to handle interrupts at some point. To do that safely, we have to disable a certain stack pointer optimization called the _“red zone”_, because it would cause stack corruptions otherwise. For more information, see our separate post about [disabling the red zone]. 
-[disabling the red zone]: @/second-edition/extra/disable-red-zone/index.md +[disabling the red zone]: @/second-edition/posts/02-minimal-rust-kernel/disable-red-zone/index.md ```json "features": "-mmx,-sse,+soft-float", @@ -183,7 +185,7 @@ The `mmx` and `sse` features determine support for [Single Instruction Multiple A problem with disabling SIMD is that floating point operations on `x86_64` require SIMD registers by default. To solve this problem, we add the `soft-float` feature, which emulates all floating point operations through software functions based on normal integers. -For more information, see our post on [disabling SIMD](@/second-edition/extra/disable-simd/index.md). +For more information, see our post on [disabling SIMD](@/second-edition/posts/02-minimal-rust-kernel/disable-simd/index.md). #### Putting it Together Our target specification file now looks like this: diff --git a/blog/content/second-edition/posts/03-vga-text-buffer/index.md b/blog/content/second-edition/posts/03-vga-text-buffer/index.md index 09b46610..94098765 100644 --- a/blog/content/second-edition/posts/03-vga-text-buffer/index.md +++ b/blog/content/second-edition/posts/03-vga-text-buffer/index.md @@ -4,6 +4,8 @@ weight = 3 path = "vga-text-mode" date = 2018-02-26 +[extra] +chapter = "Bare Bones" +++ The [VGA text mode] is a simple way to print text to the screen. In this post, we create an interface that makes its usage safe and simple, by encapsulating all unsafety in a separate module. We also implement support for Rust's [formatting macros]. diff --git a/blog/content/second-edition/posts/04-testing/index.md b/blog/content/second-edition/posts/04-testing/index.md index 6e66feab..c63e4ce7 100644 --- a/blog/content/second-edition/posts/04-testing/index.md +++ b/blog/content/second-edition/posts/04-testing/index.md @@ -4,6 +4,8 @@ weight = 4 path = "testing" date = 2019-04-27 +[extra] +chapter = "Bare Bones" +++ This post explores unit and integration testing in `no_std` executables. We will use Rust's support for custom test frameworks to execute test functions inside our kernel. To report the results out of QEMU, we will use different features of QEMU and the `bootimage` tool. @@ -166,18 +168,18 @@ The functionality of the `isa-debug-exit` device is very simple. When a `value` Instead of manually invoking the `in` and `out` assembly instructions, we use the abstractions provided by the [`x86_64`] crate. To add a dependency on that crate, we add it to the `dependencies` section in our `Cargo.toml`: -[`x86_64`]: https://docs.rs/x86_64/0.7.5/x86_64/ +[`x86_64`]: https://docs.rs/x86_64/0.8.1/x86_64/ ```toml # in Cargo.toml [dependencies] -x86_64 = "0.7.5" +x86_64 = "0.8.1" ``` Now we can use the [`Port`] type provided by the crate to create an `exit_qemu` function: -[`Port`]: https://docs.rs/x86_64/0.7.5/x86_64/instructions/port/struct.Port.html +[`Port`]: https://docs.rs/x86_64/0.8.1/x86_64/instructions/port/struct.Port.html ```rust // in src/main.rs diff --git a/blog/content/second-edition/posts/05-cpu-exceptions/index.md b/blog/content/second-edition/posts/05-cpu-exceptions/index.md index 0fa6862b..f318cfdb 100644 --- a/blog/content/second-edition/posts/05-cpu-exceptions/index.md +++ b/blog/content/second-edition/posts/05-cpu-exceptions/index.md @@ -4,6 +4,8 @@ weight = 5 path = "cpu-exceptions" date = 2018-06-17 +[extra] +chapter = "Interrupts" +++ CPU exceptions occur in various erroneous situations, for example when accessing an invalid memory address or when dividing by zero. 
To react to them we have to set up an _interrupt descriptor table_ that provides handler functions. At the end of this post, our kernel will be able to catch [breakpoint exceptions] and to resume normal execution afterwards. @@ -82,7 +84,7 @@ Don't worry about steps 4 and 5 for now, we will learn about the global descript ## An IDT Type Instead of creating our own IDT type, we will use the [`InterruptDescriptorTable` struct] of the `x86_64` crate, which looks like this: -[`InterruptDescriptorTable` struct]: https://docs.rs/x86_64/0.7.5/x86_64/structures/idt/struct.InterruptDescriptorTable.html +[`InterruptDescriptorTable` struct]: https://docs.rs/x86_64/0.8.1/x86_64/structures/idt/struct.InterruptDescriptorTable.html ``` rust #[repr(C)] pub struct InterruptDescriptorTable { @@ -113,10 +115,10 @@ pub struct InterruptDescriptorTable { The fields have the type [`idt::Entry`], which is a struct that represents the fields of an IDT entry (see the table above). The type parameter `F` defines the expected handler function type. We see that some entries require a [`HandlerFunc`] and some entries require a [`HandlerFuncWithErrCode`]. The page fault even has its own special type: [`PageFaultHandlerFunc`]. -[`idt::Entry`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/idt/struct.Entry.html -[`HandlerFunc`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/idt/type.HandlerFunc.html -[`HandlerFuncWithErrCode`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/idt/type.HandlerFuncWithErrCode.html -[`PageFaultHandlerFunc`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/idt/type.PageFaultHandlerFunc.html +[`idt::Entry`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/idt/struct.Entry.html +[`HandlerFunc`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/idt/type.HandlerFunc.html +[`HandlerFuncWithErrCode`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/idt/type.HandlerFuncWithErrCode.html +[`PageFaultHandlerFunc`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/idt/type.PageFaultHandlerFunc.html Let's look at the `HandlerFunc` type first: @@ -168,6 +170,8 @@ In contrast to function calls, exceptions can occur on _any_ instruction. In mos Since we don't know when an exception occurs, we can't back up any registers before. This means that we can't use a calling convention that relies on caller-saved registers for exception handlers. Instead, we need a calling convention that preserves _all registers_. The `x86-interrupt` calling convention is such a calling convention, so it guarantees that all register values are restored to their original values on function return. +Note that this does not mean that all registers are saved to the stack at function entry. Instead, the compiler only backs up the registers that are overwritten by the function. This way, very efficient code can be generated for short functions that only use a few registers. + ### The Interrupt Stack Frame On a normal function call (using the `call` instruction), the CPU pushes the return address before jumping to the target function. On function return (using the `ret` instruction), the CPU pops this return address and jumps to it. So the stack frame of a normal function call looks like this: @@ -191,13 +195,7 @@ So the _interrupt stack frame_ looks like this: In the `x86_64` crate, the interrupt stack frame is represented by the [`InterruptStackFrame`] struct. It is passed to interrupt handlers as `&mut` and can be used to retrieve additional information about the exception's cause. The struct contains no error code field, since only a few exceptions push an error code.
These exceptions use the separate [`HandlerFuncWithErrCode`] function type, which has an additional `error_code` argument. -[`InterruptStackFrame`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/idt/struct.InterruptStackFrame.html - -Note that there is currently [a bug in LLVM] that leads to wrong error code arguments. The cause of the issue is already known and a solution is [being worked on]. - -[a bug in LLVM]: https://github.com/rust-lang/rust/issues/57270 -[being worked on]: https://reviews.llvm.org/D56275 - +[`InterruptStackFrame`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/idt/struct.InterruptStackFrame.html ### Behind the Scenes The `x86-interrupt` calling convention is a powerful abstraction that hides almost all of the messy details of the exception handling process. However, sometimes it's useful to know what's happening behind the curtain. Here is a short overview of the things that the `x86-interrupt` calling convention takes care of: @@ -279,7 +277,7 @@ This error occurs because the `x86-interrupt` calling convention is still unstab In order that the CPU uses our new interrupt descriptor table, we need to load it using the [`lidt`] instruction. The `InterruptDescriptorTable` struct of the `x86_64` provides a [`load`][InterruptDescriptorTable::load] method function for that. Let's try to use it: [`lidt`]: https://www.felixcloutier.com/x86/lgdt:lidt -[InterruptDescriptorTable::load]: https://docs.rs/x86_64/0.7.5/x86_64/structures/idt/struct.InterruptDescriptorTable.html#method.load +[InterruptDescriptorTable::load]: https://docs.rs/x86_64/0.8.1/x86_64/structures/idt/struct.InterruptDescriptorTable.html#method.load ```rust // in src/interrupts.rs @@ -462,7 +460,7 @@ You can try this new test by running `cargo xtest` (all tests) or `cargo xtest - The `x86-interrupt` calling convention and the [`InterruptDescriptorTable`] type made the exception handling process relatively straightforward and painless. If this was too much magic for you and you like to learn all the gory details of exception handling, we got you covered: Our [“Handling Exceptions with Naked Functions”] series shows how to handle exceptions without the `x86-interrupt` calling convention and also creates its own IDT type. Historically, these posts were the main exception handling posts before the `x86-interrupt` calling convention and the `x86_64` crate existed. Note that these posts are based on the [first edition] of this blog and might be out of date. [“Handling Exceptions with Naked Functions”]: @/first-edition/extra/naked-exceptions/_index.md -[`InterruptDescriptorTable`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/idt/struct.InterruptDescriptorTable.html +[`InterruptDescriptorTable`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/idt/struct.InterruptDescriptorTable.html [first edition]: @/first-edition/_index.md ## What's next? diff --git a/blog/content/second-edition/posts/06-double-faults/index.md b/blog/content/second-edition/posts/06-double-faults/index.md index a0f587d9..52adaf19 100644 --- a/blog/content/second-edition/posts/06-double-faults/index.md +++ b/blog/content/second-edition/posts/06-double-faults/index.md @@ -4,6 +4,8 @@ weight = 6 path = "double-fault-exceptions" date = 2018-06-18 +[extra] +chapter = "Interrupts" +++ This post explores the double fault exception in detail, which occurs when the CPU fails to invoke an exception handler. By handling this exception we avoid fatal _triple faults_ that cause a system reset. 
To prevent triple faults in all cases we also set up an _Interrupt Stack Table_ to catch double faults on a separate kernel stack. @@ -79,13 +81,15 @@ lazy_static! { // new extern "x86-interrupt" fn double_fault_handler( - stack_frame: &mut InterruptStackFrame, _error_code: u64) + stack_frame: &mut InterruptStackFrame, _error_code: u64) -> ! { panic!("EXCEPTION: DOUBLE FAULT\n{:#?}", stack_frame); } ``` -Our handler prints a short error message and dumps the exception stack frame. The error code of the double fault handler is always zero, so there's no reason to print it. +Our handler prints a short error message and dumps the exception stack frame. The error code of the double fault handler is always zero, so there's no reason to print it. One difference to the breakpoint handler is that the double fault handler is [_diverging_]. The reason is that the `x86_64` architecture does not permit returning from a double fault exception. + +[_diverging_]: https://doc.rust-lang.org/stable/rust-by-example/fn/diverging.html When we start our kernel now, we should see that the double fault handler is invoked: @@ -225,7 +229,7 @@ The _Privilege Stack Table_ is used by the CPU when the privilege level changes. ### Creating a TSS Let's create a new TSS that contains a separate double fault stack in its interrupt stack table. For that we need a TSS struct. Fortunately, the `x86_64` crate already contains a [`TaskStateSegment` struct] that we can use. -[`TaskStateSegment` struct]: https://docs.rs/x86_64/0.7.5/x86_64/structures/tss/struct.TaskStateSegment.html +[`TaskStateSegment` struct]: https://docs.rs/x86_64/0.8.1/x86_64/structures/tss/struct.TaskStateSegment.html We create the TSS in a new `gdt` module (the name will make sense later): @@ -371,8 +375,8 @@ pub fn init() { We reload the code segment register using [`set_cs`] and to load the TSS using [`load_tss`]. The functions are marked as `unsafe`, so we need an `unsafe` block to invoke them. The reason is that it might be possible to break memory safety by loading invalid selectors. -[`set_cs`]: https://docs.rs/x86_64/0.7.5/x86_64/instructions/segmentation/fn.set_cs.html -[`load_tss`]: https://docs.rs/x86_64/0.7.5/x86_64/instructions/tables/fn.load_tss.html +[`set_cs`]: https://docs.rs/x86_64/0.8.1/x86_64/instructions/segmentation/fn.set_cs.html +[`load_tss`]: https://docs.rs/x86_64/0.8.1/x86_64/instructions/tables/fn.load_tss.html Now that we loaded a valid TSS and interrupt stack table, we can set the stack index for our double fault handler in the IDT: @@ -517,7 +521,7 @@ use x86_64::structures::idt::InterruptStackFrame; extern "x86-interrupt" fn test_double_fault_handler( _stack_frame: &mut InterruptStackFrame, _error_code: u64, -) { +) -> ! { serial_println!("[ok]"); exit_qemu(QemuExitCode::Success); loop {} diff --git a/blog/content/second-edition/posts/07-hardware-interrupts/index.md b/blog/content/second-edition/posts/07-hardware-interrupts/index.md index 886c38b8..d472b95c 100644 --- a/blog/content/second-edition/posts/07-hardware-interrupts/index.md +++ b/blog/content/second-edition/posts/07-hardware-interrupts/index.md @@ -4,6 +4,8 @@ weight = 7 path = "hardware-interrupts" date = 2018-10-22 +[extra] +chapter = "Interrupts" +++ In this post we set up the programmable interrupt controller to correctly forward hardware interrupts to the CPU. To handle these interrupts we add new entries to our interrupt descriptor table, just like we did for our exception handlers. 
We will learn how to get periodic timer interrupts and how to get input from the keyboard. @@ -206,7 +208,7 @@ extern "x86-interrupt" fn timer_interrupt_handler( Our `timer_interrupt_handler` has the same signature as our exception handlers, because the CPU reacts identically to exceptions and external interrupts (the only difference is that some exceptions push an error code). The [`InterruptDescriptorTable`] struct implements the [`IndexMut`] trait, so we can access individual entries through array indexing syntax. -[`InterruptDescriptorTable`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/idt/struct.InterruptDescriptorTable.html +[`InterruptDescriptorTable`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/idt/struct.InterruptDescriptorTable.html [`IndexMut`]: https://doc.rust-lang.org/core/ops/trait.IndexMut.html In our timer interrupt handler, we print a dot to the screen. As the timer interrupt happens periodically, we would expect to see a dot appearing on each timer tick. However, when we run it we see that only a single dot is printed: @@ -331,7 +333,7 @@ pub fn _print(args: fmt::Arguments) { The [`without_interrupts`] function takes a [closure] and executes it in an interrupt-free environment. We use it to ensure that no interrupt can occur as long as the `Mutex` is locked. When we run our kernel now we see that it keeps running without hanging. (We still don't notice any dots, but this is because they're scrolling by too fast. Try to slow down the printing, e.g. by putting a `for _ in 0..10000 {}` inside the loop.) -[`without_interrupts`]: https://docs.rs/x86_64/0.7.5/x86_64/instructions/interrupts/fn.without_interrupts.html +[`without_interrupts`]: https://docs.rs/x86_64/0.8.1/x86_64/instructions/interrupts/fn.without_interrupts.html [closure]: https://doc.rust-lang.org/book/second-edition/ch13-01-closures.html We can apply the same change to our serial printing function to ensure that no deadlocks occur with it either: @@ -586,7 +588,7 @@ extern "x86-interrupt" fn keyboard_interrupt_handler( We use the [`Port`] type of the `x86_64` crate to read a byte from the keyboard's data port. This byte is called the [_scancode_] and is a number that represents the key press/release. We don't do anything with the scancode yet, we just print it to the screen: -[`Port`]: https://docs.rs/x86_64/0.7.5/x86_64/instructions/port/struct.Port.html +[`Port`]: https://docs.rs/x86_64/0.8.1/x86_64/instructions/port/struct.Port.html [_scancode_]: https://en.wikipedia.org/wiki/Scancode ![QEMU printing scancodes to the screen when keys are pressed](qemu-printing-scancodes.gif) diff --git a/blog/content/second-edition/posts/08-paging-introduction/index.md b/blog/content/second-edition/posts/08-paging-introduction/index.md index 4ed55aff..befcfb23 100644 --- a/blog/content/second-edition/posts/08-paging-introduction/index.md +++ b/blog/content/second-edition/posts/08-paging-introduction/index.md @@ -4,6 +4,8 @@ weight = 8 path = "paging-introduction" date = 2019-01-14 +[extra] +chapter = "Memory Management" +++ This post introduces _paging_, a very common memory management scheme that we will also use for our operating system. It explains why memory isolation is needed, how _segmentation_ works, what _virtual memory_ is, and how paging solves memory fragmentation issues. It also explores the layout of multilevel page tables on the x86_64 architecture. 
@@ -229,8 +231,8 @@ Let's take a closer look at the available flags: The `x86_64` crate provides types for [page tables] and their [entries], so we don't need to create these structures ourselves. -[page tables]: https://docs.rs/x86_64/0.7.5/x86_64/structures/paging/page_table/struct.PageTable.html -[entries]: https://docs.rs/x86_64/0.7.5/x86_64/structures/paging/page_table/struct.PageTableEntry.html +[page tables]: https://docs.rs/x86_64/0.8.1/x86_64/structures/paging/page_table/struct.PageTable.html +[entries]: https://docs.rs/x86_64/0.8.1/x86_64/structures/paging/page_table/struct.PageTableEntry.html ### The Translation Lookaside Buffer @@ -239,7 +241,7 @@ A 4-level page table makes the translation of virtual addresses expensive, becau Unlike the other CPU caches, the TLB is not fully transparent and does not update or remove translations when the contents of page tables change. This means that the kernel must manually update the TLB whenever it modifies a page table. To do this, there is a special CPU instruction called [`invlpg`] (“invalidate page”) that removes the translation for the specified page from the TLB, so that it is loaded again from the page table on the next access. The TLB can also be flushed completely by reloading the `CR3` register, which simulates an address space switch. The `x86_64` crate provides Rust functions for both variants in the [`tlb` module]. [`invlpg`]: https://www.felixcloutier.com/x86/INVLPG.html -[`tlb` module]: https://docs.rs/x86_64/0.7.5/x86_64/instructions/tlb/index.html +[`tlb` module]: https://docs.rs/x86_64/0.8.1/x86_64/instructions/tlb/index.html It is important to remember flushing the TLB on each page table modification because otherwise the CPU might keep using the old translation, which can lead to non-deterministic bugs that are very hard to debug. @@ -294,8 +296,8 @@ extern "x86-interrupt" fn page_fault_handler( The [`CR2`] register is automatically set by the CPU on a page fault and contains the accessed virtual address that caused the page fault. We use the [`Cr2::read`] function of the `x86_64` crate to read and print it. The [`PageFaultErrorCode`] type provides more information about the type of memory access that caused the page fault, for example whether it was caused by a read or write operation. For this reason we print it too. We can't continue execution without resolving the page fault, so we enter a [`hlt_loop`] at the end. [`CR2`]: https://en.wikipedia.org/wiki/Control_register#CR2 -[`Cr2::read`]: https://docs.rs/x86_64/0.7.5/x86_64/registers/control/struct.Cr2.html#method.read -[`PageFaultErrorCode`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/idt/struct.PageFaultErrorCode.html +[`Cr2::read`]: https://docs.rs/x86_64/0.8.1/x86_64/registers/control/struct.Cr2.html#method.read +[`PageFaultErrorCode`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/idt/struct.PageFaultErrorCode.html [LLVM bug]: https://github.com/rust-lang/rust/issues/57270 [`hlt_loop`]: @/second-edition/posts/07-hardware-interrupts/index.md#the-hlt-instruction @@ -329,7 +331,7 @@ When we run it, we see that our page fault handler is called: The `CR2` register indeed contains `0xdeadbeaf`, the address that we tried to access. The error code tells us through the [`CAUSED_BY_WRITE`] that the fault occurred while trying to perform a write operation. It tells us even more through the [bits that are _not_ set][`PageFaultErrorCode`]. 
For example, the fact that the `PROTECTION_VIOLATION` flag is not set means that the page fault occurred because the target page wasn't present. -[`CAUSED_BY_WRITE`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/idt/struct.PageFaultErrorCode.html#associatedconstant.CAUSED_BY_WRITE +[`CAUSED_BY_WRITE`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/idt/struct.PageFaultErrorCode.html#associatedconstant.CAUSED_BY_WRITE We see that the current instruction pointer is `0x2031b2`, so we know that this address points to a code page. Code pages are mapped read-only by the bootloader, so reading from this address works but writing causes a page fault. You can try this by changing the `0xdeadbeaf` pointer to `0x2031b2`: @@ -353,7 +355,7 @@ By commenting out the last line, we see that the read access works, but the writ We see that the _"read worked"_ message is printed, which indicates that the read operation did not cause any errors. However, instead of the _"write worked"_ message a page fault occurs. This time the [`PROTECTION_VIOLATION`] flag is set in addition to the [`CAUSED_BY_WRITE`] flag, which indicates that the page was present, but the operation was not allowed on it. In this case, writes to the page are not allowed since code pages are mapped as read-only. -[`PROTECTION_VIOLATION`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/idt/struct.PageFaultErrorCode.html#associatedconstant.PROTECTION_VIOLATION +[`PROTECTION_VIOLATION`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/idt/struct.PageFaultErrorCode.html#associatedconstant.PROTECTION_VIOLATION ### Accessing the Page Tables @@ -379,9 +381,9 @@ pub extern "C" fn _start() -> ! { The [`Cr3::read`] function of the `x86_64` returns the currently active level 4 page table from the `CR3` register. It returns a tuple of a [`PhysFrame`] and a [`Cr3Flags`] type. We are only interested in the frame, so we ignore the second element of the tuple. -[`Cr3::read`]: https://docs.rs/x86_64/0.7.5/x86_64/registers/control/struct.Cr3.html#method.read -[`PhysFrame`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/paging/frame/struct.PhysFrame.html -[`Cr3Flags`]: https://docs.rs/x86_64/0.7.5/x86_64/registers/control/struct.Cr3Flags.html +[`Cr3::read`]: https://docs.rs/x86_64/0.8.1/x86_64/registers/control/struct.Cr3.html#method.read +[`PhysFrame`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/paging/frame/struct.PhysFrame.html +[`Cr3Flags`]: https://docs.rs/x86_64/0.8.1/x86_64/registers/control/struct.Cr3Flags.html When we run it, we see the following output: @@ -391,7 +393,7 @@ Level 4 page table at: PhysAddr(0x1000) So the currently active level 4 page table is stored at address `0x1000` in _physical_ memory, as indicated by the [`PhysAddr`] wrapper type. The question now is: how can we access this table from our kernel? -[`PhysAddr`]: https://docs.rs/x86_64/0.7.5/x86_64/struct.PhysAddr.html +[`PhysAddr`]: https://docs.rs/x86_64/0.8.1/x86_64/struct.PhysAddr.html Accessing physical memory directly is not possible when paging is active, since programs could easily circumvent memory protection and access memory of other programs otherwise. So the only way to access the table is through some virtual page that is mapped to the physical frame at address `0x1000`. This problem of creating mappings for page table frames is a general problem, since the kernel needs to access the page tables regularly, for example when allocating a stack for a new thread. 
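The next post (diffed below) addresses exactly this problem by having the bootloader map the complete physical memory at a fixed virtual offset; its hunks reference an `active_level_4_table` function built on that mapping. A minimal, illustrative sketch of the idea — not part of the patch, and the exact body in the post may differ — assuming such an offset mapping exists:

```rust
use x86_64::{registers::control::Cr3, structures::paging::PageTable, VirtAddr};

/// Sketch: returns a mutable reference to the active level 4 page table.
///
/// Assumes the bootloader mapped the complete physical memory to virtual
/// memory starting at `physical_memory_offset`; must only be called once to
/// avoid creating aliased `&mut` references.
unsafe fn active_level_4_table(physical_memory_offset: VirtAddr) -> &'static mut PageTable {
    // CR3 holds the physical frame of the currently active level 4 table.
    let (level_4_table_frame, _) = Cr3::read();

    // Translate the frame's physical start address through the offset mapping.
    let phys = level_4_table_frame.start_address();
    let virt = physical_memory_offset + phys.as_u64();
    let page_table_ptr: *mut PageTable = virt.as_mut_ptr();

    &mut *page_table_ptr
}
```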
diff --git a/blog/content/second-edition/posts/09-paging-implementation/index.md b/blog/content/second-edition/posts/09-paging-implementation/index.md index 97bef938..ab0c5d9c 100644 --- a/blog/content/second-edition/posts/09-paging-implementation/index.md +++ b/blog/content/second-edition/posts/09-paging-implementation/index.md @@ -3,6 +3,9 @@ title = "Paging Implementation" weight = 9 path = "paging-implementation" date = 2019-03-14 + +[extra] +chapter = "Memory Management" +++ This post shows how to implement paging support in our kernel. It first explores different techniques to make the physical page table frames accessible to the kernel and discusses their respective advantages and drawbacks. It then implements an address translation function and a function to create a new mapping. @@ -31,11 +34,11 @@ To implement the approach, we will need support from the bootloader, so we'll co ### Dependency Updates -This post requires version 0.7.5 or later of the `x86_64` dependency. You can update the dependency in your `Cargo.toml`: +This post requires version 0.8.1 or later of the `x86_64` dependency. You can update the dependency in your `Cargo.toml`: ```toml [dependencies] -x86_64 = "0.7.5" +x86_64 = "0.8.1" ``` For an overview of the changes in recent versions, check out the [`x86_64` changelog]. @@ -229,7 +232,7 @@ The above code assumes that the last level 4 entry with index `0o777` (511) is r Alternatively to performing the bitwise operations by hand, you can use the [`RecursivePageTable`] type of the `x86_64` crate, which provides safe abstractions for various page table operations. For example, the code below shows how to translate a virtual address to its mapped physical address: -[`RecursivePageTable`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/paging/mapper/struct.RecursivePageTable.html +[`RecursivePageTable`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/paging/mapper/struct.RecursivePageTable.html ```rust // in src/memory.rs @@ -447,7 +450,7 @@ fn kernel_main(boot_info: &'static BootInfo) -> ! { First, we convert the `physical_memory_offset` of the `BootInfo` struct to a [`VirtAddr`] and pass it to the `active_level_4_table` function. We then use the `iter` function to iterate over the page table entries and the [`enumerate`] combinator to additionally add an index `i` to each element. We only print non-empty entries because all 512 entries wouldn't fit on the screen. -[`VirtAddr`]: https://docs.rs/x86_64/0.7.5/x86_64/struct.VirtAddr.html +[`VirtAddr`]: https://docs.rs/x86_64/0.8.1/x86_64/struct.VirtAddr.html [`enumerate`]: https://doc.rust-lang.org/core/iter/trait.Iterator.html#method.enumerate When we run it, we see the following output: @@ -560,7 +563,7 @@ The `VirtAddr` struct already provides methods to compute the indexes into the p Inside the loop, we again use the `physical_memory_offset` to convert the frame into a page table reference. We then read the entry of the current page table and use the [`PageTableEntry::frame`] function to retrieve the mapped frame. If the entry is not mapped to a frame we return `None`. If the entry maps a huge 2MiB or 1GiB page we panic for now. 
-[`PageTableEntry::frame`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/paging/page_table/struct.PageTableEntry.html#method.frame +[`PageTableEntry::frame`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/paging/page_table/struct.PageTableEntry.html#method.frame Let's test our translation function by translating some addresses: @@ -616,18 +619,18 @@ The base of the abstraction are two traits that define various page table mappin - The [`Mapper`] trait is generic over the page size and provides functions that operate on pages. Examples are [`translate_page`], which translates a given page to a frame of the same size, and [`map_to`], which creates a new mapping in the page table. - The [`MapperAllSizes`] trait implies that the implementor implements `Mapper` for all pages sizes. In addition, it provides functions that work with multiple page sizes such as [`translate_addr`] or the general [`translate`]. -[`Mapper`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/paging/mapper/trait.Mapper.html -[`translate_page`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/paging/mapper/trait.Mapper.html#tymethod.translate_page -[`map_to`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/paging/mapper/trait.Mapper.html#tymethod.map_to -[`MapperAllSizes`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/paging/mapper/trait.MapperAllSizes.html -[`translate_addr`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/paging/mapper/trait.MapperAllSizes.html#method.translate_addr -[`translate`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/paging/mapper/trait.MapperAllSizes.html#tymethod.translate +[`Mapper`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/paging/mapper/trait.Mapper.html +[`translate_page`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/paging/mapper/trait.Mapper.html#tymethod.translate_page +[`map_to`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/paging/mapper/trait.Mapper.html#tymethod.map_to +[`MapperAllSizes`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/paging/mapper/trait.MapperAllSizes.html +[`translate_addr`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/paging/mapper/trait.MapperAllSizes.html#method.translate_addr +[`translate`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/paging/mapper/trait.MapperAllSizes.html#tymethod.translate The traits only define the interface, they don't provide any implementation. The `x86_64` crate currently provides three types that implement the traits with different requirements. The [`OffsetPageTable`] type assumes that the complete physical memory is mapped to the virtual address space at some offset. The [`MappedPageTable`] is a bit more flexible: It only requires that each page table frame is mapped to the virtual address space at a calculable address. Finally, the [`RecursivePageTable`] type can be used to access page table frames through [recursive page tables](#recursive-page-tables). 
-[`OffsetPageTable`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/paging/mapper/struct.OffsetPageTable.html -[`MappedPageTable`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/paging/mapper/struct.MappedPageTable.html -[`RecursivePageTable`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/paging/mapper/struct.RecursivePageTable.html +[`OffsetPageTable`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/paging/mapper/struct.OffsetPageTable.html +[`MappedPageTable`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/paging/mapper/struct.MappedPageTable.html +[`RecursivePageTable`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/paging/mapper/struct.RecursivePageTable.html In our case, the bootloader maps the complete physical memory at an virtual address specfied by the `physical_memory_offset` variable, so we can use the `OffsetPageTable` type. To initialize it, we create a new `init` function in our `memory` module: @@ -653,7 +656,7 @@ unsafe fn active_level_4_table(physical_memory_offset: VirtAddr) The function takes the `physical_memory_offset` as an argument and returns a new `OffsetPageTable` instance with a `'static` lifetime. This means that the instance stays valid for the complete runtime of our kernel. In the function body, we first call the `active_level_4_table` function to retrieve a mutable reference to the level 4 page table. We then invoke the [`OffsetPageTable::new`] function with this reference. As the second parameter, the `new` function expects the virtual address at which the mapping of the physical memory starts, which is given in the `physical_memory_offset` variable. -[`OffsetPageTable::new`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/paging/mapper/struct.OffsetPageTable.html#method.new +[`OffsetPageTable::new`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/paging/mapper/struct.OffsetPageTable.html#method.new The `active_level_4_table` function should be only called from the `init` function from now on because it can easily lead to aliased mutable references when called multiple times, which can cause undefined behavior. For this reason, we make the function private by removing the `pub` specifier. @@ -704,8 +707,8 @@ Until now we only looked at the page tables without modifying anything. Let's ch We will use the [`map_to`] function of the [`Mapper`] trait for our implementation, so let's take a look at that function first. The documentation tells us that it takes four arguments: the page that we want to map, the frame that the page should be mapped to, a set of flags for the page table entry, and a `frame_allocator`. The frame allocator is needed because mapping the given page might require creating additional page tables, which need unused frames as backing storage. -[`map_to`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/paging/trait.Mapper.html#tymethod.map_to -[`Mapper`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/paging/trait.Mapper.html +[`map_to`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/paging/trait.Mapper.html#tymethod.map_to +[`Mapper`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/paging/trait.Mapper.html #### A `create_example_mapping` Function @@ -718,7 +721,7 @@ The `create_example_mapping` function looks like this: use x86_64::{ PhysAddr, - structures::paging::{Page, PhysFrame, Mapper, Size4KiB, FrameAllocator} + structures::paging::{Page, PhysFrame, Mapper, Size4KiB, FrameAllocator, UnusedPhysFrame} }; /// Creates an example mapping for the given page to frame `0xb8000`. 
@@ -730,11 +733,11 @@ pub fn create_example_mapping( use x86_64::structures::paging::PageTableFlags as Flags; let frame = PhysFrame::containing_address(PhysAddr::new(0xb8000)); + // FIXME: ONLY FOR TEMPORARY TESTING + let unused_frame = unsafe { UnusedPhysFrame::new(frame) }; let flags = Flags::PRESENT | Flags::WRITABLE; - let map_to_result = unsafe { - mapper.map_to(page, frame, flags, frame_allocator) - }; + let map_to_result = mapper.map_to(page, unused_frame, flags, frame_allocator); map_to_result.expect("map_to failed").flush(); } ``` @@ -743,10 +746,15 @@ In addition to the `page` that should be mapped, the function expects a mutable [impl-trait-arg]: https://doc.rust-lang.org/book/ch10-02-traits.html#traits-as-parameters [generic]: https://doc.rust-lang.org/book/ch10-00-generics.html -[`FrameAllocator`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/paging/trait.FrameAllocator.html -[`PageSize`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/paging/page/trait.PageSize.html +[`FrameAllocator`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/paging/trait.FrameAllocator.html +[`PageSize`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/paging/page/trait.PageSize.html -For the mapping, we set the `PRESENT` flag because it is required for all valid entries and the `WRITABLE` flag to make the mapped page writable. Calling [`map_to`] is unsafe because it's possible to break memory safety with invalid arguments, so we need to use an `unsafe` block. For a list of all possible flags, see the [_Page Table Format_] section of the previous post. +Instead of a normal `PhysFrame`, the [`map_to`] method requires an [`UnusedPhysFrame`] wrapper type to ensure that the frame is not already in use. The reason for this is that mapping the same frame twice could result in undefined behavior, for example when two different `&mut` references point to the same physical memory location. In our case, we reuse the VGA text buffer frame, which is already mapped, so we break the required condition when calling the unsafe [`UnusedPhysFrame::new`] function. However, the `create_example_mapping` function is only a temporary testing function and will be removed after this post, so it is ok. To remind us of the unsafety, we put a `FIXME` comment on the line. + +[`UnusedPhysFrame`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/paging/struct.UnusedPhysFrame.html +[`UnusedPhysFrame::new`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/paging/struct.UnusedPhysFrame.html#method.new + +In addition to the `page` and the `unused_frame`, the `map_to` method takes a set of flags for the mapping and a reference to the `frame_allocator`, which will be explained in a moment. For the flags, we set the `PRESENT` flag because it is required for all valid entries and the `WRITABLE` flag to make the mapped page writable. For a list of all possible flags, see the [_Page Table Format_] section of the previous post. [_Page Table Format_]: @/second-edition/posts/08-paging-introduction/index.md#page-table-format @@ -754,8 +762,8 @@ The [`map_to`] function can fail, so it returns a [`Result`]. 
Since this is just [`Result`]: https://doc.rust-lang.org/core/result/enum.Result.html [`expect`]: https://doc.rust-lang.org/core/result/enum.Result.html#method.expect -[`MapperFlush`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/paging/mapper/struct.MapperFlush.html -[`flush`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/paging/mapper/struct.MapperFlush.html#method.flush +[`MapperFlush`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/paging/mapper/struct.MapperFlush.html +[`flush`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/paging/mapper/struct.MapperFlush.html#method.flush [must_use]: https://doc.rust-lang.org/std/result/#results-must-be-used #### A dummy `FrameAllocator` @@ -771,7 +779,7 @@ Let's start with the simple case and assume that we don't need to create new pag pub struct EmptyFrameAllocator; unsafe impl FrameAllocator for EmptyFrameAllocator { - fn allocate_frame(&mut self) -> Option { + fn allocate_frame(&mut self) -> Option { None } } @@ -906,7 +914,7 @@ use bootloader::bootinfo::MemoryRegionType; impl BootInfoFrameAllocator { /// Returns an iterator over the usable frames specified in the memory map. - fn usable_frames(&self) -> impl Iterator { + fn usable_frames(&self) -> impl Iterator { // get usable regions from memory map let regions = self.memory_map.iter(); let usable_regions = regions @@ -917,8 +925,9 @@ impl BootInfoFrameAllocator { // transform to an iterator of frame start addresses let frame_addresses = addr_ranges.flat_map(|r| r.step_by(4096)); // create `PhysFrame` types from the start addresses - frame_addresses - .map(|addr|PhysFrame::containing_address(PhysAddr::new(addr))) + let frames = frame_addresses.map(|addr| PhysFrame::containing_address(PhysAddr::new(addr))); + // we know that the frames are really unused + frames.map(|f| unsafe { UnusedPhysFrame::new(f) }) } } ``` @@ -929,7 +938,8 @@ This function uses iterator combinator methods to transform the initial `MemoryM - Then we use the [`filter`] method to skip any reserved or otherwise unavailable regions. The bootloader updates the memory map for all the mappings it creates, so frames that are used by our kernel (code, data or stack) or to store the boot information are already marked as `InUse` or similar. Thus we can be sure that `Usable` frames are not used somewhere else. - Afterwards, we use the [`map`] combinator and Rust's [range syntax] to transform our iterator of memory regions to an iterator of address ranges. - The next step is the most complicated: We convert each range to an iterator through the `into_iter` method and then choose every 4096th address using [`step_by`]. Since 4096 bytes (= 4 KiB) is the page size, we get the start address of each frame. The bootloader page aligns all usable memory areas so that we don't need any alignment or rounding code here. By using [`flat_map`] instead of `map`, we get an `Iterator` instead of an `Iterator>`. -- Finally, we convert the start addresses to `PhysFrame` types to construct the desired `Iterator`. We then use this iterator to create and return a new `BootInfoFrameAllocator`. +- Then we convert the start addresses to `PhysFrame` types to construct the an `Iterator`. +- In the last step, we use the [`map`] combinator again to wrap each frame into the [`UnusedPhysFrame`] wrapper type. This is safe because we trust the boot information. 
[`MemoryRegion`]: https://docs.rs/bootloader/0.6.4/bootloader/bootinfo/struct.MemoryRegion.html [`filter`]: https://doc.rust-lang.org/core/iter/trait.Iterator.html#method.filter @@ -938,7 +948,7 @@ This function uses iterator combinator methods to transform the initial `MemoryM [`step_by`]: https://doc.rust-lang.org/core/iter/trait.Iterator.html#method.step_by [`flat_map`]: https://doc.rust-lang.org/core/iter/trait.Iterator.html#method.flat_map -The return type of the function uses the [`impl Trait`] feature. This way, we can specify that we return some type that implements the [`Iterator`] trait with item type `PhysFrame`, but don't need to name the concrete return type. This is important here because we _can't_ name the conrete type since it depends on unnamable closure types. +The return type of the function uses the [`impl Trait`] feature. This way, we can specify that we return some type that implements the [`Iterator`] trait with item type `UnusedPhysFrame`, but don't need to name the concrete return type. This is important here because we _can't_ name the concrete type since it depends on unnamable closure types. [`impl Trait`]: https://doc.rust-lang.org/book/ch10-02-traits.html#returning-types-that-implement-traits [`Iterator`]: https://doc.rust-lang.org/core/iter/trait.Iterator.html @@ -951,7 +961,7 @@ Now we can implement the `FrameAllocator` trait: // in src/memory.rs unsafe impl FrameAllocator for BootInfoFrameAllocator { - fn allocate_frame(&mut self) -> Option { + fn allocate_frame(&mut self) -> Option { let frame = self.usable_frames().nth(self.next); self.next += 1; frame @@ -994,6 +1004,8 @@ With the boot info frame allocator, the mapping succeeds and we see the black-on While our `create_example_mapping` function is just some example code, we are now able to create new mappings for arbitrary pages. This will be essential for allocating memory or implementing multithreading in future posts. +At this point, we should delete the `create_example_mapping` function again to avoid accidentally invoking undefined behavior, as explained [above](#a-create-example-mapping-function). + ## Summary In this post we learned about different techniques to access the physical frames of page tables, including identity mapping, mapping of the complete physical memory, temporary mapping, and recursive page tables. We chose to map the complete physical memory since it's simple, portable, and powerful. diff --git a/blog/content/second-edition/posts/10-heap-allocation/index.md b/blog/content/second-edition/posts/10-heap-allocation/index.md index dff0d0ca..6e907402 100644 --- a/blog/content/second-edition/posts/10-heap-allocation/index.md +++ b/blog/content/second-edition/posts/10-heap-allocation/index.md @@ -3,6 +3,9 @@ title = "Heap Allocation" weight = 10 path = "heap-allocation" date = 2019-06-26 + +[extra] +chapter = "Memory Management" +++ This post adds support for heap allocation to our kernel. First, it gives an introduction to dynamic memory and shows how the borrow checker prevents common allocation errors. It then implements the basic allocation interface of Rust, creates a heap memory region, and sets up an allocator crate. At the end of this post all the allocation and collection types of the built-in `alloc` crate will be available to our kernel. 
@@ -251,7 +254,7 @@ It defines the two required methods [`alloc`] and [`dealloc`], which correspond The trait additionally defines the two methods [`alloc_zeroed`] and [`realloc`] with default implementations: -- The [`alloc_zeroed`] method is equivalent to calling `alloc` and then setting the allocated memory block to zero, which is exactly what the provided default implementation does. An allocator implementations can override the default implementations with a more efficient custom implementation if possible. +- The [`alloc_zeroed`] method is equivalent to calling `alloc` and then setting the allocated memory block to zero, which is exactly what the provided default implementation does. An allocator implementation can override the default implementations with a more efficient custom implementation if possible. - The [`realloc`] method allows to grow or shrink an allocation. The default implementation allocates a new memory block with the desired size and copies over all the content from the previous allocation. Again, an allocator implementation can probably provide a more efficient implementation of this method, for example by growing/shrinking the allocation in-place if possible. [`alloc_zeroed`]: https://doc.rust-lang.org/alloc/alloc/trait.GlobalAlloc.html#method.alloc_zeroed @@ -306,15 +309,13 @@ We now have a simple allocator, but we still have to tell the Rust compiler that The `#[global_allocator]` attribute tells the Rust compiler which allocator instance it should use as the global heap allocator. The attribute is only applicable to a `static` that implements the `GlobalAlloc` trait. Let's register an instance of our `Dummy` allocator as the global allocator: ```rust -// in src/lib.rs +// in src/allocator.rs #[global_allocator] -static ALLOCATOR: allocator::Dummy = allocator::Dummy; +static ALLOCATOR: Dummy = Dummy; ``` -Since the `Dummy` allocator is a [zero sized type], we don't need to specify any fields in the initialization expression. Note that the `#[global_allocator]` module [cannot be used in submodules][pr51335], so we need to put it into the `lib.rs`. - -[pr51335]: https://github.com/rust-lang/rust/pull/51335 +Since the `Dummy` allocator is a [zero sized type], we don't need to specify any fields in the initialization expression. When we now try to compile it, the first error should be gone. Let's fix the remaining second error: @@ -422,7 +423,7 @@ pub fn init_heap( .allocate_frame() .ok_or(MapToError::FrameAllocationFailed)?; let flags = PageTableFlags::PRESENT | PageTableFlags::WRITABLE; - unsafe { mapper.map_to(page, frame, flags, frame_allocator)?.flush() }; + mapper.map_to(page, frame, flags, frame_allocator)?.flush(); } Ok(()) @@ -431,12 +432,12 @@ pub fn init_heap( The function takes mutable references to a [`Mapper`] and a [`FrameAllocator`] instance, both limited to 4KiB pages by using [`Size4KiB`] as generic parameter. The return value of the function is a [`Result`] with the unit type `()` as success variant and a [`MapToError`] as error variant, which is the error type returned by the [`Mapper::map_to`] method. Reusing the error type makes sense here because the `map_to` method is the main source of errors in this function. 
-[`Mapper`]:https://docs.rs/x86_64/0.7.5/x86_64/structures/paging/mapper/trait.Mapper.html -[`FrameAllocator`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/paging/trait.FrameAllocator.html -[`Size4KiB`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/paging/page/enum.Size4KiB.html +[`Mapper`]:https://docs.rs/x86_64/0.8.1/x86_64/structures/paging/mapper/trait.Mapper.html +[`FrameAllocator`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/paging/trait.FrameAllocator.html +[`Size4KiB`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/paging/page/enum.Size4KiB.html [`Result`]: https://doc.rust-lang.org/core/result/enum.Result.html -[`MapToError`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/paging/mapper/enum.MapToError.html -[`Mapper::map_to`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/paging/mapper/trait.Mapper.html#tymethod.map_to +[`MapToError`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/paging/mapper/enum.MapToError.html +[`Mapper::map_to`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/paging/mapper/trait.Mapper.html#tymethod.map_to The implementation can be broken down into two parts: @@ -448,20 +449,20 @@ The implementation can be broken down into two parts: - We set the required `PRESENT` flag and the `WRITABLE` flag for the page. With these flags both read and write accesses are allowed, which makes sense for heap memory. - - We use the unsafe [`Mapper::map_to`] method for creating the mapping in the active page table. The method can fail, therefore we use the [question mark operator] again to forward the error to the caller. On success, the method returns a [`MapperFlush`] instance that we can use to update the [_translation lookaside buffer_] using the [`flush`] method. + - We use the [`Mapper::map_to`] method for creating the mapping in the active page table. The method can fail, therefore we use the [question mark operator] again to forward the error to the caller. On success, the method returns a [`MapperFlush`] instance that we can use to update the [_translation lookaside buffer_] using the [`flush`] method. 
-[`VirtAddr`]: https://docs.rs/x86_64/0.7.5/x86_64/struct.VirtAddr.html -[`Page`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/paging/page/struct.Page.html -[`containing_address`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/paging/page/struct.Page.html#method.containing_address -[`Page::range_inclusive`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/paging/page/struct.Page.html#method.range_inclusive -[`FrameAllocator::allocate_frame`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/paging/trait.FrameAllocator.html#tymethod.allocate_frame +[`VirtAddr`]: https://docs.rs/x86_64/0.8.1/x86_64/struct.VirtAddr.html +[`Page`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/paging/page/struct.Page.html +[`containing_address`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/paging/page/struct.Page.html#method.containing_address +[`Page::range_inclusive`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/paging/page/struct.Page.html#method.range_inclusive +[`FrameAllocator::allocate_frame`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/paging/trait.FrameAllocator.html#tymethod.allocate_frame [`None`]: https://doc.rust-lang.org/core/option/enum.Option.html#variant.None -[`MapToError::FrameAllocationFailed`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/paging/mapper/enum.MapToError.html#variant.FrameAllocationFailed +[`MapToError::FrameAllocationFailed`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/paging/mapper/enum.MapToError.html#variant.FrameAllocationFailed [`Option::ok_or`]: https://doc.rust-lang.org/core/option/enum.Option.html#method.ok_or [question mark operator]: https://doc.rust-lang.org/edition-guide/rust-2018/error-handling-and-panics/the-question-mark-operator-for-easier-error-handling.html -[`MapperFlush`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/paging/mapper/struct.MapperFlush.html +[`MapperFlush`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/paging/mapper/struct.MapperFlush.html [_translation lookaside buffer_]: @/second-edition/posts/08-paging-introduction/index.md#the-translation-lookaside-buffer -[`flush`]: https://docs.rs/x86_64/0.7.5/x86_64/structures/paging/mapper/struct.MapperFlush.html#method.flush +[`flush`]: https://docs.rs/x86_64/0.8.1/x86_64/structures/paging/mapper/struct.MapperFlush.html#method.flush The final step is to call this function from our `kernel_main`: @@ -520,7 +521,7 @@ linked_list_allocator = "0.6.4" Then we can replace our dummy allocator with the allocator provided by the crate: ```rust -// in src/lib.rs +// in src/allocator.rs use linked_list_allocator::LockedHeap; @@ -547,7 +548,7 @@ pub fn init_heap( // new unsafe { - super::ALLOCATOR.lock().init(HEAP_START, HEAP_SIZE); + ALLOCATOR.lock().init(HEAP_START, HEAP_SIZE); } Ok(()) @@ -745,10 +746,12 @@ As a third test, we create ten thousand allocations after each other: ```rust // in tests/heap_allocation.rs +use blog_os::allocator::HEAP_SIZE; + #[test_case] fn many_boxes() { serial_print!("many_boxes... "); - for i in 0..10_000 { + for i in 0..HEAP_SIZE { let x = Box::new(i); assert_eq!(*x, i); } diff --git a/blog/content/second-edition/posts/11-allocator-designs/allocation-fragmentation.svg b/blog/content/second-edition/posts/11-allocator-designs/allocation-fragmentation.svg new file mode 100644 index 00000000..b9c74784 --- /dev/null +++ b/blog/content/second-edition/posts/11-allocator-designs/allocation-fragmentation.svg @@ -0,0 +1,2 @@ + +
[figure text labels: "allocated", "next", "time", "heap end", "heap start", allocation steps 1–5]
\ No newline at end of file diff --git a/blog/content/second-edition/posts/11-allocator-designs/bump-allocation.svg b/blog/content/second-edition/posts/11-allocator-designs/bump-allocation.svg new file mode 100644 index 00000000..ed47bbb5 --- /dev/null +++ b/blog/content/second-edition/posts/11-allocator-designs/bump-allocation.svg @@ -0,0 +1,3 @@ + + +
[figure text labels: "Heap Start", "Heap End", "next" — three heap snapshots]
\ No newline at end of file diff --git a/blog/content/second-edition/posts/11-allocator-designs/fixed-size-block-example.svg b/blog/content/second-edition/posts/11-allocator-designs/fixed-size-block-example.svg new file mode 100644 index 00000000..2a2de226 --- /dev/null +++ b/blog/content/second-edition/posts/11-allocator-designs/fixed-size-block-example.svg @@ -0,0 +1,3 @@ + + +
[figure text labels: "heap start", "head_16", "head_64", "head_512"]
\ No newline at end of file diff --git a/blog/content/second-edition/posts/11-allocator-designs/index.md b/blog/content/second-edition/posts/11-allocator-designs/index.md new file mode 100644 index 00000000..e2378592 --- /dev/null +++ b/blog/content/second-edition/posts/11-allocator-designs/index.md @@ -0,0 +1,1245 @@ ++++ +title = "Allocator Designs" +weight = 11 +path = "allocator-designs" +date = 2020-01-20 + +[extra] +chapter = "Memory Management" ++++ + +This post explains how to implement heap allocators from scratch. It presents and discusses different allocator designs, including bump allocation, linked list allocation, and fixed-size block allocation. For each of the three designs, we will create a basic implementation that can be used for our kernel. + + + +This blog is openly developed on [GitHub]. If you have any problems or questions, please open an issue there. You can also leave comments [at the bottom]. The complete source code for this post can be found in the [`post-11`][post branch] branch. + +[GitHub]: https://github.com/phil-opp/blog_os +[at the bottom]: #comments +[post branch]: https://github.com/phil-opp/blog_os/tree/post-11 + + + +## Introduction + +In the [previous post] we added basic support for heap allocations to our kernel. For that, we [created a new memory region][map-heap] in the page tables and [used the `linked_list_allocator` crate][use-alloc-crate] to manage that memory. While we have a working heap now, we left most of the work to the allocator crate without trying to understand how it works. + +[previous post]: @/second-edition/posts/10-heap-allocation/index.md +[map-heap]: @/second-edition/posts/10-heap-allocation/index.md#creating-a-kernel-heap +[use-alloc-crate]: @/second-edition/posts/10-heap-allocation/index.md#using-an-allocator-crate + +In this post, we will show how to create our own heap allocator from scratch instead of relying on an existing allocator crate. We will discuss different allocator designs, including a simplistic _bump allocator_ and a basic _fixed-size block allocator_, and use this knowledge to implement an allocator with improved performance (compared to the `linked_list_allocator` crate). + +### Design Goals + +The responsibility of an allocator is to manage the available heap memory. It needs to return unused memory on `alloc` calls and keep track of memory freed by `dealloc` so that it can be reused again. Most importantly, it must never hand out memory that is already in use somewhere else because this would cause undefined behavior. + +Apart from correctness, there are many secondary design goals. For example, the allocator should effectively utilize the available memory and keep [_fragmentation_] low. Furthermore, it should work well for concurrent applications and scale to any number of processors. For maximal performance, it could even optimize the memory layout with respect to the CPU caches to improve [cache locality] and avoid [false sharing]. + +[cache locality]: http://docs.cray.com/books/S-2315-50/html-S-2315-50/qmeblljm.html +[_fragmentation_]: https://en.wikipedia.org/wiki/Fragmentation_(computing) +[false sharing]: http://mechanical-sympathy.blogspot.de/2011/07/false-sharing.html + +These requirements can make good allocators very complex. For example, [jemalloc] has over 30.000 lines of code. This complexity is often undesired in kernel code where a single bug can lead to severe security vulnerabilities. 
Fortunately, the allocation patterns of kernel code are often much simpler compared to userspace code, so that relatively simple allocator designs often suffice. + +[jemalloc]: http://jemalloc.net/ + +In the following we present three possible kernel allocator designs and explain their advantages and drawbacks. + +## Bump Allocator + +The most simple allocator design is a _bump allocator_. It allocates memory linearly and only keeps track of the number of allocated bytes and the number of allocations. It is only useful in very specific use cases because it has a severe limitation: it can only free all memory at once. + +### Idea + +The idea behind a bump allocator is to linearly allocate memory by increasing (_"bumping"_) a `next` variable, which points at the beginning of the unused memory. At the beginning, `next` is equal to the start address of the heap. On each allocation, `next` is increased by the allocation so that it always points to the boundary between used and unused memory: + +![The heap memory area at three points in time: + 1: A single allocation exists at the start of the heap; the `next` pointer points to its end + 2: A second allocation was added right after the first; the `next` pointer points to the end of the second allocation + 3: A third allocation was added right after the second one; the `next pointer points to the end of the third allocation](bump-allocation.svg) + +The `next` pointer only moves in a single direction and thus never hands out the same memory region twice. When it reaches the end of the heap, no more memory can be allocated, resulting in an out-of-memory error on the next allocation. + +A bump allocator is often implemented with an allocation counter, which is increased by 1 on each `alloc` call and decreased by 1 on each `dealloc` call. When the allocation counter reaches zero it means that all allocations on the heap were deallocated. In this case, the `next` pointer can be reset to the start address of the heap, so that the complete heap memory is available to allocations again. + +### Implementation + +We start our implementation by declaring a new `allocator::bump` submodule: + +```rust +// in src/allocator.rs + +pub mod bump; +``` + +The content of the submodule lives in a new `src/allocator/bump.rs` file, which we create with the following content: + +```rust +// in src/allocator/bump.rs + +pub struct BumpAllocator { + heap_start: usize, + heap_end: usize, + next: usize, + allocations: usize, +} + +impl BumpAllocator { + /// Creates a new empty bump allocator. + pub const fn new() -> Self { + BumpAllocator { + heap_start: 0, + heap_end: 0, + next: 0, + allocations: 0, + } + } + + /// Initializes the bump allocator with the given heap bounds. + /// + /// This method is unsafe because the caller must ensure that the given + /// memory range is unused. Also, this method must be called only once. + pub unsafe fn init(&mut self, heap_start: usize, heap_size: usize) { + self.heap_start = heap_start; + self.heap_end = heap_start + heap_size; + self.next = heap_start; + } +} +``` + +The `heap_start` and `heap_end` fields keep track of the lower and upper bound of the heap memory region. The caller needs to ensure that these addresses are valid, otherwise the allocator would return invalid memory. For this reason, the `init` function needs to be `unsafe` to call. + +The purpose of the `next` field is to always point to the first unused byte of the heap, i.e. the start address of the next allocation. 
It is set to `heap_start` in the `init` function because at the beginning the complete heap is unused. On each allocation, this field will be increased by the allocation size (_"bumped"_) to ensure that we don't return the same memory region twice. + +The `allocations` field is a simple counter for the active allocations with the goal of resetting the allocator after the last allocation was freed. It is initialized with 0. + +We chose to create a separate `init` function instead of performing the initialization directly in `new` in order to keep the interface identical to the allocator provided by the `linked_list_allocator` crate. This way, the allocators can be switched without additional code changes. + +### Implementing `GlobalAlloc` + +As [explained in the previous post][global-alloc], all heap allocators need to implement the [`GlobalAlloc`] trait, which is defined like this: + +[global-alloc]: @/second-edition/posts/10-heap-allocation/index.md#the-allocator-interface +[`GlobalAlloc`]: https://doc.rust-lang.org/alloc/alloc/trait.GlobalAlloc.html + +```rust +pub unsafe trait GlobalAlloc { + unsafe fn alloc(&self, layout: Layout) -> *mut u8; + unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout); + + unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 { ... } + unsafe fn realloc( + &self, + ptr: *mut u8, + layout: Layout, + new_size: usize + ) -> *mut u8 { ... } +} +``` + +Only the `alloc` and `dealloc` methods are required, the other two methods have default implementations and can be omitted. + +#### First Implementation Attempt + +Let's try to implement the `alloc` method for our `BumpAllocator`: + +```rust +// in src/allocator/bump.rs + +use alloc::alloc::{GlobalAlloc, Layout}; + +unsafe impl GlobalAlloc for BumpAllocator { + unsafe fn alloc(&self, layout: Layout) -> *mut u8 { + // TODO alignment and bounds check + let alloc_start = self.next; + self.next = alloc_start + layout.size(); + self.allocations += 1; + alloc_start as *mut u8 + } + + unsafe fn dealloc(&self, _ptr: *mut u8, _layout: Layout) { + todo!(); + } +} +``` + +First, we use the `next` field as the start address for our allocation. Then we update the `next` field to point at the end address of the allocation, which is the next unused address on the heap. Before returning the start address of the allocation as a `*mut u8` pointer, we increase the `allocations` counter by 1. + +Note that we don't perform any bounds checks or alignment adjustments, so this implementation is not safe yet. This does not matter much because it fails to compile anyway with the following error: + +``` +error[E0594]: cannot assign to `self.next` which is behind a `&` reference + --> src/allocator/bump.rs:29:9 + | +26 | unsafe fn alloc(&self, layout: Layout) -> *mut u8 { + | ----- help: consider changing this to be a mutable reference: `&mut self` +... +29 | self.next = alloc_start + layout.size(); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ `self` is a `&` reference, so the data it refers to cannot be written +``` + +(The same error also occurs for the `self.allocations += 1` line. We omitted it here for brevity.) + +The error occurs because the [`alloc`] and [`dealloc`] methods of the `GlobalAlloc` trait only operate on an immutable `&self` reference, so updating the `next` and `allocations` fields is not possible. This is problematic because updating `next` on every allocation is the essential principle of a bump allocator. 
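Before looking at the fix, it helps to see the underlying pattern in isolation. The following standalone sketch is not part of our kernel code: the `Counter` type and its field are made up for illustration, and it only assumes the `spin` crate that we already use elsewhere. It shows that state wrapped in a `spin::Mutex` can be mutated through a plain `&self` reference, which is exactly what our allocator needs:

```rust
use spin::Mutex;

/// Hypothetical example type, only to illustrate mutation through `&self`.
struct Counter {
    value: Mutex<usize>,
}

impl Counter {
    fn increment(&self) -> usize {
        // `self.value = …` would not compile here, since `self` is a shared reference.
        // Locking the mutex yields a guard that dereferences to `&mut usize`.
        let mut value = self.value.lock();
        *value += 1;
        *value
    }
}
```

The rest of this section applies the same idea to our bump allocator.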
+ +[`alloc`]: https://doc.rust-lang.org/alloc/alloc/trait.GlobalAlloc.html#tymethod.alloc +[`dealloc`]: https://doc.rust-lang.org/alloc/alloc/trait.GlobalAlloc.html#tymethod.dealloc + +Note that the compiler suggestion to change `&self` to `&mut self` in the method declaration does not work here. The reason is that the method signature is defined by the `GlobalAlloc` trait and can't be changed on the implementation side. (I opened an [issue](https://github.com/rust-lang/rust/issues/68049) in the Rust repository about the invalid suggestion.) + +#### `GlobalAlloc` and Mutability + +Before we look at a possible solution to this mutability problem, let's try to understand why the `GlobalAlloc` trait methods are defined with `&self` arguments: As we saw [in the previous post][global-allocator], the global heap allocator is defined by adding the `#[global_allocator]` attribute to a `static` that implements the `GlobalAlloc` trait. Static variables are immutable in Rust, so there is no way to call a method that takes `&mut self` on the static allocator. For this reason, all the methods of `GlobalAlloc` only take an immutable `&self` reference. + +[global-allocator]: @/second-edition/posts/10-heap-allocation/index.md#the-global-allocator-attribute + +Fortunately, there is a way to get a `&mut self` reference from a `&self` reference: We can use synchronized [interior mutability] by wrapping the allocator in a [`spin::Mutex`] spinlock. This type provides a `lock` method that performs [mutual exclusion] and thus safely turns a `&self` reference into a `&mut self` reference. We already used the wrapper type multiple times in our kernel, for example for the [VGA text buffer][vga-mutex]. + +[interior mutability]: https://doc.rust-lang.org/book/ch15-05-interior-mutability.html +[vga-mutex]: @/second-edition/posts/03-vga-text-buffer/index.md#spinlocks +[`spin::Mutex`]: https://docs.rs/spin/0.5.0/spin/struct.Mutex.html +[mutual exclusion]: https://en.wikipedia.org/wiki/Mutual_exclusion + +#### A `Locked` Wrapper Type + +With the help of the `spin::Mutex` wrapper type we can implement the `GlobalAlloc` trait for our bump allocator. The trick is to implement the trait not for the `BumpAllocator` directly, but for the wrapped `spin::Mutex<BumpAllocator>` type: + +```rust +unsafe impl GlobalAlloc for spin::Mutex<BumpAllocator> {…} +``` + +Unfortunately, this still doesn't work because the Rust compiler does not permit trait implementations for types defined in other crates: + +``` +error[E0117]: only traits defined in the current crate can be implemented for arbitrary types + --> src/allocator/bump.rs:28:1 + | +28 | unsafe impl GlobalAlloc for spin::Mutex<BumpAllocator> { + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^-------------------------- + | | | + | | `spin::mutex::Mutex` is not defined in the current crate + | impl doesn't use only types from inside the current crate + | + = note: define and implement a trait or new type instead +``` + +To fix this, we need to create our own wrapper type around `spin::Mutex`: + +```rust +// in src/allocator.rs + +/// A wrapper around spin::Mutex to permit trait implementations. +pub struct Locked<A> { + inner: spin::Mutex<A>, +} + +impl<A> Locked<A> { + pub const fn new(inner: A) -> Self { + Locked { + inner: spin::Mutex::new(inner), + } + } + + pub fn lock(&self) -> spin::MutexGuard<A> { + self.inner.lock() + } +} +``` + +The type is a generic wrapper around a `spin::Mutex<A>`. It imposes no restrictions on the wrapped type `A`, so it can be used to wrap all kinds of types, not just allocators.
It provides a simple `new` constructor function that wraps a given value. For convenience, it also provides a `lock` function that calls `lock` on the wrapped `Mutex`. Since the `Locked` type is general enough to be useful for other allocator implementations too, we put it in the parent `allocator` module. + +#### Implementation for `Locked` + +The `Locked` type is defined in our own crate (in contrast to `spin::Mutex`), so we can use it to implement `GlobalAlloc` for our bump allocator. The full implementation looks like this: + +```rust +// in src/allocator/bump.rs + +use super::{align_up, Locked}; +use alloc::alloc::{GlobalAlloc, Layout}; +use core::ptr; + +unsafe impl GlobalAlloc for Locked<BumpAllocator> { + unsafe fn alloc(&self, layout: Layout) -> *mut u8 { + let mut bump = self.lock(); // get a mutable reference + + let alloc_start = align_up(bump.next, layout.align()); + let alloc_end = match alloc_start.checked_add(layout.size()) { + Some(end) => end, + None => return ptr::null_mut(), + }; + + if alloc_end > bump.heap_end { + ptr::null_mut() // out of memory + } else { + bump.next = alloc_end; + bump.allocations += 1; + alloc_start as *mut u8 + } + } + + unsafe fn dealloc(&self, _ptr: *mut u8, _layout: Layout) { + let mut bump = self.lock(); // get a mutable reference + + bump.allocations -= 1; + if bump.allocations == 0 { + bump.next = bump.heap_start; + } + } +} +``` + +The first step for both `alloc` and `dealloc` is to call the [`Mutex::lock`] method through the `inner` field to get a mutable reference to the wrapped allocator type. The instance remains locked until the end of the method, so that no data race can occur in multithreaded contexts (we will add threading support soon). + +[`Mutex::lock`]: https://docs.rs/spin/0.5.0/spin/struct.Mutex.html#method.lock + +Compared to the previous prototype, the `alloc` implementation now respects alignment requirements and performs a bounds check to ensure that the allocations stay inside the heap memory region. The first step is to round up the `next` address to the alignment specified by the `Layout` argument. The code for the `align_up` function is shown in a moment. We then add the requested allocation size to `alloc_start` to get the end address of the allocation. To prevent integer overflow on large allocations, we use the [`checked_add`] method. If an overflow occurs or if the resulting end address of the allocation is larger than the end address of the heap, we return a null pointer to signal an out-of-memory situation. Otherwise, we update the `next` address and increase the `allocations` counter by 1 like before. Finally, we return the `alloc_start` address converted to a `*mut u8` pointer. + +[`checked_add`]: https://doc.rust-lang.org/std/primitive.usize.html#method.checked_add +[`Layout`]: https://doc.rust-lang.org/alloc/alloc/struct.Layout.html + +The `dealloc` function ignores the given pointer and `Layout` arguments. Instead, it just decreases the `allocations` counter. If the counter reaches `0`, it means that all allocations were freed. In this case, it resets the `next` address to the `heap_start` address to make the complete heap memory available again. + +#### Address Alignment + +The `align_up` function is general enough that we can put it into the parent `allocator` module. A basic implementation looks like this: + +```rust +// in src/allocator.rs + +/// Align the given address `addr` upwards to alignment `align`.
+fn align_up(addr: usize, align: usize) -> usize { + let remainder = addr % align; + if remainder == 0 { + addr // addr already aligned + } else { + addr - remainder + align + } +} +``` + +The function first computes the [remainder] of the division of `addr` by `align`. If the remainder is `0`, the address is already aligned with the given alignment. Otherwise, we align the address by subtracting the remainder (so that the new remainder is 0) and then adding the alignment (so that the address does not become smaller than the original address). + +[remainder]: https://en.wikipedia.org/wiki/Euclidean_division + +Note that this isn't the most efficient way to implement this function. A much faster implementation looks like this: + +```rust +/// Align the given address `addr` upwards to alignment `align`. +/// +/// Requires that `align` is a power of two. +fn align_up(addr: usize, align: usize) -> usize { + (addr + align - 1) & !(align - 1) +} +``` + +This method utilizes the fact that the `GlobalAlloc` trait guarantees that `align` is always a power of two. This makes it possible to create a [bitmask] to align the address in a very efficient way. To understand how it works, let's go through it step by step starting on the right side: + +[bitmask]: https://en.wikipedia.org/wiki/Mask_(computing) + +- Since `align` is a power of two, its [binary representation] has only a single bit set (e.g. `0b000100000`). This means that `align - 1` has all the lower bits set (e.g. `0b00011111`). +- By creating the [bitwise `NOT`] through the `!` operator, we get a number that has all the bits set except for the bits lower than `align` (e.g. `0b…111111111100000`). +- By performing a [bitwise `AND`] on an address and `!(align - 1)`, we align the address _downwards_. This works by clearing all the bits that are lower than `align`. +- Since we want to align upwards instead of downwards, we increase the `addr` by `align - 1` before performing the bitwise `AND`. This way, already aligned addresses remain the same while non-aligned addresses are rounded to the next alignment boundary. + +[binary representation]: https://en.wikipedia.org/wiki/Binary_number#Representation +[bitwise `NOT`]: https://en.wikipedia.org/wiki/Bitwise_operation#NOT +[bitwise `AND`]: https://en.wikipedia.org/wiki/Bitwise_operation#AND + +Which variant you choose is up to you. Both compute the same result, only using different methods. + +### Using It + +To use the bump allocator instead of the `linked_list_allocator` crate, we need to update the `ALLOCATOR` static in `allocator.rs`: + +```rust +// in src/allocator.rs + +use bump::BumpAllocator; + +#[global_allocator] +static ALLOCATOR: Locked<BumpAllocator> = Locked::new(BumpAllocator::new()); +``` + +Here it becomes important that we declared `BumpAllocator::new` and `Locked::new` as [`const` functions]. If they were normal functions, a compilation error would occur because the initialization expression of a `static` must be evaluable at compile time. + +[`const` functions]: https://doc.rust-lang.org/reference/items/functions.html#const-functions + +We don't need to change the `ALLOCATOR.lock().init(HEAP_START, HEAP_SIZE)` call in our `init_heap` function because the bump allocator provides the same interface as the allocator provided by the `linked_list_allocator`. + +Now our kernel uses our bump allocator!
Everything should still work, including the [`heap_allocation` tests] that we created in the previous post: + +[`heap_allocation` tests]: @/second-edition/posts/10-heap-allocation/index.md#adding-a-test + +``` +> cargo xtest --test heap_allocation +[…] +Running 3 tests +simple_allocation... [ok] +large_vec... [ok] +many_boxes... [ok] +``` + +### Discussion + +The big advantage of bump allocation is that it's very fast. Compared to other allocator designs (see below) that need to actively look for a fitting memory block and perform various bookkeeping tasks on `alloc` and `dealloc`, a bump allocator [can be optimized][bump downwards] to just a few assembly instructions. This makes bump allocators useful for optimizing the allocation performance, for example when creating a [virtual DOM library]. + +[bump downwards]: https://fitzgeraldnick.com/2019/11/01/always-bump-downwards.html +[virtual DOM library]: https://hacks.mozilla.org/2019/03/fast-bump-allocated-virtual-doms-with-rust-and-wasm/ + +While a bump allocator is seldom used as the global allocator, the principle of bump allocation is often applied in form of [arena allocation], which basically batches individual allocations together to improve performance. An example for an arena allocator for Rust is the [`toolshed`] crate. + +[arena allocation]: https://mgravell.github.io/Pipelines.Sockets.Unofficial/docs/arenas.html +[`toolshed`]: https://docs.rs/toolshed/0.8.1/toolshed/index.html + +#### The Drawback of a Bump Allocator + +The main limitation of a bump allocator is that it can only reuse deallocated memory after all allocations have been freed. This means that a single long-lived allocation suffices to prevent memory reuse. We can see this when we add a variation of the `many_boxes` test: + +```rust +// in tests/heap_allocation.rs + +#[test_case] +fn many_boxes_long_lived() { + serial_print!("many_boxes_long_lived... "); + let long_lived = Box::new(1); // new + for i in 0..HEAP_SIZE { + let x = Box::new(i); + assert_eq!(*x, i); + } + assert_eq!(*long_lived, 1); // new + serial_println!("[ok]"); +} +``` + +Like the `many_boxes` test, this test creates a large number of allocations to provoke an out-of-memory failure if the allocator does not reuse freed memory. Additionally, the test creates a `long_lived` allocation, which lives for the whole loop execution. + +When we try run our new test, we see that it indeed fails: + +``` +> cargo xtest --test heap_allocation +Running 4 tests +simple_allocation... [ok] +large_vec... [ok] +many_boxes... [ok] +many_boxes_long_lived... [failed] + +Error: panicked at 'allocation error: Layout { size_: 8, align_: 8 }', src/lib.rs:86:5 +``` + +Let's try to understand why this failure occurs in detail: First, the `long_lived` allocation is created at the start of the heap, thereby increasing the `allocations` counter by 1. For each iteration of the loop, a short lived allocation is created and directly freed again before the next iteration starts. This means that the `allocations` counter is temporarily increased to 2 at the beginning of an iteration and decreased to 1 at the end of it. The problem now is that the bump allocator can only reuse memory when _all_ allocations have been freed, i.e. the `allocations` counter falls to 0. Since this doesn't happen before the end of the loop, each loop iteration allocates a new region of memory, leading to an out-of-memory error after a number of iterations. + +#### Fixing the Test? 
+ +There are two potential tricks that we could utilize to fix the test for our bump allocator: + +- We could update `dealloc` to check whether the freed allocation was the last allocation returned by `alloc` by comparing its end address with the `next` pointer. In case they're equal, we can safely reset `next` back to the start address of the freed allocation. This way, each loop iteration reuses the same memory block. +- We could add an `alloc_back` method that allocates memory from the _end_ of the heap using an additional `next_back` field. Then we could manually use this allocation method for all long-lived allocations, thereby separating short-lived and long-lived allocations on the heap. Note that this separation only works if it's clear beforehand how long each allocation lives. Another drawback of this approach is that manually performing allocations is cumbersome and potentially unsafe. + +While both of these approaches work to fix the test, they are no general solution since they are only able to reuse memory in very specific cases. The question is: Is there a general solution that reuses _all_ freed memory? + +#### Reusing All Freed Memory? + +As we learned [in the previous post][heap-intro], allocations can live arbitrarily long and can be freed in an arbitrary order. This means that we need to keep track of a potentially unbounded number of non-continuous, unused memory regions, as illustrated by the following example: + +[heap-intro]: @/second-edition/posts/10-heap-allocation/index.md#dynamic-memory + +![](allocation-fragmentation.svg) + +The graphic shows the heap over the course of time. At the beginning, the complete heap is unused and the `next` address is equal to `heap_start` (line 1). Then the first allocation occurs (line 2). In line 3, a second memory block is allocated and the first allocation is freed. Many more allocations are added in line 4. Half of them are very short-lived and already get freed in line 5, where also another new allocation is added. + +Line 5 shows the fundamental problem: We have five unused memory regions with different sizes in total, but the `next` pointer can only point to the beginning of the last region. While we could store the start addresses and sizes of the other unused memory regions in an array of size 4 for this example, this isn't a general solution since we could easily create an example with 8, 16, or 1000 unused memory regions. + +Normally when we have a potentially unbounded number of items, we can just use a heap allocated collection. This isn't really possible in our case, since the heap allocator can't depend on itself (it would cause endless recursion or deadlocks). So we need to find a different solution. + +## Linked List Allocator + +A common trick to keep track of an arbitrary number of free memory areas when implementing allocators is to use these areas itself as backing storage. This utilizes the fact that the regions are still mapped to a virtual address and backed by a physical frame, but the stored information is not needed anymore. By storing the information about the freed region in the region itself, we can keep track of an unbounded number of freed regions without needing additional memory. + +The most common implementation approach is to construct a single linked list in the freed memory, with each node being a freed memory region: + +![](linked-list-allocation.svg) + +Each list node contains two fields: The size of the memory region and a pointer to the next unused memory region. 
With this approach, we only need a pointer to the first unused region (called `head`) to keep track of all unused regions, independent of their number. The resulting data structure is often called a [_free list_]. + +[_free list_]: https://en.wikipedia.org/wiki/Free_list + +As you might guess from the name, this is the technique that the `linked_list_allocator` crate uses. + +### Implementation + +In the following, we will create our own simple `LinkedListAllocator` type that uses the above approach for keeping track of freed memory regions. This part of the post isn't required for future posts, so you can skip the implementation details if you like. + +#### The Allocator Type + +We start by creating a private `ListNode` struct in a new `allocator::linked_list` submodule: + +```rust +// in src/allocator.rs + +pub mod linked_list; +``` + +```rust +// in src/allocator/linked_list.rs + +struct ListNode { + size: usize, + next: Option<&'static mut ListNode>, +} +``` + +Like in the graphic, a list node has a `size` field and an optional pointer to the next node, represented by the `Option<&'static mut ListNode>` type. The `&'static mut` type semantically describes an [owned] object behind a pointer. Basically, it's a [`Box`] without a destructor that frees the object at the end of the scope. + +[owned]: https://doc.rust-lang.org/book/ch04-01-what-is-ownership.html +[`Box`]: https://doc.rust-lang.org/alloc/boxed/index.html + +We implement the following set of methods for `ListNode`: + +```rust +// in src/allocator/linked_list.rs + +impl ListNode { + const fn new(size: usize) -> Self { + ListNode { size, next: None } + } + + fn start_addr(&self) -> usize { + self as *const Self as usize + } + + fn end_addr(&self) -> usize { + self.start_addr() + self.size + } +} +``` + +The type has a simple constructor function named `new` and methods to calculate the start and end addresses of the represented region. We make the `new` function a [const function], which will be required later when constructing a static linked list allocator. Note that any use of mutable references in const functions (including setting the `next` field to `None`) is still unstable. In order to get it to compile, we need to add **`#![feature(const_fn)]`** to the beginning of our `lib.rs`. + +[const function]: https://doc.rust-lang.org/reference/items/functions.html#const-functions + +With the `ListNode` struct as a building block, we can now create the `LinkedListAllocator` struct: + +```rust +// in src/allocator/linked_list.rs + +pub struct LinkedListAllocator { + head: ListNode, +} + +impl LinkedListAllocator { + /// Creates an empty LinkedListAllocator. + pub const fn new() -> Self { + Self { + head: ListNode::new(0), + } + } + + /// Initialize the allocator with the given heap bounds. + /// + /// This function is unsafe because the caller must guarantee that the given + /// heap bounds are valid and that the heap is unused. This method must be + /// called only once. + pub unsafe fn init(&mut self, heap_start: usize, heap_size: usize) { + self.add_free_region(heap_start, heap_size); + } + + /// Adds the given memory region to the front of the list. + unsafe fn add_free_region(&mut self, addr: usize, size: usize) { + todo!(); + } +} +``` + +The struct contains a `head` node that points to the first heap region. We are only interested in the value of the `next` pointer, so we set the `size` to 0 in the `ListNode::new` function.
Making `head` a `ListNode` instead of just a `&'static mut ListNode` has the advantage that the implementation of the `alloc` method will be simpler.

Like for the bump allocator, the `new` function doesn't initialize the allocator with the heap bounds. In addition to maintaining API compatibility, the reason is that the initialization routine requires writing a node to the heap memory, which can only happen at runtime. The `new` function, however, needs to be a [`const` function] that can be evaluated at compile time, because it will be used for initializing the `ALLOCATOR` static. For this reason, we again provide a separate, non-constant `init` method.

[`const` function]: https://doc.rust-lang.org/reference/items/functions.html#const-functions

The `init` method uses an `add_free_region` method, whose implementation will be shown in a moment. For now, we use the [`todo!`] macro to provide a placeholder implementation that always panics.

[`todo!`]: https://doc.rust-lang.org/core/macro.todo.html

#### The `add_free_region` Method

The `add_free_region` method provides the fundamental _push_ operation on the linked list. We currently only call this method from `init`, but it will also be the central method in our `dealloc` implementation. Remember, the `dealloc` method is called when an allocated memory region is freed again. To keep track of this freed memory region, we want to push it to the linked list.

The implementation of the `add_free_region` method looks like this:

```rust
// in src/allocator/linked_list.rs

use super::align_up;
use core::mem;

impl LinkedListAllocator {
    /// Adds the given memory region to the front of the list.
    unsafe fn add_free_region(&mut self, addr: usize, size: usize) {
        // ensure that the freed region is capable of holding ListNode
        assert!(align_up(addr, mem::align_of::<ListNode>()) == addr);
        assert!(size >= mem::size_of::<ListNode>());

        // create a new list node and append it at the start of the list
        let mut node = ListNode::new(size);
        node.next = self.head.next.take();
        let node_ptr = addr as *mut ListNode;
        node_ptr.write(node);
        self.head.next = Some(&mut *node_ptr)
    }
}
```

The method takes a memory region represented by an address and size as argument and adds it to the front of the list. First, it ensures that the given region has the necessary size and alignment for storing a `ListNode`. Then it creates the node and inserts it into the list through the following steps:

![](linked-list-allocator-push.svg)

Step 0 shows the state of the heap before `add_free_region` is called. In step 1, the method is called with the memory region marked as `freed` in the graphic. After the initial checks, the method creates a new `node` on its stack with the size of the freed region. It then uses the [`Option::take`] method to set the `next` pointer of the node to the current `head` pointer, thereby resetting the `head` pointer to `None`.

[`Option::take`]: https://doc.rust-lang.org/core/option/enum.Option.html#method.take

In step 2, the method writes the newly created `node` to the beginning of the freed memory region through the [`write`] method. It then points the `head` pointer to the new node. The resulting pointer structure looks a bit chaotic because the freed region is always inserted at the beginning of the list, but if we follow the pointers we see that each free region is still reachable from the `head` pointer.
[`write`]: https://doc.rust-lang.org/std/primitive.pointer.html#method.write

#### The `find_region` Method

The second fundamental operation on a linked list is finding an entry and removing it from the list. This is the central operation needed for implementing the `alloc` method. We implement the operation as a `find_region` method in the following way:

```rust
// in src/allocator/linked_list.rs

impl LinkedListAllocator {
    /// Looks for a free region with the given size and alignment and removes
    /// it from the list.
    ///
    /// Returns a tuple of the list node and the start address of the allocation.
    fn find_region(&mut self, size: usize, align: usize)
        -> Option<(&'static mut ListNode, usize)>
    {
        // reference to current list node, updated for each iteration
        let mut current = &mut self.head;
        // look for a large enough memory region in linked list
        while let Some(ref mut region) = current.next {
            if let Ok(alloc_start) = Self::alloc_from_region(&region, size, align) {
                // region suitable for allocation -> remove node from list
                let next = region.next.take();
                let ret = Some((current.next.take().unwrap(), alloc_start));
                current.next = next;
                return ret;
            } else {
                // region not suitable -> continue with next region
                current = current.next.as_mut().unwrap();
            }
        }

        // no suitable region found
        None
    }
}
```

The method uses a `current` variable and a [`while let` loop] to iterate over the list elements. At the beginning, `current` is set to the (dummy) `head` node. On each iteration, it is then updated to the `next` field of the current node (in the `else` block). If the region is suitable for an allocation with the given size and alignment, the region is removed from the list and returned together with the `alloc_start` address.

[`while let` loop]: https://doc.rust-lang.org/reference/expressions/loop-expr.html#predicate-pattern-loops

When the `current.next` pointer becomes `None`, the loop exits. This means that we iterated over the whole list but found no region that is suitable for an allocation. In that case, we return `None`. The check whether a region is suitable is done by an `alloc_from_region` function, whose implementation will be shown in a moment.

Let's take a more detailed look at how a suitable region is removed from the list:

![](linked-list-allocator-remove-region.svg)

Step 0 shows the situation before any pointer adjustments. The `region` and `current` regions and the `region.next` and `current.next` pointers are marked in the graphic. In step 1, both the `region.next` and `current.next` pointers are reset to `None` by using the [`Option::take`] method. The original pointers are stored in local variables called `next` and `ret`.

In step 2, the `current.next` pointer is set to the local `next` pointer, which is the original `region.next` pointer. The effect is that `current` now directly points to the region after `region`, so that `region` is no longer an element of the linked list. The function then returns the pointer to `region` stored in the local `ret` variable.

##### The `alloc_from_region` Function

The `alloc_from_region` function checks whether a region is suitable for an allocation with a given size and alignment. It is defined like this:

```rust
// in src/allocator/linked_list.rs

impl LinkedListAllocator {
    /// Try to use the given region for an allocation with given size and
    /// alignment.
    ///
    /// Returns the allocation start address on success.
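    /// Returns an error if the allocation does not fit into the region or if
    /// the rest of the region would be too small to store its own `ListNode`.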
    fn alloc_from_region(region: &ListNode, size: usize, align: usize)
        -> Result<usize, ()>
    {
        let alloc_start = align_up(region.start_addr(), align);
        let alloc_end = alloc_start.checked_add(size).ok_or(())?;

        if alloc_end > region.end_addr() {
            // region too small
            return Err(());
        }

        let excess_size = region.end_addr() - alloc_end;
        if excess_size > 0 && excess_size < mem::size_of::<ListNode>() {
            // rest of region too small to hold a ListNode (required because the
            // allocation splits the region in a used and a free part)
            return Err(());
        }

        // region suitable for allocation
        Ok(alloc_start)
    }
}
```

First, the function calculates the start and end address of a potential allocation, using the `align_up` function we defined earlier and the [`checked_add`] method. If an overflow occurs or if the end address is beyond the end address of the region, the allocation doesn't fit in the region and we return an error.

The function performs a less obvious check after that. This check is necessary because most of the time an allocation does not fit a suitable region perfectly, so that a part of the region remains usable after the allocation. This part of the region must store its own `ListNode` after the allocation, so it must be large enough to do so. The check verifies exactly that: either the allocation fits perfectly (`excess_size == 0`) or the excess size is large enough to store a `ListNode`.

#### Implementing `GlobalAlloc`

With the fundamental operations provided by the `add_free_region` and `find_region` methods, we can now finally implement the `GlobalAlloc` trait. As with the bump allocator, we don't implement the trait directly for the `LinkedListAllocator`, but only for a wrapped `Locked<LinkedListAllocator>`. The [`Locked` wrapper] adds interior mutability through a spinlock, which allows us to modify the allocator instance even though the `alloc` and `dealloc` methods only take `&self` references.

[`Locked` wrapper]: @/second-edition/posts/11-allocator-designs/index.md#a-locked-wrapper-type

The implementation looks like this:

```rust
// in src/allocator/linked_list.rs

use super::Locked;
use alloc::alloc::{GlobalAlloc, Layout};
use core::ptr;

unsafe impl GlobalAlloc for Locked<LinkedListAllocator> {
    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
        // perform layout adjustments
        let (size, align) = LinkedListAllocator::size_align(layout);
        let mut allocator = self.lock();

        if let Some((region, alloc_start)) = allocator.find_region(size, align) {
            let alloc_end = alloc_start.checked_add(size).expect("overflow");
            let excess_size = region.end_addr() - alloc_end;
            if excess_size > 0 {
                allocator.add_free_region(alloc_end, excess_size);
            }
            alloc_start as *mut u8
        } else {
            ptr::null_mut()
        }
    }

    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
        // perform layout adjustments
        let (size, _) = LinkedListAllocator::size_align(layout);

        self.lock().add_free_region(ptr as usize, size)
    }
}
```

Let's start with the `dealloc` method because it is simpler: First, it performs some layout adjustments, which we will explain in a moment, and retrieves a `&mut LinkedListAllocator` reference by calling the [`Mutex::lock`] function on the [`Locked` wrapper]. Then it calls the `add_free_region` function to add the deallocated region to the free list.

The `alloc` method is a bit more complex. It starts with the same layout adjustments and also calls the [`Mutex::lock`] function to receive a mutable allocator reference.
Then it uses the `find_region` method to find a suitable memory region for the allocation and remove it from the list. If this doesn't succeed and `None` is returned, it returns `null_mut` to signal an error as there is no suitable memory region.

In the success case, the `find_region` method returns a tuple of the suitable region (no longer in the list) and the start address of the allocation. Using `alloc_start`, the allocation size, and the end address of the region, it calculates the end address of the allocation and the excess size again. If the excess size is not zero, it calls `add_free_region` to add the excess part of the memory region back to the free list. Finally, it returns the `alloc_start` address cast to a `*mut u8` pointer.

#### Layout Adjustments

So what are these layout adjustments that we do at the beginning of both `alloc` and `dealloc`? They ensure that each allocated block is capable of storing a `ListNode`. This is important because the memory block is going to be deallocated at some point, where we want to write a `ListNode` to it. If the block is smaller than a `ListNode` or does not have the correct alignment, undefined behavior can occur.

The layout adjustments are performed by a `size_align` function, which is defined like this:

```rust
// in src/allocator/linked_list.rs

impl LinkedListAllocator {
    /// Adjust the given layout so that the resulting allocated memory
    /// region is also capable of storing a `ListNode`.
    ///
    /// Returns the adjusted size and alignment as a (size, align) tuple.
    fn size_align(layout: Layout) -> (usize, usize) {
        let layout = layout
            .align_to(mem::align_of::<ListNode>())
            .expect("adjusting alignment failed")
            .pad_to_align();
        let size = layout.size().max(mem::size_of::<ListNode>());
        (size, layout.align())
    }
}
```

First, the function uses the [`align_to`] method on the passed [`Layout`] to increase the alignment to the alignment of a `ListNode` if necessary. It then uses the [`pad_to_align`] method to round up the size to a multiple of the alignment to ensure that the start address of the next memory block will have the correct alignment for storing a `ListNode` too.
In the second step, it uses the [`max`] method to enforce a minimum allocation size of `mem::size_of::<ListNode>()`. This way, the `dealloc` function can safely write a `ListNode` to the freed memory block.

[`align_to`]: https://doc.rust-lang.org/core/alloc/struct.Layout.html#method.align_to
[`pad_to_align`]: https://doc.rust-lang.org/core/alloc/struct.Layout.html#method.pad_to_align
[`max`]: https://doc.rust-lang.org/std/cmp/trait.Ord.html#method.max

Both the `align_to` and the `pad_to_align` methods are still unstable. To enable them, we need to add **`#![feature(alloc_layout_extra)]`** to the beginning of our `lib.rs`.

### Using it

We can now update the `ALLOCATOR` static in the `allocator` module to use our new `LinkedListAllocator`:

```rust
// in src/allocator.rs

use linked_list::LinkedListAllocator;

#[global_allocator]
static ALLOCATOR: Locked<LinkedListAllocator> =
    Locked::new(LinkedListAllocator::new());
```

Since the `init` function behaves the same for the bump and linked list allocators, we don't need to modify the `init` call in `init_heap`.

When we now run our `heap_allocation` tests again, we see that all tests pass, including the `many_boxes_long_lived` test that failed with the bump allocator:

```
> cargo xtest --test heap_allocation
simple_allocation... [ok]
large_vec... [ok]
many_boxes... [ok]
many_boxes_long_lived...
[ok] +``` + +This shows that our linked list allocator is able to reuse freed memory for subsequent allocations. + +### Discussion + +In contrast to the bump allocator, the linked list allocator is much more suitable as a general purpose allocator, mainly because it is able to directly reuse freed memory. However, it also has some drawbacks. Some of them are only caused by our basic implementation, but there are also fundamental drawbacks of the allocator design itself. + +#### Merging Freed Blocks + +The main problem of our implementation is that it only splits the heap into smaller blocks, but never merges them back together. Consider this example: + +![](linked-list-allocator-fragmentation-on-dealloc.svg) + +In the first line, three allocations are created on the heap. Two of them are freed again in line 2 and the third is freed in line 3. Now the complete heap is unused again, but it is still split into four individual blocks. At this point, a large allocation might not be possible anymore because none of the four blocks is large enough. Over time, the process continues and the heap is split into smaller and smaller blocks. At some point, the heap is so fragmented that even normal sized allocations will fail. + +To fix this problem, we need to merge adjacent freed blocks back together. For the above example, this would mean the following: + +![](linked-list-allocator-merge-on-dealloc.svg) + +Like before, two of the three allocations are freed in line `2`. Instead of keeping the fragmented heap, we now perform an additional step in line `2a` to merge the two rightmost blocks back together. In line `3`, the third allocation is freed (like before), resulting in a completely unused heap represented by three distinct blocks. In an additional merging step in line `3a` we then merge the three adjacent blocks back together. + +The `linked_list_allocator` crate implements this merging strategy in the following way: Instead of inserting freed memory blocks at the beginning of the linked list on `deallocate`, it always keeps the list sorted by start address. This way, merging can be performed directly on the `deallocate` call by examining the addresses and sizes of the two neighbor blocks in the list. Of course, the deallocation operation is slower this way, but it prevents the heap fragmentation we saw above. + +#### Performance + +As we learned above, the bump allocator is extremely fast and can be optimized to just a few assembly operations. The linked list allocator performs much worse in this category. The problem is that an allocation request might need to traverse the complete linked list until it finds a suitable block. + +Since the list length depends on the number of unused memory blocks, the performance can vary extremely for different programs. A program that only creates a couple of allocations will experience a relatively fast allocation performance. For a program that fragments the heap with many allocations, however, the allocation performance will be very bad because the linked list will be very long and mostly contain very small blocks. + +It's worth noting that this performance issue isn't a problem caused by our basic implementation, but a fundamental problem of the linked list approach. Since allocation performance can be very important for kernel-level code, we explore a third allocator design in the following that trades improved performance for reduced memory utilization. 
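To make the merging strategy from the _Merging Freed Blocks_ section above a bit more concrete, here is a small sketch of the core check. It is not the code of the `linked_list_allocator` crate; it assumes the simplification that free regions are represented as `(start_address, size)` pairs that are kept sorted by start address:

```rust
/// Sketch: merge two free regions, given as (start address, size) pairs sorted
/// by start address, if they are directly adjacent in memory.
fn try_merge(first: (usize, usize), second: (usize, usize)) -> Option<(usize, usize)> {
    let (first_addr, first_size) = first;
    let (second_addr, second_size) = second;
    if first_addr + first_size == second_addr {
        // no gap between the regions -> combine them into one larger region
        Some((first_addr, first_size + second_size))
    } else {
        // the regions are separated by allocated memory -> keep them separate
        None
    }
}
```

With an address-sorted free list, `deallocate` can apply this check to the freed region and its direct predecessor and successor in the list, so at most two merges are needed per deallocation.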
+ +## Fixed-Size Block Allocator + +In the following, we present an allocator design that uses fixed-size memory blocks for fulfilling allocation requests. This way, the allocator often returns blocks that are larger than needed for allocations, which results in wasted memory due to [internal fragmentation]. On the other hand, it drastically reduces the time required to find a suitable block (compared to the linked list allocator), resulting in much better allocation performance. + +### Introduction + +The idea behind a _fixed-size block allocator_ is the following: Instead of allocating exactly as much memory as requested, we define a small number of block sizes and round up each allocation to the next block size. For example, with block sizes of 16, 64, and 512 bytes, an allocation of 4 bytes would return a 16-byte block, an allocation of 48 bytes a 64-byte block, and an allocation of 128 bytes an 512-byte block. + +Like the linked list allocator, we keep track of the unused memory by creating a linked list in the unused memory. However, instead of using a single list with different block sizes, we create a separate list for each size class. Each list then only stores blocks of a single size. For example, with block sizes 16, 64, and 512 there would be three separate linked lists in memory: + +![](fixed-size-block-example.svg). + +Instead of a single `head` pointer, we have the three head pointers `head_16`, `head_64`, and `head_512` that each point to the first unused block of the corresponding size. All nodes in a single list have the same size. For example, the list started by the `head_16` pointer only contains 16-byte blocks. This means that we no longer need to store the size in each list node since it is already specified by the name of the head pointer. + +Since each element in a list has the same size, each list element is equally suitable for an allocation request. This means that we can very efficiently perform an allocation using the following steps: + +- Round up the requested allocation size to the next block size. For example, when an allocation of 12 bytes is requested, we would choose the block size 16 in the above example. +- Retrieve the head pointer for the list, e.g. from an array. For block size 16, we need to use `head_16`. +- Remove the first block from the list and return it. + +Most notably, we can always return the first element of the list and no longer need to traverse the full list. Thus, allocations are much faster than with the linked list allocator. + +#### Block Sizes and Wasted Memory + +Depending on the block sizes, we lose a lot of memory by rounding up. For example, when a 512-byte block is returned for a 128 byte allocation, three quarters of the allocated memory are unused. By defining reasonable block sizes, it is possible to limit the amount of wasted memory to some degree. For example, when using the powers of 2 (4, 8, 16, 32, 64, 128, …) as block sizes, we can limit the memory waste to half of the allocation size in the worst case and a quarter of the allocation size in the average case. + +It is also common to optimize block sizes based on common allocation sizes in a program. For example, we could additionally add block size 24 to improve memory usage for programs that often perform allocations of 24 bytes. This way, the amount of wasted memory can be often reduced without losing the performance benefits. + +#### Deallocation + +Like allocation, deallocation is also very performant. 
It involves the following steps:

- Round up the freed allocation size to the next block size. This is required since the compiler only passes the requested allocation size to `dealloc`, not the size of the block that was returned by `alloc`. By using the same size-adjustment function in both `alloc` and `dealloc` we can make sure that we always free the correct amount of memory.
- Retrieve the head pointer for the list, e.g. from an array.
- Add the freed block to the front of the list by updating the head pointer.

Most notably, no traversal of the list is required for deallocation either. This means that the time required for a `dealloc` call stays the same regardless of the list length.

#### Fallback Allocator

Given that large allocations (>2KB) are often rare, especially in operating system kernels, it might make sense to fall back to a different allocator for these allocations. For example, we could fall back to a linked list allocator for allocations greater than 2048 bytes in order to reduce memory waste. Since only very few allocations of that size are expected, the linked list would stay small so that (de)allocations would still be reasonably fast.

#### Creating new Blocks

Above, we assumed that there are always enough blocks of a specific size in the list to fulfill all allocation requests. However, at some point the linked list for a block size becomes empty. At this point, there are two ways we can create new unused blocks of a specific size to fulfill an allocation request:

- Allocate a new block from the fallback allocator (if there is one).
- Split a larger block from a different list. This works best if the block sizes are powers of two. For example, a 32-byte block can be split into two 16-byte blocks.

For our implementation, we will allocate new blocks from the fallback allocator since the implementation is much simpler.

### Implementation

Now that we know how a fixed-size block allocator works, we can start our implementation. We won't depend on the implementation of the linked list allocator created in the previous section, so you can follow this part even if you skipped the linked list allocator implementation.

#### List Node

We start our implementation by creating a `ListNode` type in a new `allocator::fixed_size_block` module:

```rust
// in src/allocator.rs

pub mod fixed_size_block;
```

```rust
// in src/allocator/fixed_size_block.rs

struct ListNode {
    next: Option<&'static mut ListNode>,
}
```

This type is similar to the `ListNode` type of our [linked list allocator implementation], with the difference that we don't have a second `size` field. The `size` field isn't needed because every block in a list has the same size with the fixed-size block allocator design.

[linked list allocator implementation]: #the-allocator-type

#### Block Sizes

Next, we define a constant `BLOCK_SIZES` slice with the block sizes used for our implementation:

```rust
// in src/allocator/fixed_size_block.rs

/// The block sizes to use.
///
/// The sizes must each be a power of 2 because they are also used as
/// the block alignment (alignments must always be powers of 2).
const BLOCK_SIZES: &[usize] = &[8, 16, 32, 64, 128, 256, 512, 1024, 2048];
```

As block sizes, we use powers of 2 starting from 8 up to 2048. We don't define any block sizes smaller than 8 because each block must be capable of storing a 64-bit pointer to the next block when freed.
For allocations greater than 2048 bytes we will fall back to a linked list allocator. + +To simplify the implementation, we define that the size of a block is also its required alignment in memory. So a 16 byte block is always aligned on a 16-byte boundary and a 512 byte block is aligned on a 512-byte boundary. Since alignments always need to be powers of 2, this rules out any other block sizes. If we need block sizes that are not powers of 2 in the future, we can still adjust our implementation for this (e.g. by defining a second `BLOCK_ALIGNMENTS` array). + +#### The Allocator Type + +Using the `ListNode` type and the `BLOCK_SIZES` slice, we can now define our allocator type: + +```rust +// in src/allocator/fixed_size_block.rs + +pub struct FixedSizeBlockAllocator { + list_heads: [Option<&'static mut ListNode>; BLOCK_SIZES.len()], + fallback_allocator: linked_list_allocator::Heap, +} +``` + +The `list_heads` field is an array of `head` pointers, one for each block size. This is implemented by using the `len()` of the `BLOCK_SIZES` slice as the array length. As a fallback allocator for allocations larger than the largest block size we use the allocator provided by the `linked_list_allocator`. We could also used the `LinkedListAllocator` we implemented ourselves instead, but it has the disadvantage that it does not [merge freed blocks]. + +[merge freed blocks]: #merging-freed-blocks + +For constructing a `FixedSizeBlockAllocator`, we provide the same `new` and `init` functions that we implemented for the other allocator types too: + +```rust +// in src/allocator/fixed_size_block.rs + +impl FixedSizeBlockAllocator { + /// Creates an empty FixedSizeBlockAllocator. + pub const fn new() -> Self { + FixedSizeBlockAllocator { + list_heads: [None; BLOCK_SIZES.len()], + fallback_allocator: linked_list_allocator::Heap::empty(), + } + } + + /// Initialize the allocator with the given heap bounds. + /// + /// This function is unsafe because the caller must guarantee that the given + /// heap bounds are valid and that the heap is unused. This method must be + /// called only once. + pub unsafe fn init(&mut self, heap_start: usize, heap_size: usize) { + self.fallback_allocator.init(heap_start, heap_size); + } +} +``` + +The `new` function just initializes the `list_heads` array with empty nodes and creates an [`empty`] linked list allocator as `fallback_allocator`. Since array initializations using non-`Copy` types are still unstable, we need to add **`#![feature(const_in_array_repeat_expressions)]`** to the beginning of our `lib.rs`. The reason that `None` is not `Copy` in this case is that `ListNode` does not implement `Copy`. Thus, the `Option` wrapper and its `None` variant are not `Copy` either. + +[`empty`]: https://docs.rs/linked_list_allocator/0.6.4/linked_list_allocator/struct.Heap.html#method.empty + +The unsafe `init` function only calls the [`init`] function of the `fallback_allocator` without doing any additional initialization of the `list_heads` array. Instead, we will initialize the lists lazily on `alloc` and `dealloc` calls. + +[`init`]: https://docs.rs/linked_list_allocator/0.6.4/linked_list_allocator/struct.Heap.html#method.init + +For convenience, we also create a private `fallback_alloc` method that allocates using the `fallback_allocator`: + +```rust +// in src/allocator/fixed_size_block.rs + +use alloc::alloc::Layout; +use core::ptr; + +impl FixedSizeBlockAllocator { + /// Allocates using the fallback allocator. 
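    /// Returns a null pointer if the fallback allocator cannot serve the request.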
    fn fallback_alloc(&mut self, layout: Layout) -> *mut u8 {
        match self.fallback_allocator.allocate_first_fit(layout) {
            Ok(ptr) => ptr.as_ptr(),
            Err(_) => ptr::null_mut(),
        }
    }
}
```

The [`Heap`] type of the `linked_list_allocator` crate does not implement [`GlobalAlloc`] (as it's [not possible without locking]). Instead, it provides an [`allocate_first_fit`] method that has a slightly different interface. Instead of returning a `*mut u8` and using a null pointer to signal an error, it returns a `Result<NonNull<u8>, AllocErr>`. The [`NonNull`] type is an abstraction for a raw pointer that is guaranteed to not be the null pointer. The [`AllocErr`] type is a marker type for signaling an allocation error. By mapping the `Ok` case to the [`NonNull::as_ptr`] method and the `Err` case to a null pointer, we can easily translate this back to a `*mut u8` type.

[`Heap`]: https://docs.rs/linked_list_allocator/0.6.4/linked_list_allocator/struct.Heap.html
[not possible without locking]: #globalalloc-and-mutability
[`allocate_first_fit`]: https://docs.rs/linked_list_allocator/0.6.4/linked_list_allocator/struct.Heap.html#method.allocate_first_fit
[`NonNull`]: https://doc.rust-lang.org/nightly/core/ptr/struct.NonNull.html
[`AllocErr`]: https://doc.rust-lang.org/nightly/core/alloc/struct.AllocErr.html
[`NonNull::as_ptr`]: https://doc.rust-lang.org/nightly/core/ptr/struct.NonNull.html#method.as_ptr

#### Calculating the List Index

Before we implement the `GlobalAlloc` trait, we define a `list_index` helper function that returns the lowest possible block size for a given [`Layout`]:

```rust
// in src/allocator/fixed_size_block.rs

/// Choose an appropriate block size for the given layout.
///
/// Returns an index into the `BLOCK_SIZES` array.
fn list_index(layout: &Layout) -> Option<usize> {
    let required_block_size = layout.size().max(layout.align());
    BLOCK_SIZES.iter().position(|&s| s >= required_block_size)
}
```

The block must have at least the size and alignment required by the given `Layout`. Since we defined that the block size is also its alignment, this means that the `required_block_size` is the [maximum] of the layout's [`size()`] and [`align()`] attributes. To find the next-larger block in the `BLOCK_SIZES` slice, we first use the [`iter()`] method to get an iterator and then the [`position()`] method to find the index of the first block that is at least as large as the `required_block_size`.

[maximum]: https://doc.rust-lang.org/core/cmp/trait.Ord.html#method.max
[`size()`]: https://doc.rust-lang.org/core/alloc/struct.Layout.html#method.size
[`align()`]: https://doc.rust-lang.org/core/alloc/struct.Layout.html#method.align
[`iter()`]: https://doc.rust-lang.org/core/primitive.slice.html#method.iter
[`position()`]: https://doc.rust-lang.org/core/iter/trait.Iterator.html#method.position

Note that we don't return the block size itself, but the index into the `BLOCK_SIZES` slice. The reason is that we want to use the returned index as an index into the `list_heads` array.
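To get a feel for how size and alignment interact here, consider the following illustrative checks. They are not part of the implementation; the concrete index values assume the `BLOCK_SIZES` slice defined above:

```rust
// in src/allocator/fixed_size_block.rs (illustrative only)

/// Example results of `list_index`, assuming
/// BLOCK_SIZES = [8, 16, 32, 64, 128, 256, 512, 1024, 2048].
fn list_index_examples() {
    // 12 bytes with alignment 4: required block size 12 -> 16-byte blocks (index 1)
    assert_eq!(list_index(&Layout::from_size_align(12, 4).unwrap()), Some(1));
    // a small allocation with a large alignment is dominated by the alignment:
    // 4 bytes with alignment 32 -> 32-byte blocks (index 2)
    assert_eq!(list_index(&Layout::from_size_align(4, 32).unwrap()), Some(2));
    // larger than the largest block size -> no index, handled by the fallback allocator
    assert_eq!(list_index(&Layout::from_size_align(4096, 8).unwrap()), None);
}
```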
#### Implementing `GlobalAlloc`

The last step is to implement the `GlobalAlloc` trait:

```rust
// in src/allocator/fixed_size_block.rs

use super::Locked;
use alloc::alloc::GlobalAlloc;

unsafe impl GlobalAlloc for Locked<FixedSizeBlockAllocator> {
    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
        todo!();
    }

    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
        todo!();
    }
}
```

Like for the other allocators, we don't implement the `GlobalAlloc` trait directly for our allocator type, but use the [`Locked` wrapper] to add synchronized interior mutability. Since the `alloc` and `dealloc` implementations are relatively large, we introduce them one by one in the following.

##### `alloc`

The implementation of the `alloc` method looks like this:

```rust
// in `impl` block in src/allocator/fixed_size_block.rs

unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
    let mut allocator = self.lock();
    match list_index(&layout) {
        Some(index) => {
            match allocator.list_heads[index].take() {
                Some(node) => {
                    allocator.list_heads[index] = node.next.take();
                    node as *mut ListNode as *mut u8
                }
                None => {
                    // no block exists in list => allocate new block
                    let block_size = BLOCK_SIZES[index];
                    // only works if all block sizes are a power of 2
                    let block_align = block_size;
                    let layout = Layout::from_size_align(block_size, block_align)
                        .unwrap();
                    allocator.fallback_alloc(layout)
                }
            }
        }
        None => allocator.fallback_alloc(layout),
    }
}
```

Let's go through it step by step:

First, we use the `Locked::lock` method to get a mutable reference to the wrapped allocator instance. Next, we call the `list_index` function we just defined to calculate the appropriate block size for the given layout and get the corresponding index into the `list_heads` array. If this index is `None`, no block size fits the allocation, so we perform the allocation with the `fallback_allocator` via the `fallback_alloc` function.

If the list index is `Some`, we try to remove the first node in the corresponding list started by `list_heads[index]` using the [`Option::take`] method. If the list is not empty, we enter the `Some(node)` branch of the `match` statement, where we point the head pointer of the list to the successor of the popped `node` (by using [`take`][`Option::take`] again). Finally, we return the popped `node` pointer as a `*mut u8`.

[`Option::take`]: https://doc.rust-lang.org/core/option/enum.Option.html#method.take

If the list head is `None`, it indicates that the list of blocks is empty. This means that we need to construct a new block as [described above](#creating-new-blocks). For that, we first get the current block size from the `BLOCK_SIZES` slice and use it as both the size and the alignment for the new block. Then we create a new `Layout` from it and call the `fallback_alloc` method to perform the allocation. The reason for adjusting the size and alignment in this way is that the block will be added to the block list on deallocation.
#### `dealloc`

The implementation of the `dealloc` method looks like this:

```rust
// in src/allocator/fixed_size_block.rs

use core::{mem, ptr::NonNull};

// inside the `unsafe impl GlobalAlloc` block

unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
    let mut allocator = self.lock();
    match list_index(&layout) {
        Some(index) => {
            let new_node = ListNode {
                next: allocator.list_heads[index].take(),
            };
            // verify that block has size and alignment required for storing node
            assert!(mem::size_of::<ListNode>() <= BLOCK_SIZES[index]);
            assert!(mem::align_of::<ListNode>() <= BLOCK_SIZES[index]);
            let new_node_ptr = ptr as *mut ListNode;
            new_node_ptr.write(new_node);
            allocator.list_heads[index] = Some(&mut *new_node_ptr);
        }
        None => {
            let ptr = NonNull::new(ptr).unwrap();
            allocator.fallback_allocator.deallocate(ptr, layout);
        }
    }
}
```

Like in `alloc`, we first use the `lock` method to get a mutable allocator reference and then the `list_index` function to get the block list corresponding to the given `Layout`. If the index is `None`, no fitting block size exists in `BLOCK_SIZES`, which indicates that the allocation was created by the fallback allocator. Therefore we use its [`deallocate`][`Heap::deallocate`] method to free the memory again. The method expects a [`NonNull`] instead of a `*mut u8`, so we need to convert the pointer first. (The `unwrap` call only fails when the pointer is null, which should never happen when the compiler calls `dealloc`.)

[`Heap::deallocate`]: https://docs.rs/linked_list_allocator/0.6.4/linked_list_allocator/struct.Heap.html#method.deallocate

If `list_index` returns a block index, we need to add the freed memory block to the list. For that, we first create a new `ListNode` that points to the current list head (by using [`Option::take`] again). Before we write the new node into the freed memory block, we first assert that the current block size specified by `index` has the required size and alignment for storing a `ListNode`. Then we perform the write by converting the given `*mut u8` pointer to a `*mut ListNode` pointer and then calling the unsafe [`write`][`pointer::write`] method on it. The last step is to set the head pointer of the list, which is currently `None` since we called `take` on it, to our newly written `ListNode`. For that we convert the raw `new_node_ptr` to a mutable reference.

[`pointer::write`]: https://doc.rust-lang.org/std/primitive.pointer.html#method.write

There are a few things worth noting:

- We don't differentiate between blocks allocated from a block list and blocks allocated from the fallback allocator. This means that new blocks created in `alloc` are added to the block list on `dealloc`, thereby increasing the number of blocks of that size.
- The `alloc` method is the only place where new blocks are created in our implementation. This means that we initially start with empty block lists and only fill the lists lazily when allocations for that block size are performed.
- We don't need `unsafe` blocks in `alloc` and `dealloc`, even though we perform some `unsafe` operations. The reason is that Rust currently treats the complete body of unsafe functions as one large `unsafe` block. Since using explicit `unsafe` blocks has the advantage that it's obvious which operations are unsafe and which are not, there is a [proposed RFC](https://github.com/rust-lang/rfcs/pull/2585) to change this behavior.
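The size and alignment asserts in `dealloc` hold for every entry of `BLOCK_SIZES`, not just the index currently used. If you want to convince yourself of that once instead of on every deallocation, a check along the following lines could be added. This is only an illustrative sketch, not part of the implementation above:

```rust
// in src/allocator/fixed_size_block.rs (illustrative sketch)

/// Check that every block size can hold a `ListNode` with correct alignment.
fn block_sizes_can_store_list_nodes() {
    for &size in BLOCK_SIZES {
        assert!(mem::size_of::<ListNode>() <= size);
        assert!(mem::align_of::<ListNode>() <= size);
    }
}
```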
### Using it

To use our new `FixedSizeBlockAllocator`, we need to update the `ALLOCATOR` static in the `allocator` module:

```rust
// in src/allocator.rs

use fixed_size_block::FixedSizeBlockAllocator;

#[global_allocator]
static ALLOCATOR: Locked<FixedSizeBlockAllocator> = Locked::new(
    FixedSizeBlockAllocator::new());
```

Since the `init` function behaves the same for all allocators we implemented, we don't need to modify the `init` call in `init_heap`.

When we now run our `heap_allocation` tests again, all tests should still pass:

```
> cargo xtest --test heap_allocation
simple_allocation... [ok]
large_vec... [ok]
many_boxes... [ok]
many_boxes_long_lived... [ok]
```

Our new allocator seems to work!

### Discussion

While the fixed-size block approach has much better performance than the linked list approach, it wastes up to half of the memory when using powers of 2 as block sizes. Whether this tradeoff is worth it heavily depends on the application type. For an operating system kernel, where performance is critical, the fixed-size block approach seems to be the better choice.

On the implementation side, there are various things that we could improve in our current implementation:

- Instead of only allocating blocks lazily using the fallback allocator, it might be better to pre-fill the lists to improve the performance of initial allocations.
- To simplify the implementation, we only allowed block sizes that are powers of 2 so that we could also use them as the block alignment. By storing (or calculating) the alignment in a different way, we could also allow arbitrary other block sizes. This way, we could add more block sizes, e.g. for common allocation sizes, in order to minimize the wasted memory.
- We currently only create new blocks, but never free them again. This results in fragmentation and might eventually result in allocation failures for large allocations. It might make sense to enforce a maximum list length for each block size. When the maximum length is reached, subsequent deallocations are freed using the fallback allocator instead of being added to the list.
- Instead of falling back to a linked list allocator, we could use a special allocator for allocations greater than 4KiB. The idea is to utilize [paging], which operates on 4KiB pages, to map a continuous block of virtual memory to non-continuous physical frames. This way, fragmentation of unused memory is no longer a problem for large allocations.
- With such a page allocator, it might make sense to add block sizes up to 4KiB and drop the linked list allocator completely. The main advantages of this would be reduced fragmentation and improved performance predictability, i.e. better worst-case performance.

[paging]: @/second-edition/posts/08-paging-introduction/index.md

It's important to note that the implementation improvements outlined above are only suggestions. Allocators used in operating system kernels are typically highly optimized to the specific workload of the kernel, which is only possible through extensive profiling.

### Variations

There are also many variations of the fixed-size block allocator design. Two popular examples are the _slab allocator_ and the _buddy allocator_, which are also used in popular kernels such as Linux. In the following, we give a short introduction to these two designs.

#### Slab Allocator

The idea behind a [slab allocator] is to use block sizes that directly correspond to selected types in the kernel.
This way, allocations of those types fit a block size exactly and no memory is wasted. Sometimes, it might be even possible to preinitialize type instances in unused blocks to further improve performance. + +[slab allocator]: https://en.wikipedia.org/wiki/Slab_allocation + +Slab allocation is often combined with other allocators. For example, it can be used together with a fixed-size block allocator to further split an allocated block in order to reduce memory waste. It is also often used to implement an [object pool pattern] on top of a single large allocation. + +[object pool pattern]: https://en.wikipedia.org/wiki/Object_pool_pattern + +#### Buddy Allocator + +Instead of using a linked list to manage freed blocks, the [buddy allocator] design uses a [binary tree] data structure together with power-of-2 block sizes. When a new block of a certain size is required, it splits a larger sized block into two halves, thereby creating two child nodes in the tree. Whenever a block is freed again, the neighbor block in the tree is analyzed. If the neighbor is also free, the two blocks are joined back together to a block of twice the size. + +The advantage of this merge process is that [external fragmentation] is reduced so that small freed blocks can be reused for a large allocation. It also does not use a fallback allocator, so the performance is more predictable. The biggest drawback is that only power-of-2 block sizes are possible, which might result in a large amount of wasted memory due to [internal fragmentation]. For this reason, buddy allocators are often combined with a slab allocator to further split an allocated block into multiple smaller blocks. + +[buddy allocator]: https://en.wikipedia.org/wiki/Buddy_memory_allocation +[binary tree]: https://en.wikipedia.org/wiki/Binary_tree +[external fragmentation]: https://en.wikipedia.org/wiki/Fragmentation_(computing)#External_fragmentation +[internal fragmentation]: https://en.wikipedia.org/wiki/Fragmentation_(computing)#Internal_fragmentation + + +## Summary + +This post gave an overview over different allocator designs. We learned how to implement a basic [bump allocator], which hands out memory linearly by increasing a single `next` pointer. While bump allocation is very fast, it can only reuse memory after all allocations have been freed. For this reason, it is rarely used as a global allocator. + +[bump allocator]: @/second-edition/posts/11-allocator-designs/index.md#bump-allocator + +Next, we created a [linked list allocator] that uses the freed memory blocks itself to create a linked list, the so-called [free list]. This list makes it possible to store an arbitrary number of freed blocks of different sizes. While no memory waste occurs, the approach suffers from poor performance because an allocation request might require a complete traversal of the list. Our implementation also suffers from [external fragmentation] because it does not merge adjacent freed blocks back together. + +[linked list allocator]: @/second-edition/posts/11-allocator-designs/index.md#linked-list-allocator +[free list]: https://en.wikipedia.org/wiki/Free_list + +To fix the performance problems of the linked list approach, we created a [fixed-size block allocator] that predefines a fixed set of block sizes. For each block size, a separate [free list] exists so that allocations and deallocations only need to insert/pop at the front of the list and are thus very fast. 
Since each allocation is rounded up to the next larger block size, some memory is wasted due to [internal fragmentation]. + +[fixed-size block allocator]: @/second-edition/posts/11-allocator-designs/index.md#fixed-size-block-allocator + +There are many more allocator designs with different tradeoffs. [Slab allocation] works well to optimize the allocation of common fixed-size structures, but is not applicable in all situations. [Buddy allocation] uses a binary tree to merge freed blocks back together, but wastes a large amount of memory because it only supports power-of-2 block sizes. It's also important to remember that each kernel implementation has a unique workload, so there is no "best" allocator design that fits all cases. + +[Slab allocation]: @/second-edition/posts/11-allocator-designs/index.md#slab-allocator +[Buddy allocation]: @/second-edition/posts/11-allocator-designs/index.md#buddy-allocator + + +## What's next? + +With this post, we conclude our memory management implementation for now. Next, we will start exploring [_multitasking_], starting with [_threads_]. In subsequent post we will then explore [_multiprocessing_], [_processes_], and cooperative multitasking in the form of [_async/await_]. + +[_multitasking_]: https://en.wikipedia.org/wiki/Computer_multitasking +[_threads_]: https://en.wikipedia.org/wiki/Thread_(computing) +[_processes_]: https://en.wikipedia.org/wiki/Process_(computing) +[_multiprocessing_]: https://en.wikipedia.org/wiki/Multiprocessing +[_async/await_]: https://rust-lang.github.io/async-book/01_getting_started/04_async_await_primer.html diff --git a/blog/content/second-edition/posts/11-allocator-designs/linked-list-allocation.svg b/blog/content/second-edition/posts/11-allocator-designs/linked-list-allocation.svg new file mode 100644 index 00000000..a24ac160 --- /dev/null +++ b/blog/content/second-edition/posts/11-allocator-designs/linked-list-allocation.svg @@ -0,0 +1,2 @@ + +
[SVG text labels: heap start, heap end, size, next pointer, head]
\ No newline at end of file diff --git a/blog/content/second-edition/posts/11-allocator-designs/linked-list-allocator-fragmentation-on-dealloc.svg b/blog/content/second-edition/posts/11-allocator-designs/linked-list-allocator-fragmentation-on-dealloc.svg new file mode 100644 index 00000000..5b890563 --- /dev/null +++ b/blog/content/second-edition/posts/11-allocator-designs/linked-list-allocator-fragmentation-on-dealloc.svg @@ -0,0 +1,3 @@ + + +
[SVG text labels for lines 1-3: heap start, heap end, size, next pointer, head]
\ No newline at end of file diff --git a/blog/content/second-edition/posts/11-allocator-designs/linked-list-allocator-merge-on-dealloc.svg b/blog/content/second-edition/posts/11-allocator-designs/linked-list-allocator-merge-on-dealloc.svg new file mode 100644 index 00000000..d9b408a2 --- /dev/null +++ b/blog/content/second-edition/posts/11-allocator-designs/linked-list-allocator-merge-on-dealloc.svg @@ -0,0 +1,3 @@ + + +
[SVG text labels for lines 1, 2, 2a, 3, 3a: heap start, heap end, size, next pointer, head]
\ No newline at end of file diff --git a/blog/content/second-edition/posts/11-allocator-designs/linked-list-allocator-push.svg b/blog/content/second-edition/posts/11-allocator-designs/linked-list-allocator-push.svg new file mode 100644 index 00000000..734bfaaf --- /dev/null +++ b/blog/content/second-edition/posts/11-allocator-designs/linked-list-allocator-push.svg @@ -0,0 +1,2 @@ + +
[SVG text labels for steps 0-2: Operations, heap start, heap end, size, next pointer, head, node, freed]
\ No newline at end of file diff --git a/blog/content/second-edition/posts/11-allocator-designs/linked-list-allocator-remove-region.svg b/blog/content/second-edition/posts/11-allocator-designs/linked-list-allocator-remove-region.svg new file mode 100644 index 00000000..229130b9 --- /dev/null +++ b/blog/content/second-edition/posts/11-allocator-designs/linked-list-allocator-remove-region.svg @@ -0,0 +1,2 @@ + +
[SVG text labels for steps 0-2: Operations, heap start, heap end, size, next pointer, head, region, current, current.next, region.next, next, ret]
\ No newline at end of file diff --git a/blog/content/status-update/2019-12-02.md b/blog/content/status-update/2019-12-02.md new file mode 100644 index 00000000..b366c0a9 --- /dev/null +++ b/blog/content/status-update/2019-12-02.md @@ -0,0 +1,34 @@ ++++ +title = "Updates in October and November 2019" +date = 2019-12-02 ++++ + +This post gives an overview of the recent updates to the _Writing an OS in Rust_ blog and the used libraries and tools. + +I moved to a new apartment mid-October and had lots of work to do there, so I didn't have the time for creating the October status update post. Therefore, this post lists the changes from both October and November. I'm slowly picking up speed again, but I still have a lot of mails in my backlog. Sorry if you haven't received an answer yet! + +## `blog_os` + +The blog itself received only a minor update: [Use panic! instead of println! + loop in double fault handler](https://github.com/phil-opp/blog_os/pull/687). This fixes an issue where a double fault during `cargo xtest` leads to an endless loop without any output on the serial port. + +We also have other news: We plan to add [Experimental Support for Community Translations](https://github.com/phil-opp/blog_os/pull/692) to the blog. While this imposes additional challenges, it makes the content accessible to people who don't speak English, so it's definitely worth trying in my opinion. The first additional language will be [Chinese](https://github.com/phil-opp/blog_os/pull/694), based on an [existing translation](https://github.com/rustcc/writing-an-os-in-rust) by [@luojia65](https://github.com/luojia65). Many thanks also to [@TheBegining](https://github.com/TheBegining) and [@Rustin-Liu](https://github.com/Rustin-Liu) for helping with the translation! + +## `bootloader` + +- [Change the way the kernel entry point is called to honor alignement ABI](https://github.com/rust-osdev/bootloader/pull/81) by [@GuillaumeDIDIER](https://github.com/GuillaumeDIDIER) (published as version 0.8.2) +- [Add support for Github Actions](https://github.com/rust-osdev/bootloader/pull/82) +- [Remove unnecessary `extern C` on panic handler to fix not-ffi-safe warning](https://github.com/rust-osdev/bootloader/pull/85) by [@cmsd2](https://github.com/cmsd2) (published as version 0.8.3) + +## `bootimage` + +- [Don't exit with expected exit code when failed to read QEMU exit code](https://github.com/rust-osdev/bootimage/pull/47) + +## `x86_64` + +- [Switch to GitHub Actions for CI](https://github.com/rust-osdev/x86_64/pull/93) +- [Use `repr C` to suppress not-ffi-safe when used with extern handler functions](https://github.com/rust-osdev/x86_64/pull/94) by [@cmsd2](https://github.com/cmsd2) (published as version 0.7.6) +- [Add `slice` and `slice_mut` methods to IDT](https://github.com/rust-osdev/x86_64/pull/95) by [@foxcob](https://github.com/foxcob) (published as version 0.7.7) + +## `cargo-xbuild` + +- [Add support for publishing and installing cross compiled crates](https://github.com/rust-osdev/cargo-xbuild/pull/47) by [@ALSchwalm](https://github.com/ALSchwalm) (published as version 0.5.18) diff --git a/blog/content/status-update/2020-01-07.md b/blog/content/status-update/2020-01-07.md new file mode 100644 index 00000000..42893bcd --- /dev/null +++ b/blog/content/status-update/2020-01-07.md @@ -0,0 +1,58 @@ ++++ +title = "Updates in December 2019" +date = 2020-01-07 ++++ + +Happy New Year! 
+ +This post gives an overview of the recent updates to the _Writing an OS in Rust_ blog and the corresponding libraries and tools. + +## `blog_os` + +The repository of the _Writing an OS in Rust_ blog received the following updates: + +- Update `x86_64` dependency to version 0.8.1. This included the [dependency update](https://github.com/phil-opp/blog_os/pull/701) itself, an [update of the frame allocation code](https://github.com/phil-opp/blog_os/pull/703), and an [update of the blog](https://github.com/phil-opp/blog_os/pull/704). +- [License the `blog/content` folder under CC BY-NC](https://github.com/phil-opp/blog_os/pull/705) +- [Reword sentence in first post](https://github.com/phil-opp/blog_os/pull/709) by [@pamolloy](https://github.com/pamolloy) + +Further, we're still working on adding [Experimental Support for Community Translations](https://github.com/phil-opp/blog_os/pull/692) to the blog, starting with [Simplified Chinese](https://github.com/phil-opp/blog_os/pull/694) and [Traditional Chinese](https://github.com/phil-opp/blog_os/pull/699). Any help is appreciated! + +## `bootloader` + +There were no updates to the bootloader this month. + +I'm currently working on rewriting the 16-bit/32-bit stages in Rust and making the bootloader more modular in the process. This should make it much easier to add support for UEFI and GRUB booting later. + +## `bootimage` + +There were no updates to the `bootimage` tool this month. + +## `x86_64` + +We landed a number of breaking changes this month: + +- [Replace `ux` dependency with custom wrapper structs](https://github.com/rust-osdev/x86_64/pull/91) +- [Add new UnusedPhysFrame type and use it in Mapper::map_to](https://github.com/rust-osdev/x86_64/pull/89) +- [Make Mapper trait object safe by adding `Self: Sized` bounds on generic functions](https://github.com/rust-osdev/x86_64/pull/84) +- [Rename divide_by_zero field of IDT to divide_error](https://github.com/rust-osdev/x86_64/pull/108) +- [Introduce new diverging handler functions for exceptions classified as "abort"](https://github.com/rust-osdev/x86_64/pull/109) + +These changes were released an version 0.8.0. Unfortunately, there was a missing re-export for the new `UnusedPhysFrame` type. We fixed it in [#110](https://github.com/rust-osdev/x86_64/pull/110) and released the fix as version 0.8.1. + +There was one more addition to the `x86_64` crate afterwards: + +- [Add support for cr4 control register (with complete documentation)](https://github.com/rust-osdev/x86_64/pull/111) by [@KarimAllah](https://github.com/KarimAllah) (released as version 0.8.2). 
+ +There were also a few changes related to continuous integration: + +- [Remove bors from this repo](https://github.com/rust-osdev/x86_64/pull/103) +- [Run 'push' builds only for master branch](https://github.com/rust-osdev/x86_64/pull/104) +- [Remove Travis CI and Azure Pipelines scripts](https://github.com/rust-osdev/x86_64/pull/105) +- [Add caching of cargo crates to GitHub Actions CI](https://github.com/rust-osdev/x86_64/pull/100) + +## `cargo-xbuild` + +The `cargo-xbuild` crate, which cross-compiles the sysroot, received the following updates this month: + +- [Add `--quiet` flag that suppresses "waiting for file lock" message](https://github.com/rust-osdev/cargo-xbuild/pull/43) by [@Nils-TUD](https://github.com/Nils-TUD) (published as version 0.5.19) +- [Fix wrong feature name for memcpy=false](https://github.com/rust-osdev/cargo-xbuild/pull/50) (released as version 0.5.20) diff --git a/blog/content/status-update/2020-02-01.md b/blog/content/status-update/2020-02-01.md new file mode 100644 index 00000000..818a08ed --- /dev/null +++ b/blog/content/status-update/2020-02-01.md @@ -0,0 +1,69 @@ ++++ +title = "Updates in January 2020" +date = 2020-02-01 ++++ + +This post gives an overview of the recent updates to the _Writing an OS in Rust_ blog and the corresponding libraries and tools. + +## `blog_os` + +The repository of the _Writing an OS in Rust_ blog received the following updates: + +- [Move #[global_allocator] into allocator module](https://github.com/phil-opp/blog_os/pull/714) +- [Update many_boxes test to scale with heap size](https://github.com/phil-opp/blog_os/pull/716) +- [New post about allocator designs](https://github.com/phil-opp/blog_os/pull/719) 🎉 +- [Provide multiple implementations of align_up and mention performance](https://github.com/phil-opp/blog_os/pull/721) +- [Refactor Simplified Chinese translation of post 3](https://github.com/phil-opp/blog_os/pull/725) by [@Rustin-Liu](https://github.com/Rustin-Liu) +- [Use checked addition for allocator implementations](https://github.com/phil-opp/blog_os/pull/726) +- [Fix dummy allocator code example](https://github.com/phil-opp/blog_os/pull/728) +- [Some style updates to the front page](https://github.com/phil-opp/blog_os/pull/729) +- [Mark active item in table of contents](https://github.com/phil-opp/blog_os/pull/733) +- [Make active section link more discreet](https://github.com/phil-opp/blog_os/pull/734) by [@Menschenkindlein](https://github.com/Menschenkindlein) + +I also started working on the upcoming post about threads. + +## `bootloader` + +The bootloader crate received two minor updates this month: + +- [Move architecture checks from build script into lib.rs](https://github.com/rust-osdev/bootloader/pull/91) +- [Update x86_64 dependency to version 0.8.3](https://github.com/rust-osdev/bootloader/pull/92) by [@vinaychandra](https://github.com/vinaychandra) + +Since I focused my time on the new _Allocator Designs_ post, I did not have the time to make more progress on my plan to rewrite the 16-bit/32-bit stages of the bootloader in Rust. I hope to get back to it soon. + +## `bootimage` + +There were no updates to the `bootimage` tool this month. 
+ +## `x86_64` + +The following changes were merged this month: + +- [Allow immediate port version of in/out instructions](https://github.com/rust-osdev/x86_64/pull/115) by [@m-ou-se](https://github.com/m-ou-se) +- [Make more functions const](https://github.com/rust-osdev/x86_64/pull/116) by [@m-ou-se](https://github.com/m-ou-se) + - Released as version 0.8.3 +- [Return the UnusedPhysFrame on MapToError::PageAlreadyMapped](https://github.com/rust-osdev/x86_64/pull/118) by [@haraldh](https://github.com/haraldh) + - This is a **breaking change** since it changes the signature of a type. + - No new release was published yet to give us the option to bundle it with other breaking changes. + +There are also some pull requests that have some open design questions and are still being discussed: + +- [Add p23_insert_flag_mask argument to mapper.map_to()](https://github.com/rust-osdev/x86_64/pull/114) by [@haraldh](https://github.com/haraldh) + - Related proposal: [Page Table Visitors](https://github.com/rust-osdev/x86_64/issues/121) by [@mark-i-m](https://github.com/mark-i-m) +- [Add User Mode registers](https://github.com/rust-osdev/x86_64/pull/119) by [@vinaychandra](https://github.com/vinaychandra) + +Please feel free to join these discussions if you have opinions on the matter. + +## `cargo-xbuild` + +The `cargo-xbuild` crate, which cross-compiles the sysroot, received the following updates this month: + +- [Override target path for building sysroot](https://github.com/rust-osdev/cargo-xbuild/pull/52) by [@upsuper](https://github.com/upsuper) + - Published as version 0.5.21 + +## `uart_16550` + +The `uart_16550` crate, which provides basic support for uart_16550 serial output, received a small dependency update: + +- [Update dependency for x86_64](https://github.com/rust-osdev/uart_16550/pull/4) by [@haraldh](https://github.com/haraldh) + - Published as version 0.2.2 diff --git a/blog/static/css/main.css b/blog/static/css/main.css index 69a4a432..0f795bd8 100644 --- a/blog/static/css/main.css +++ b/blog/static/css/main.css @@ -82,10 +82,14 @@ main img { border: 2px solid #fc0 } -.posts.exceptions { +.posts.interrupts { border: 2px solid #f66; } +.posts.multitasking { + border: 2px solid #556b2f; +} + .posts hr { margin: 2rem 0; } @@ -117,10 +121,14 @@ main img { color: #990; } -.post-category.exceptions { +.post-category.interrupts { color: #f33; } +.post-category.multitasking { + color: #556b2f; +} + .post-footer-support { margin-top: 2rem; } @@ -188,19 +196,26 @@ aside#all-posts-link { width: 12rem; position: sticky; float: left; - top: 1rem; + top: 3.5rem; margin-top: -4rem; margin-left: -15rem; font-size: 90%; line-height: 1.2; + } + + #toc-aside li > a, #toc-aside h2 { opacity: .5; transition: opacity .5s; } - #toc-aside:hover { + #toc-aside:hover li > a, #toc-aside:hover h2 { opacity: 1; } + #toc-aside li.active > a { + font-weight: bold; + } + #toc-aside h2 { font-size: 110%; margin-bottom: .2rem; @@ -212,7 +227,7 @@ aside#all-posts-link { list-style:none; } - #toc-aside ol li:before { + #toc-aside ol li a:before { content: ""; border-color: transparent #008eef; border-style: solid; @@ -243,7 +258,7 @@ aside#all-posts-link { margin-bottom: 1.5rem; } - aside#recent-updates h2 { + aside#recent-updates h1 { font-size: 110%; margin-bottom: .2rem; } @@ -258,11 +273,10 @@ aside#all-posts-link { } aside#all-posts-link { - float: left; - position: absolute; - bottom: 0; - left: -15rem; + position: fixed; + top: 1.25rem; margin-top: 0; + margin-left: -15rem; } } diff --git 
a/blog/static/js/main.js b/blog/static/js/main.js index 96ed6181..dfa1dd4d 100644 --- a/blog/static/js/main.js +++ b/blog/static/js/main.js @@ -3,6 +3,8 @@ window.onload = function() { if (container != null) { resize_toc(container); + toc_scroll_position(container); + window.onscroll = function() { toc_scroll_position(container) }; } } @@ -24,3 +26,38 @@ function resize_toc(container) { resizeId = setTimeout(resize, 300); }; } + +function toc_scroll_position(container) { + if (container.offsetParent === null) { + // skip computation if ToC is not visible + return; + } + var items = container.querySelectorAll("li") + + // remove active class for all items + for (item of container.querySelectorAll("li")) { + item.classList.remove("active"); + } + + // look for active item + var site_offset = document.documentElement.scrollTop; + var current_toc_item = null; + for (item of container.querySelectorAll("li")) { + if (item.offsetParent === null) { + // skip items that are not visible + continue; + } + var anchor = item.firstElementChild.getAttribute("href"); + var heading = document.querySelector(anchor); + if (heading.offsetTop <= (site_offset + document.documentElement.clientHeight / 3)) { + current_toc_item = item; + } else { + break; + } + } + + // set active class for current ToC item + if (current_toc_item != null) { + current_toc_item.classList.add("active"); + } +} diff --git a/blog/templates/macros.html b/blog/templates/macros.html index 406a1ef2..74090afd 100644 --- a/blog/templates/macros.html +++ b/blog/templates/macros.html @@ -3,16 +3,16 @@

{{ page.title }}

{{ page.summary | safe}} - read more… + read more »
{% endmacro post_link %} {% macro utterances() %} diff --git a/blog/templates/second-edition/base.html b/blog/templates/second-edition/base.html index 7319cb8a..5ff4e0b2 100644 --- a/blog/templates/second-edition/base.html +++ b/blog/templates/second-edition/base.html @@ -6,7 +6,7 @@ - + {% if current_url %} @@ -25,7 +25,7 @@

- {{ config.title | safe }} (Second Edition) + {{ config.title | safe }}

{{ config.extra.subtitle | replace(from=" ", to=" ") | safe }}

{% block header %}{% endblock header %} @@ -39,7 +39,7 @@

- © . All rights reserved. + © . All rights reserved. Contact
diff --git a/blog/templates/second-edition/extra.html b/blog/templates/second-edition/extra.html index 0926bcd2..105ff0be 100644 --- a/blog/templates/second-edition/extra.html +++ b/blog/templates/second-edition/extra.html @@ -4,6 +4,10 @@ {% block title %}{{ page.title }} | {{ config.title }}{% endblock title %} +{% block description -%} +{{ page.summary | safe | striptags }} +{%- endblock description %} + {% block main %}

{{ page.title }}

{{ page.content | safe }} diff --git a/blog/templates/second-edition/index.html b/blog/templates/second-edition/index.html index 5f27c1a7..6c7a89bb 100644 --- a/blog/templates/second-edition/index.html +++ b/blog/templates/second-edition/index.html @@ -8,6 +8,8 @@ {% set posts_section = get_section(path = "second-edition/posts/_index.md") %} {% set posts = posts_section.pages %} +

Posts

+

This blog series creates a small operating system in the @@ -21,59 +23,37 @@

- -
- {{ macros::post_link(page=posts.0) }} - {{ macros::post_link(page=posts.1) }} - {{ macros::post_link(page=posts.2) }} - {{ macros::post_link(page=posts.3) }} +
+{% set chapter = "none" %} +{% for post in posts %} + {% if post.extra["chapter"] %} + {% if post.extra["chapter"] != chapter %} + {# Begin new chapter #} + {% set_global chapter = post.extra["chapter"] %}
- -
- {{ macros::post_link(page=posts.4) }} - {{ macros::post_link(page=posts.5) }} - {{ macros::post_link(page=posts.6) }} -
+ +
+ {% endif %} + {% endif %} - -
- {{ macros::post_link(page=posts.7) }} - {{ macros::post_link(page=posts.8) }} - {{ macros::post_link(page=posts.9) }} + {{ macros::post_link(page=post) }} +{% endfor %}
- -
-

First Edition

- You are viewing the second edition of “Writing an OS in Rust”, which is still in progress. The first edition has more content, but is no longer updated. We try our best to incorporate the missing content soon. +

-
- {% set extra = get_section(path = "second-edition/extra/_index.md") %} -

{{ extra.title }}

- -
-

Status Updates

{% set status_updates = get_section(path = "status-update/_index.md") %} @@ -87,18 +67,25 @@
+

First Edition

+

You are currently viewing the second edition of “Writing an OS in Rust”. The first edition is very different in many aspects, for example it builds upon the GRUB bootloader instead of using the `bootloader` crate. In case you're interested in it, it is still available. Note that the first edition is no longer updated and might contain outdated information. read the first edition »

+
+ +

Support Me

{% include "support.html" %}
+{% endblock main %} +{% block after_main %} -{% endblock main %} +{% endblock after_main %} diff --git a/blog/templates/second-edition/page.html b/blog/templates/second-edition/page.html index 0a39a024..18daa528 100644 --- a/blog/templates/second-edition/page.html +++ b/blog/templates/second-edition/page.html @@ -7,6 +7,10 @@ {% endblock header %} +{% block description -%} +{{ page.summary | safe | striptags }} +{%- endblock description %} + {% block main %}

{{ page.title }}