Try Live
Add Docs
Rankings
Pricing
Docs
Install
Install
Docs
Pricing
More...
More...
Try Live
Rankings
Enterprise
Create API Key
Add Docs
Linux
https://github.com/torvalds/linux
Admin
Linux kernel source tree
Tokens:
5,552,885
Snippets:
26,990
Trust Score:
8
Update:
4 months ago
Context
Skills
Chat
Benchmark
77.7
Suggestions
Latest
Show doc for...
Code
Info
Show Results
Context Summary (auto-generated)
Raw
Copy
Link
# Linux Kernel

## Introduction

The Linux kernel is a free and open-source, monolithic, modular, multitasking, Unix-like operating system kernel. It was originally authored in 1991 by Linus Torvalds and has since become one of the most prominent examples of free and open-source software. The Linux kernel serves as the core interface between a computer's hardware and its processes, managing system resources and facilitating communication between software and hardware components. This is version 6.18.0-rc6 (codename: Baby Opossum Posse).

The kernel provides fundamental services including process scheduling, memory management, device drivers, file systems, and networking capabilities. It supports multiple processor architectures (x86, ARM, ARM64, RISC-V, PowerPC, MIPS, and many others), offers extensive hardware support through thousands of device drivers, implements advanced memory management with demand paging and copy-on-write, provides a sophisticated process scheduler with support for real-time tasks, and includes comprehensive networking stacks supporting TCP/IP, IPv6, wireless protocols, and more. The kernel is written primarily in C with architecture-specific assembly code and increasingly incorporates Rust for improved memory safety.

## System Calls and Kernel APIs

### Process Creation with fork()

The fork system call creates a new process by duplicating the calling process, creating a child process with its own address space.
```c #include <linux/sched.h> #include <linux/syscalls.h> /* Kernel implementation in kernel/fork.c */ SYSCALL_DEFINE0(fork) { struct kernel_clone_args args = { .exit_signal = SIGCHLD, }; return kernel_clone(&args); } /* User-space usage example */ #include <unistd.h> #include <stdio.h> #include <sys/wait.h> int main(void) { pid_t pid; int status; pid = fork(); if (pid < 0) { /* Error handling */ perror("fork failed"); return 1; } else if (pid == 0) { /* Child process */ printf("Child process: PID=%d\n", getpid()); return 0; } else { /* Parent process */ printf("Parent process: PID=%d, Child PID=%d\n", getpid(), pid); waitpid(pid, &status, 0); printf("Child exited with status %d\n", WEXITSTATUS(status)); return 0; } } ``` ### File Operations API The file operations structure defines how the kernel interacts with files and devices through the virtual file system layer. ```c #include <linux/fs.h> #include <linux/module.h> #include <linux/kernel.h> /* Device file operations structure */ static ssize_t device_read(struct file *filp, char __user *buffer, size_t length, loff_t *offset) { char kernel_buf[256] = "Hello from kernel device!\n"; size_t bytes_to_copy; /* Calculate remaining bytes */ if (*offset >= strlen(kernel_buf)) return 0; bytes_to_copy = min(length, strlen(kernel_buf) - *offset); /* Copy data to user space */ if (copy_to_user(buffer, kernel_buf + *offset, bytes_to_copy)) return -EFAULT; *offset += bytes_to_copy; return bytes_to_copy; } static ssize_t device_write(struct file *filp, const char __user *buffer, size_t length, loff_t *offset) { char kernel_buf[256]; size_t bytes_to_copy = min(length, sizeof(kernel_buf) - 1); if (copy_from_user(kernel_buf, buffer, bytes_to_copy)) return -EFAULT; kernel_buf[bytes_to_copy] = '\0'; pr_info("Received from user: %s\n", kernel_buf); return bytes_to_copy; } static int device_open(struct inode *inode, struct file *file) { pr_info("Device opened\n"); try_module_get(THIS_MODULE); return 0; } static int 
device_release(struct inode *inode, struct file *file) { pr_info("Device closed\n"); module_put(THIS_MODULE); return 0; } static struct file_operations fops = { .owner = THIS_MODULE, .read = device_read, .write = device_write, .open = device_open, .release = device_release, }; ``` ### Memory Allocation APIs The kernel provides multiple memory allocation mechanisms for different use cases, from small allocations to large physically contiguous buffers. ```c #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/gfp.h> /* kmalloc - for small allocations (<= PAGE_SIZE typically) */ void *ptr; ptr = kmalloc(1024, GFP_KERNEL); if (!ptr) { pr_err("kmalloc failed\n"); return -ENOMEM; } /* Use the allocated memory */ memset(ptr, 0, 1024); /* Free when done */ kfree(ptr); /* vmalloc - for larger allocations, not physically contiguous */ void *large_buf; large_buf = vmalloc(1024 * 1024); /* 1MB */ if (!large_buf) { pr_err("vmalloc failed\n"); return -ENOMEM; } /* Use the buffer */ memset(large_buf, 0, 1024 * 1024); /* Free when done */ vfree(large_buf); /* get_free_pages - for page-aligned allocations */ unsigned long pages; pages = __get_free_pages(GFP_KERNEL, 2); /* 2^2 = 4 pages */ if (!pages) { pr_err("get_free_pages failed\n"); return -ENOMEM; } /* Use the pages */ memset((void *)pages, 0, PAGE_SIZE * 4); /* Free when done */ free_pages(pages, 2); /* kmem_cache - for frequently allocated objects of same size */ struct kmem_cache *my_cache; struct my_object { int data; char name[64]; }; my_cache = kmem_cache_create("my_object_cache", sizeof(struct my_object), 0, SLAB_HWCACHE_ALIGN, NULL); if (!my_cache) { pr_err("kmem_cache_create failed\n"); return -ENOMEM; } /* Allocate from cache */ struct my_object *obj = kmem_cache_alloc(my_cache, GFP_KERNEL); if (obj) { obj->data = 42; strscpy(obj->name, "test", sizeof(obj->name)); /* Free back to cache */ kmem_cache_free(my_cache, obj); } /* Destroy cache when module unloads */ kmem_cache_destroy(my_cache); ``` ### 
Network Socket System Calls The socket API provides network communication capabilities through a standardized interface supporting multiple protocol families. ```c /* Kernel implementation in net/socket.c */ #include <linux/socket.h> #include <linux/net.h> #include <linux/syscalls.h> /* User-space TCP server example */ #include <sys/socket.h> #include <netinet/in.h> #include <arpa/inet.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> int main(void) { int server_fd, client_fd; struct sockaddr_in address; int opt = 1; int addrlen = sizeof(address); char buffer[1024] = {0}; const char *response = "HTTP/1.1 200 OK\r\nContent-Length: 13\r\n\r\nHello, World!"; /* Create socket file descriptor */ if ((server_fd = socket(AF_INET, SOCK_STREAM, 0)) == -1) { perror("socket failed"); exit(EXIT_FAILURE); } /* Set socket options */ if (setsockopt(server_fd, SOL_SOCKET, SO_REUSEADDR | SO_REUSEPORT, &opt, sizeof(opt)) == -1) { perror("setsockopt failed"); exit(EXIT_FAILURE); } /* Bind socket to port 8080 */ address.sin_family = AF_INET; address.sin_addr.s_addr = INADDR_ANY; address.sin_port = htons(8080); if (bind(server_fd, (struct sockaddr *)&address, sizeof(address)) == -1) { perror("bind failed"); exit(EXIT_FAILURE); } /* Listen for connections */ if (listen(server_fd, 10) == -1) { perror("listen failed"); exit(EXIT_FAILURE); } printf("Server listening on port 8080...\n"); /* Accept and handle connection */ if ((client_fd = accept(server_fd, (struct sockaddr *)&address, (socklen_t*)&addrlen)) == -1) { perror("accept failed"); exit(EXIT_FAILURE); } /* Read request */ read(client_fd, buffer, 1024); printf("Received: %s\n", buffer); /* Send response */ send(client_fd, response, strlen(response), 0); printf("Response sent\n"); /* Cleanup */ close(client_fd); close(server_fd); return 0; } ``` ### Process Scheduling API The scheduler manages CPU time allocation across processes and threads with support for multiple scheduling policies. 
```c #include <linux/sched.h> #include <linux/sched/rt.h> #include <linux/kthread.h> /* Kernel thread creation and scheduling example */ static struct task_struct *worker_thread; static int worker_function(void *data) { while (!kthread_should_stop()) { pr_info("Worker thread running: PID=%d\n", current->pid); /* Do work here */ msleep(1000); /* Sleep for 1 second */ /* Yield CPU voluntarily */ cond_resched(); } return 0; } static int __init my_init(void) { struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; /* Create kernel thread */ worker_thread = kthread_create(worker_function, NULL, "my_worker"); if (IS_ERR(worker_thread)) { pr_err("Failed to create kernel thread\n"); return PTR_ERR(worker_thread); } /* Set real-time scheduling policy */ sched_setscheduler(worker_thread, SCHED_FIFO, ¶m); /* Wake up the thread */ wake_up_process(worker_thread); pr_info("Kernel thread created and started\n"); return 0; } static void __exit my_exit(void) { /* Stop kernel thread */ if (worker_thread) { kthread_stop(worker_thread); pr_info("Kernel thread stopped\n"); } } /* User-space scheduling example */ #include <sched.h> #include <pthread.h> #include <stdio.h> #include <stdlib.h> void *thread_function(void *arg) { printf("Thread running with priority %d\n", *(int *)arg); sleep(1); return NULL; } int main(void) { pthread_t thread; struct sched_param param; pthread_attr_t attr; int priority = 50; /* Initialize thread attributes */ pthread_attr_init(&attr); /* Set scheduling policy to FIFO */ pthread_attr_setschedpolicy(&attr, SCHED_FIFO); /* Set priority */ param.sched_priority = priority; pthread_attr_setschedparam(&attr, ¶m); /* Set inherit scheduler attribute */ pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED); /* Create thread */ if (pthread_create(&thread, &attr, thread_function, &priority) != 0) { perror("pthread_create failed"); return 1; } /* Wait for thread completion */ pthread_join(thread, NULL); pthread_attr_destroy(&attr); return 0; } ``` ### 
Device Driver Registration Device drivers register themselves with the kernel to manage hardware devices, providing standardized interfaces for device access. ```c #include <linux/module.h> #include <linux/kernel.h> #include <linux/fs.h> #include <linux/cdev.h> #include <linux/device.h> #include <linux/uaccess.h> #define DEVICE_NAME "mydevice" #define CLASS_NAME "myclass" static int major_number; static struct class *device_class = NULL; static struct device *device_node = NULL; static struct cdev my_cdev; /* Device operations */ static int dev_open(struct inode *inodep, struct file *filep) { pr_info("%s: Device opened\n", DEVICE_NAME); return 0; } static ssize_t dev_read(struct file *filep, char __user *buffer, size_t len, loff_t *offset) { char message[] = "Hello from device driver!\n"; size_t message_len = strlen(message); size_t to_copy; if (*offset >= message_len) return 0; to_copy = min(len, message_len - (size_t)*offset); if (copy_to_user(buffer, message + *offset, to_copy)) return -EFAULT; *offset += to_copy; return to_copy; } static ssize_t dev_write(struct file *filep, const char __user *buffer, size_t len, loff_t *offset) { char kernel_buffer[256]; size_t to_copy = min(len, sizeof(kernel_buffer) - 1); if (copy_from_user(kernel_buffer, buffer, to_copy)) return -EFAULT; kernel_buffer[to_copy] = '\0'; pr_info("%s: Received: %s\n", DEVICE_NAME, kernel_buffer); return to_copy; } static int dev_release(struct inode *inodep, struct file *filep) { pr_info("%s: Device closed\n", DEVICE_NAME); return 0; } static struct file_operations fops = { .owner = THIS_MODULE, .open = dev_open, .read = dev_read, .write = dev_write, .release = dev_release, }; static int __init device_init(void) { dev_t dev; int ret; /* Allocate major number */ ret = alloc_chrdev_region(&dev, 0, 1, DEVICE_NAME); if (ret < 0) { pr_err("Failed to allocate major number\n"); return ret; } major_number = MAJOR(dev); /* Initialize character device */ cdev_init(&my_cdev, &fops); my_cdev.owner = 
THIS_MODULE; /* Add character device to system */ ret = cdev_add(&my_cdev, dev, 1); if (ret < 0) { unregister_chrdev_region(dev, 1); pr_err("Failed to add character device\n"); return ret; } /* Create device class */ device_class = class_create(CLASS_NAME); if (IS_ERR(device_class)) { cdev_del(&my_cdev); unregister_chrdev_region(dev, 1); pr_err("Failed to create device class\n"); return PTR_ERR(device_class); } /* Create device node */ device_node = device_create(device_class, NULL, dev, NULL, DEVICE_NAME); if (IS_ERR(device_node)) { class_destroy(device_class); cdev_del(&my_cdev); unregister_chrdev_region(dev, 1); pr_err("Failed to create device node\n"); return PTR_ERR(device_node); } pr_info("%s: Device driver registered: major=%d\n", DEVICE_NAME, major_number); return 0; } static void __exit device_exit(void) { dev_t dev = MKDEV(major_number, 0); device_destroy(device_class, dev); class_destroy(device_class); cdev_del(&my_cdev); unregister_chrdev_region(dev, 1); pr_info("%s: Device driver unregistered\n", DEVICE_NAME); } module_init(device_init); module_exit(device_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Your Name"); MODULE_DESCRIPTION("A simple character device driver"); MODULE_VERSION("1.0"); ``` ### BPF (Berkeley Packet Filter) Integration The kernel supports extended BPF programs that run in kernel space with safety guarantees for tracing, networking, and security. 
```c /* BPF program example - XDP packet filter */ #include <linux/bpf.h> #include <linux/if_ether.h> #include <linux/ip.h> #include <linux/in.h> #include <bpf/bpf_helpers.h> /* BPF map to count packets */ struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); __uint(max_entries, 256); __type(key, __u32); __type(value, __u64); } packet_count SEC(".maps"); SEC("xdp") int xdp_packet_filter(struct xdp_md *ctx) { void *data_end = (void *)(long)ctx->data_end; void *data = (void *)(long)ctx->data; struct ethhdr *eth = data; struct iphdr *ip; __u32 key = 0; __u64 *count; /* Boundary check */ if (data + sizeof(*eth) > data_end) return XDP_PASS; /* Only process IP packets */ if (eth->h_proto != htons(ETH_P_IP)) return XDP_PASS; ip = data + sizeof(*eth); if ((void *)(ip + 1) > data_end) return XDP_PASS; /* Count packets by protocol */ key = ip->protocol; count = bpf_map_lookup_elem(&packet_count, &key); if (count) __sync_fetch_and_add(count, 1); /* Drop packets from specific source (example: 192.168.1.100) */ if (ip->saddr == htonl(0xC0A80164)) return XDP_DROP; return XDP_PASS; } char _license[] SEC("license") = "GPL"; /* User-space loader program */ #include <stdio.h> #include <stdlib.h> #include <bpf/libbpf.h> #include <bpf/bpf.h> #include <net/if.h> int main(int argc, char **argv) { struct bpf_object *obj; struct bpf_program *prog; int prog_fd, map_fd; int ifindex; if (argc < 2) { fprintf(stderr, "Usage: %s <interface>\n", argv[0]); return 1; } /* Get interface index */ ifindex = if_nametoindex(argv[1]); if (!ifindex) { perror("if_nametoindex"); return 1; } /* Load BPF program */ obj = bpf_object__open_file("xdp_filter.o", NULL); if (!obj) { fprintf(stderr, "Failed to open BPF object\n"); return 1; } if (bpf_object__load(obj)) { fprintf(stderr, "Failed to load BPF object\n"); return 1; } /* Find program and map */ prog = bpf_object__find_program_by_name(obj, "xdp_packet_filter"); prog_fd = bpf_program__fd(prog); map_fd = bpf_object__find_map_fd_by_name(obj, "packet_count"); /* 
Attach to interface */ if (bpf_xdp_attach(ifindex, prog_fd, 0, NULL) < 0) { perror("bpf_xdp_attach"); return 1; } printf("BPF program attached to %s\n", argv[1]); printf("Press Ctrl+C to detach and exit\n"); /* Read and print statistics every second */ while (1) { sleep(1); for (int proto = 0; proto < 256; proto++) { __u64 count = 0; if (bpf_map_lookup_elem(map_fd, &proto, &count) == 0 && count > 0) { printf("Protocol %d: %llu packets\n", proto, count); } } } return 0; } ``` ### Interrupt Handlers Interrupt handling allows the kernel to respond to hardware events asynchronously with minimal latency. ```c #include <linux/interrupt.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/pci.h> static int irq_number; static unsigned long irq_count = 0; /* Top-half: Interrupt handler (runs in interrupt context) */ static irqreturn_t irq_handler(int irq, void *dev_id) { irq_count++; /* Acknowledge interrupt (hardware specific) */ /* ... hardware register access ... */ /* Schedule bottom-half if needed */ return IRQ_WAKE_THREAD; } /* Bottom-half: Threaded interrupt handler (can sleep) */ static irqreturn_t irq_thread_handler(int irq, void *dev_id) { pr_info("Processing interrupt %d (count: %lu)\n", irq, irq_count); /* Perform time-consuming work here */ /* This handler can sleep, use mutexes, etc. 
*/ return IRQ_HANDLED; } static int __init irq_init(void) { int ret; /* Request IRQ with threaded handler */ irq_number = 11; /* Example IRQ number */ ret = request_threaded_irq(irq_number, irq_handler, /* Top-half */ irq_thread_handler, /* Bottom-half */ IRQF_SHARED, "my_device", THIS_MODULE); if (ret) { pr_err("Failed to request IRQ %d: %d\n", irq_number, ret); return ret; } pr_info("IRQ handler registered for IRQ %d\n", irq_number); return 0; } static void __exit irq_exit(void) { free_irq(irq_number, THIS_MODULE); pr_info("IRQ handler unregistered (handled %lu interrupts)\n", irq_count); } module_init(irq_init); module_exit(irq_exit); MODULE_LICENSE("GPL"); ``` ### Workqueue for Deferred Work Workqueues provide a mechanism to defer work to kernel threads, allowing interrupt handlers and other contexts to schedule tasks. ```c #include <linux/workqueue.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/slab.h> static struct workqueue_struct *my_wq; struct work_data { struct work_struct work; int value; char *message; }; /* Work function that processes deferred tasks */ static void work_handler(struct work_struct *work) { struct work_data *data = container_of(work, struct work_data, work); pr_info("Work handler executing: value=%d, message=%s\n", data->value, data->message); /* Simulate some work */ msleep(100); /* Cleanup */ kfree(data->message); kfree(data); } /* Delayed work example */ static struct delayed_work delayed_task; static void delayed_work_handler(struct work_struct *work) { pr_info("Delayed work executed after timeout\n"); /* Reschedule for periodic execution */ schedule_delayed_work(&delayed_task, msecs_to_jiffies(5000)); } static int __init wq_init(void) { struct work_data *data; /* Create dedicated workqueue */ my_wq = alloc_workqueue("my_workqueue", WQ_UNBOUND | WQ_MEM_RECLAIM, 0); if (!my_wq) { pr_err("Failed to create workqueue\n"); return -ENOMEM; } /* Schedule immediate work */ data = kmalloc(sizeof(*data), GFP_KERNEL); if 
(data) { data->value = 42; data->message = kstrdup("Hello from workqueue", GFP_KERNEL); INIT_WORK(&data->work, work_handler); queue_work(my_wq, &data->work); } /* Schedule delayed work (executes after 5 seconds) */ INIT_DELAYED_WORK(&delayed_task, delayed_work_handler); schedule_delayed_work(&delayed_task, msecs_to_jiffies(5000)); pr_info("Workqueue initialized\n"); return 0; } static void __exit wq_exit(void) { /* Cancel delayed work */ cancel_delayed_work_sync(&delayed_task); /* Flush and destroy workqueue */ flush_workqueue(my_wq); destroy_workqueue(my_wq); pr_info("Workqueue destroyed\n"); } module_init(wq_init); module_exit(wq_exit); MODULE_LICENSE("GPL"); ``` ### Timer API The kernel timer subsystem allows scheduling functions to execute at specific times or after delays. ```c #include <linux/timer.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/jiffies.h> static struct timer_list my_timer; static unsigned int timer_count = 0; /* Timer callback function */ static void timer_callback(struct timer_list *timer) { timer_count++; pr_info("Timer fired! 
Count: %u, jiffies: %lu\n", timer_count, jiffies); /* Reschedule timer for 2 seconds from now */ if (timer_count < 10) { mod_timer(&my_timer, jiffies + msecs_to_jiffies(2000)); } else { pr_info("Timer finished after %u callbacks\n", timer_count); } } /* High-resolution timer example */ #include <linux/hrtimer.h> #include <linux/ktime.h> static struct hrtimer hr_timer; static enum hrtimer_restart hr_timer_callback(struct hrtimer *timer) { ktime_t now = ktime_get(); pr_info("High-resolution timer callback at %lld ns\n", ktime_to_ns(now)); /* Forward timer by 100ms */ hrtimer_forward_now(timer, ms_to_ktime(100)); return HRTIMER_RESTART; } static int __init timer_init(void) { ktime_t ktime; /* Initialize regular timer */ timer_setup(&my_timer, timer_callback, 0); mod_timer(&my_timer, jiffies + msecs_to_jiffies(2000)); pr_info("Timer scheduled for 2 seconds\n"); /* Initialize high-resolution timer */ ktime = ms_to_ktime(100); /* 100ms */ hrtimer_init(&hr_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hr_timer.function = &hr_timer_callback; hrtimer_start(&hr_timer, ktime, HRTIMER_MODE_REL); pr_info("High-resolution timer started\n"); return 0; } static void __exit timer_exit(void) { /* Delete timers */ del_timer_sync(&my_timer); hrtimer_cancel(&hr_timer); pr_info("Timers cancelled (regular timer fired %u times)\n", timer_count); } module_init(timer_init); module_exit(timer_exit); MODULE_LICENSE("GPL"); ``` ## Summary and Integration The Linux kernel serves as the foundation for numerous operating systems including desktop distributions (Ubuntu, Fedora, Debian), enterprise servers (Red Hat Enterprise Linux, SUSE Linux Enterprise), embedded systems (Android, automotive infotainment, IoT devices), and supercomputers. It provides a stable ABI for system calls while maintaining a flexible internal API that evolves with hardware and software requirements. 
The kernel's modular architecture allows for dynamic loading and unloading of functionality through loadable kernel modules (LKMs), enabling hardware support without requiring kernel recompilation or system reboots. Integration with the kernel occurs at multiple levels: user-space applications interact through system calls and special file systems like /proc and /sys; device drivers integrate via the device driver model with support for hotplug and power management; kernel modules extend functionality through well-defined APIs for networking, filesystems, and device management; and hardware interacts through standardized buses (PCI, USB, I2C) with abstraction layers that simplify driver development. The build system uses Kconfig for configuration and Kbuild for compilation, supporting cross-compilation for multiple architectures. Developers can build the kernel with `make menuconfig` for configuration and `make -j$(nproc)` for compilation, install modules with `make modules_install`, and deploy with `make install`. The extensive documentation in the Documentation/ directory provides guides for kernel development, driver writing, and API usage in reStructuredText format.