本文共 5460 字,大约阅读时间需要 18 分钟。
ENTRY(entry_SYSCALL_64)
    movq %rsp, PER_CPU_VAR(rsp_scratch)                 // 保存用户堆栈指针
    movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp    // 从 tss 的 sp1 字段取出内核栈顶, 切换到内核栈; syscall 指令只把特权级切换到 ring 0, 并不会修改 rsp 指针, 所以要在这里手动切换
    => # define cpu_current_top_of_stack (cpu_tss_rw + TSS_sp1)
       /*
        * On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find
        * the top of the kernel stack. Use an extra percpu variable to track the
        * top of the kernel stack directly.
        */
       DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) =
           (unsigned long)&init_thread_union + THREAD_SIZE;

    /* 将用户寄存器压入内核栈 (栈顶地址取自 tss 的 sp1 字段) */
    /* Construct struct pt_regs on stack */
    pushq $__USER_DS                    /* pt_regs->ss */
    pushq PER_CPU_VAR(rsp_scratch)      /* pt_regs->sp */
    pushq %r11                          /* pt_regs->flags */
    pushq $__USER_CS                    /* pt_regs->cs */
    pushq %rcx                          /* pt_regs->ip */
GLOBAL(entry_SYSCALL_64_after_hwframe)
    pushq %rax                          /* pt_regs->orig_ax */

    PUSH_AND_CLEAR_REGS rax=$-ENOSYS
    => .macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0
       /*
        * Push registers and sanitize registers of values that a
        * speculation attack might otherwise want to exploit. The
        * lower registers are likely clobbered well before they
        * could be put to use in a speculative execution gadget.
        * Interleave XOR with PUSH for better uop scheduling:
        */
       .if \save_ret
       pushq %rsi               /* pt_regs->si */
       movq 8(%rsp), %rsi       /* temporarily store the return address in %rsi */
       movq %rdi, 8(%rsp)       /* pt_regs->di (overwriting original return address) */
       .else
       pushq %rdi               /* pt_regs->di */
       pushq %rsi               /* pt_regs->si */
       .endif
       pushq \rdx               /* pt_regs->dx */
       pushq %rcx               /* pt_regs->cx */
       pushq \rax               /* pt_regs->ax */
       pushq %r8                /* pt_regs->r8 */
       xorl %r8d, %r8d          /* nospec r8 */
       pushq %r9                /* pt_regs->r9 */
       xorl %r9d, %r9d          /* nospec r9 */
       pushq %r10               /* pt_regs->r10 */
       xorl %r10d, %r10d        /* nospec r10 */
       pushq %r11               /* pt_regs->r11 */
       xorl %r11d, %r11d        /* nospec r11*/
       pushq %rbx               /* pt_regs->rbx */
       xorl %ebx, %ebx          /* nospec rbx*/
       pushq %rbp               /* pt_regs->rbp */
       xorl %ebp, %ebp          /* nospec rbp*/
       pushq %r12               /* pt_regs->r12 */
       xorl %r12d, %r12d        /* nospec r12*/
       pushq %r13               /* pt_regs->r13 */
       xorl %r13d, %r13d        /* nospec r13*/
       pushq %r14               /* pt_regs->r14 */
       xorl %r14d, %r14d        /* nospec r14*/
       pushq %r15               /* pt_regs->r15 */
       xorl %r15d, %r15d        /* nospec r15*/
       UNWIND_HINT_REGS
       .if \save_ret
       pushq %rsi               /* return address on top of stack */
       .endif
       .endm

    /* IRQs are off. */
    movq %rsp, %rdi             // rsp 此时指向内核栈上刚构造好的 pt_regs, 作为参数传给 do_syscall_64
    call do_syscall_64          /* returns with IRQs disabled */
    => __visible void do_syscall_64(struct pt_regs *regs)

pt_regs 的定义如下:

struct pt_regs {
    /*
     * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
     * unless syscall needs a complete, fully filled "struct pt_regs".
     */
    unsigned long r15;
    unsigned long r14;
    unsigned long r13;
    unsigned long r12;
    unsigned long rbp;
    unsigned long rbx;
    /* These regs are callee-clobbered. Always saved on kernel entry. */
    unsigned long r11;
    unsigned long r10;
    unsigned long r9;
    unsigned long r8;
    unsigned long rax;
    unsigned long rcx;
    unsigned long rdx;
    unsigned long rsi;
    unsigned long rdi;
    /*
     * On syscall entry, this is syscall#. On CPU exception, this is error code.
     * On hw interrupt, it's IRQ number:
     */
    unsigned long orig_rax;
    /* Return frame for iretq */
    unsigned long rip;
    unsigned long cs;
    unsigned long eflags;
    unsigned long rsp;
    unsigned long ss;
    /* top of stack page */
};

    POP_REGS pop_rdi=0 skip_r11rcx=1
    => .macro POP_REGS pop_rdi=1 skip_r11rcx=0
       popq %r15
       popq %r14
       popq %r13
       popq %r12
       popq %rbp
       popq %rbx
       .if \skip_r11rcx
       popq %rsi
       .else
       popq %r11
       .endif
       popq %r10
       popq %r9
       popq %r8
       popq %rax
       .if \skip_r11rcx
       popq %rsi
       .else
       popq %rcx
       .endif
       popq %rdx
       popq %rsi
       .if \pop_rdi
       popq %rdi
       .endif
       .endm

    /*
     * Now all regs are restored except RSP and RDI.
     * Save old stack pointer and switch to trampoline stack.
     */
    movq %rsp, %rdi
    movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
    pushq RSP-RDI(%rdi)         /* RSP */
    pushq (%rdi)                /* RDI */

    /*
     * We are on the trampoline stack. All regs except RDI are live.
     * We can do future final exit work right here.
     */
    SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
    => .macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
       pushq %rax
       SWITCH_TO_USER_CR3_NOSTACK scratch_reg=\scratch_reg scratch_reg2=%rax
       popq %rax
       .endm

    popq %rdi
    popq %rsp
    USERGS_SYSRET64
END(entry_SYSCALL_64)
【Linux系统调用】汇编级理解Linux系统调用
https://blog.csdn.net/Alan_cqu_cj/article/details/106272204
深入理解系统调用
https://cnblogs.com/logan233/p/12972732.html
Socket与系统调用深度分析 ——X86 64环境下Linux5.0以上的内核中
https://cnblogs.com/qfdzztt/p/12057457.html
x86体系下linux中的任务切换与TSS
https://blog.csdn.net/dog250/article/details/6203529
Linux Kernel系统调用分析
https://blog.csdn.net/weixin_41943030/article/details/88651574
syscall是x86_64上的指令吗?
http://www.voidcn.com/article/p-hfqyhngm-bud.html
Is syscall an instruction on x86_64?
https://stackoverflow.com/questions/10583891/is-syscall-an-instruction-on-x86-64
What are the calling conventions for UNIX & Linux system calls (and user-space functions) on i386 and x86-64
https://stackoverflow.com/questions/2535989/what-are-the-calling-conventions-for-unix-linux-system-calls-and-user-space-f
Linux syscall过程分析(万字长文)
https://cloud.tencent.com/developer/article/1492374
[原创]简析syscall,sysret和sysenter,sysexit的具体过程
https://bbs.pediy.com/thread-226254.html
Linux内核TSS的使用
https://www.cnblogs.com/long123king/p/3501853.html