Skip to content

天楚锐齿

人工智能 云计算 大数据 物联网 IT 通信 嵌入式

天楚锐齿

  • 下载
  • 物联网
  • 云计算
  • 大数据
  • 人工智能
  • Linux&Android
  • 网络
  • 通信
  • 嵌入式
  • 杂七杂八

linux的arch/arm/boot/compressed/head.S

2018-03-14
#ifdef DEBUG
/*
 * Low-level debug output helpers.  Every configuration defines two macros:
 *   loadsp rb      - load the output port base address into \rb
 *                    (empty when no base address is needed)
 *   writeb ch, rb  - emit the character held in \ch through the port in \rb
 * Macro parameters are referenced with a leading backslash (e.g. \rb).
 */
#if defined(CONFIG_DEBUG_ICEDCC)         /* output through the ARM EmbeddedICE DCC debug channel */
#ifdef CONFIG_CPU_V6
          .macro     loadsp, rb                        /* DCC output uses no base address: empty macro */
          .endm
          .macro     writeb, ch, rb
          mcr     p14, 0, \ch, c0, c5, 0            /* write \ch to the CP14 debug communications channel (CONFIG_CPU_V6 register encoding) */
          .endm
#else
          .macro     loadsp, rb
          .endm
          .macro     writeb, ch, rb
          mcr     p14, 0, \ch, c1, c0, 0
          .endm
#endif
#else
#include <mach/debug-macro.S>
          .macro     writeb,     ch, rb
          senduart \ch, \rb                         /* send \ch out of the UART with the platform senduart macro */
          .endm
#if defined(CONFIG_ARCH_SA1100)
          .macro     loadsp, rb
          mov     \rb, #0x80000000     @ physical base address
#ifdef CONFIG_DEBUG_LL_SER3
          add     \rb, \rb, #0x00050000     @ Ser3 (rb = 0x80050000)
#else
          add     \rb, \rb, #0x00010000     @ Ser1
#endif
          .endm
#elif defined(CONFIG_ARCH_S3C2410)
          .macro loadsp, rb
          mov     \rb, #0x50000000
          add     \rb, \rb, #0x4000 * CONFIG_S3C_LOWLEVEL_UART_PORT   /* UART base = 0x50000000 + 0x4000 * port number */
          .endm
#else
          .macro     loadsp,     rb
          addruart \rb                               /* the platform addruart macro supplies the UART base address */
          .endm
#endif
#endif
#endif
          /* kputc val: print one character; moves \val into r0 and calls putc (r0 and lr are overwritten). */
          .macro     kputc,val
          mov     r0, \val
          bl     putc
          .endm
          /*
           * kphex val, len: print \val as hexadecimal using \len digits.
           * Loads r0 = \val and r1 = \len, then calls phex (r0, r1 and lr are overwritten).
           */
          .macro     kphex,val,len
          mov     r0, \val
          mov     r1, #\len
          bl     phex
          .endm
          .macro     debug_reloc_start                                        /*输出重定位的起始信息,包括处理器id,架构id,控制寄存器值,需解压内核(gz格式)的起始地址,结束地址,内核实际执行地址*/
#ifdef DEBUG
          kputc     #’\n’
          kphex     r6, 8          /* processor id */
          kputc     #’:’
          kphex     r7, 8          /* architecture id */
#ifdef CONFIG_CPU_CP15
          kputc     #’:’
          mrc     p15, 0, r0, c1, c0
          kphex     r0, 8          /* control reg */
#endif
          kputc     #’\n’
          kphex     r5, 8          /* decompressed kernel start */
          kputc     #’-‘
          kphex     r9, 8          /* decompressed kernel end  */
          kputc     #’>’
          kphex     r4, 8          /* kernel execution address */
          kputc     #’\n’
#endif
          .endm
          .macro     debug_reloc_end                                                      /*输出内核实际将要执行结束地址*/
#ifdef DEBUG
          kphex     r5, 8          /* end of kernel */
          kputc     #’\n’
          mov     r0, r4
          bl     memdump          /* dump 256 bytes at start of kernel */   /*从实际执行地址开始,就是已经解压了的执行开始,输出256字节内容*/
#endif
          .endm
          .section “.start”, #alloc, #execinstr                                        /*start段开始,该段含有分配的数据和可执行的指令*/
/*
* sort out different calling conventions
*/
          .align
start:
          .type     start,#function                         /*.type指定start这个符号是函数类型*/
          .rept     8
          mov     r0, r0                                      /*保留8条空指令,跟中断向量表一样大*/
          .endr
          b     1f                                              /*跳到下面的1处执行*/
          .word     0x016f2818          @ Magic numbers to help the loader
          .word     start               @ absolute load/run zImage address
          .word     _edata               @ zImage end address  /*定义在vmlinux.lds.in里面,整个长度包含指令段、GOT/PLT和DATA段*/
1:          mov     r7, r1               @ save architecture ID      /*从uboot传入的参数:r0: 固定0,r1: 架构id,r2: 启动参数*/
          mov     r8, r2               @ save atags pointer
#ifndef __ARM_ARCH_2__
          /*
          * Booting from Angel – need to enter SVC mode and disable
          * FIQs/IRQs (numeric definitions from angel arm.h source).
          * We only do this if we were in user mode on entry.
          */
          mrs     r2, cpsr          @ get current mode            /*读取cpsr并判断是否处理器处于supervisor模式——从u-boot进入kernel,系统已经处于SVC32模式;而利用angel进入则处于user模式,还需要额外两条指令。之后是再次确认中断关闭,并完成cpsr写入。Angel 是 ARM 的调试协议,现在用的 MULTI-ICE 用的是 RDI 通讯协议, ANGLE 需要在板子上有 驻留程序,然后通过 串口就可以调试了。这里介绍一下半主机:半主机是用于 ARM 目标的一种机制,可将来自应用程序代码的输入/输出请求传送至运行调试器的主机。 例如,使用此机制可以启用 C 库中的函数,如printf() 和 scanf(),来使用主机的屏幕和键盘,而不是在目标系统上配备屏幕和键盘。半主机是通过一组定义好的软件指令(如 swi)来实现的,这些指令通过程序控制生成异常。 应用程序调用相应的半主机调用,然后调试代理处理该异常。 调试代理提供与主机之间的必需通信。 */
          tst     r2, #3               @ not user?
          bne     not_angel
          mov     r0, #0x17          @ angel_SWIreason_EnterSVC
          swi     0x123456          @ angel_SWI_ARM             /*0x17是angel_SWIreason_EnterSVC半主机操作,将处理器设置为超级用户模式,通过设置新 CPSR 中的两个中断掩码位来禁用所有中断。0x123456是arm指令集的半主机操作编号 */
not_angel:
          mrs     r2, cpsr          @ turn off interrupts to
          orr     r2, r2, #0xc0          @ prevent angel from running
          msr     cpsr_c, r2                                                        /*这里将cpsr中I、F位分别置“1”,关闭IRQ和FIQ */
#else
          teqp     pc, #0x0c000003          @ turn off interrupts    /*常用 TEQP PC,#(新模式编号) 来改变模式,就是存入CPSR的值为pc^ 0x0c000003(SVC模式)*/
#endif
          /*
          * Note that some cache flushing and other stuff may
          * be needed here - is there an Angel SWI call for this?
          */
/*
* Annotation: the linker may place processor-specific code at this point,
* i.e. code from arch/arm/boot/compressed/head-xxx.S that performs I/D cache
* and MMU operations; such code is expected to live in the .start section.
*/
          /*
          * some architecture specific code can be inserted
          * by the linker here, but it should preserve r7, r8, and r9.
          */
          .text                                                                             /* switch to the .text section */
          adr     r0, LC0                                                              /* r0 = run-time (PC-relative) address of the LC0 table */
          ldmia     r0, {r1, r2, r3, r4, r5, r6, ip, sp}                        /* load the eight link-time words stored at LC0 into r1-r6, ip and sp */
          subs     r0, r0, r1          @ calculate the delta offset        /* r1 = link-time address of LC0, so r0 = run-time minus link-time address */
                              @ if delta is zero, we are
          beq     not_relocated          @ running at the address we
                              @ were linked at.
          /*
          * We're running at a different address.  We need to fix
          * up various pointers:
          *   r5 - zImage base address
          *   r6 - GOT start
          *   ip - GOT end
          */
          add     r5, r5, r0                     /* r5 (_start) += delta */
          add     r6, r6, r0                     /* r6 (_got_start) += delta */
          add     ip, ip, r0                     /* ip (_got_end) += delta */
#ifndef CONFIG_ZBOOT_ROM
          /*
          * If we're running fully PIC === CONFIG_ZBOOT_ROM = n,
          * we need to fix up pointers into the BSS region.
          *   r2 - BSS start
          *   r3 - BSS end
          *   sp - stack pointer
          */
          add     r2, r2, r0           /* adjust BSS start, BSS end and the stack pointer by delta */
          add     r3, r3, r0
          add     sp, sp, r0
          /*
          * Relocate all entries in the GOT table.
          */
1:          ldr     r1, [r6, #0]          @ relocate entries in the GOT /* r1 = current GOT entry (a link-time address) */
          add     r1, r1, r0          @ table.  This fixes up the           /* add delta to the entry */
          str     r1, [r6], #4          @ C references.                       /* store it back and advance r6 to the next entry */
          cmp     r6, ip                                                         /* loop until r6 reaches the end of the GOT */
          blo     1b
#else
          /*
          * Relocate entries in the GOT table.  We only relocate
          * the entries that are outside the (relocated) BSS region.
          */
1:          ldr     r1, [r6, #0]          @ relocate entries in the GOT   /* r1 = current GOT entry */
          cmp     r1, r2               @ entry < bss_start ||
          cmphs     r3, r1               @ _end < entry
          addlo     r1, r1, r0          @ table.  This fixes up the
          str     r1, [r6], #4          @ C references.
          cmp     r6, ip
          blo     1b
#endif
/*
* Zero the BSS from r2 (start) up to r3 (end).  Four words are stored per
* iteration and the bound is tested only once per 16 bytes, so up to 12 bytes
* past r3 may be cleared when the BSS size is not a multiple of 16.
*/
not_relocated:     mov     r0, #0
1:          str     r0, [r2], #4          @ clear bss
          str     r0, [r2], #4
          str     r0, [r2], #4
          str     r0, [r2], #4
          cmp     r2, r3
          blo     1b
          /*
          * The C runtime environment should now be setup
          * sufficiently.  Turn the cache on, set up some
          * pointers, and start decompressing.
          */
          bl     cache_on                                                       /*打开cache,看下面代码*/
          mov     r1, sp               @ malloc space above stack
          add     r2, sp, #0x10000     @ 64k max                    /*栈空间大小是4096字节,那么在栈空间地址上面再malloc 64K字节空间,这个给c函数malloc、free用*/
/*
* Check to see if we will overwrite ourselves.
*   r4 = final kernel address       /*最终解压后的内核地址*/
*   r5 = start of this image         /*zImage的运行时地址,就是这个head.S的start处的地址*/
*   r2 = end of malloc space (and therefore this image) /*指向栈再加64KB处*/
* We basically want:
*   r4 >= r2 -> OK
*   r4 + image length <= r5 -> OK
*/
          cmp     r4, r2
          bhs     wont_overwrite                                              /*r4>r2当然就不会发生地址冲突*/
          sub     r3, sp, r5          @ > compressed kernel size
          add     r0, r4, r3, lsl #2     @ allow for 4x expansion
          cmp     r0, r5
          bls     wont_overwrite                                               /*r5 > r4 + zimage长度,也不会冲突*/
          mov     r5, r2               @ decompress after malloc space /*有冲突,先解压到r2开始的位置,然后把将要被覆盖的代码段挪到解压之后的vmlinux的上面,最后把解压后的vmlinux挪到执行位置r4位置*/
          mov     r0, r5
          mov     r3, r7
          bl     decompress_kernel                                    /*misc.c的函数,解压内核,r0:解压后的起始地址,r1:malloc内存时的开始位置,r2:malloc内存时的结束位置,r3:架构id;解压缩的时候不需要压缩映象的起始地址,因为在misc.c里面直接使用的是arh/arm/boot/compressed/piggy.S里面定义的input_data和input_data_end。*/
          add     r0, r0, #127 + 128     @ alignment + stack
          bic     r0, r0, #127          @ align the kernel length /*r0为解压函数返回的解压缩后的内核开始位置,这里把内核对齐到128字节*/
/*
* r0     = decompressed kernel length
* r1-r3  = unused
* r4     = kernel execution address
* r5     = decompressed kernel start
* r6     = processor ID
* r7     = architecture ID
* r8     = atags pointer
* r9-r14 = corrupted
*/
          add     r1, r5, r0          @ end of decompressed kernel   /*r1为解压缩后的内核结束位置*/
          adr     r2, reloc_start     /*把后面需要使用到的代码挪到解压缩后的内核的后面位置,reloc_start为代码起始位置*/
          ldr     r3, LC1                 /*需要reloc重定位的大小*/
          add     r3, r2, r3             /*r3为结束地址*/
1:          ldmia     r2!, {r9 – r14}          @ copy relocation code
          stmia     r1!, {r9 – r14}          /*搬移代码*/
          ldmia     r2!, {r9 – r14}
          stmia     r1!, {r9 – r14}
          cmp     r2, r3
          blo     1b
          add     sp, r1, #128          @ relocate the stack  /*重定位栈指针*/
          bl     cache_clean_flush                                 /*flush所有cache*/
          add     pc, r5, r0          @ call relocation code  /*跳转到reloc_start位置执行*/
/*
* We're not in danger of overwriting ourselves.  Do this the simple way.
*
* r4     = kernel execution address
* r7     = architecture ID
*
* r1 and r2 still hold the malloc area start/end set up before the overlap check.
*/
wont_overwrite:     mov     r0, r4     /* decompress straight to the execution address */
          mov     r3, r7                       /* architecture ID argument */
          bl     decompress_kernel      /* decompress in place */
          b     call_kernel                   /* flush/disable caches and enter the kernel */
          .type     LC0, #object                      /*对象LC0存放各个入口地址*/
LC0:          .word     LC0               @ r1
          .word     __bss_start          @ r2
          .word     _end               @ r3
          .word     zreladdr          @ r4                 /*这个定义在makefile文件中:ZRELADDR*/
          .word     _start               @ r5
          .word     _got_start          @ r6
          .word     _got_end          @ ip
          .word     user_stack+4096          @ sp
LC1:          .word     reloc_end – reloc_start  /*需要重定位的长度*/
          .size     LC0, . – LC0                      /*LC0大小*/
#ifdef CONFIG_ARCH_RPC
/* params: global helper (CONFIG_ARCH_RPC only) that returns the value of params_phys in r0. */
          .globl     params
params:          ldr     r0, =params_phys
          mov     pc, lr
          .ltorg                    /* emit the literal pool used by the ldr = above */
          .align
#endif
/*
* Turn on the cache.  We need to setup some page tables so that we
* can have both the I and D caches on.
*
* We place the page tables 16k down from the kernel execution address,
* and we hope that nothing else is using it.  If we're using it, we
* will go pop!
*
* On entry,
*  r4 = kernel execution address
*  r6 = processor ID
*  r7 = architecture number
*  r8 = atags pointer
*  r9 = run-time address of "start"  (???)
* On exit,
*  r1, r2, r3, r9, r10, r12 corrupted
* This routine must preserve:
*  r4, r5, r6, r7, r8
*/
          .align     5                                                             /* align to 2^5 = 32 bytes */
cache_on:     mov     r3, #8               @ cache_on function   /* r3 = byte offset of the "cache on" slot in a proc_types entry */
          b     call_cache_fn                                                  /* dispatch to the matching implementation */
/*
* Initialize the highest priority protection region, PR7
* to cover all 32bit address and cacheable and bufferable.
*
* After programming the region, the write buffer is drained, both caches
* are invalidated, control-register bits 0x002d and 0x1000 are set, and the
* caches are invalidated once more.  r0 is used as scratch.
*/
__armv4_mpu_cache_on:
          mov     r0, #0x3f          @ 4G, the whole
          mcr     p15, 0, r0, c6, c7, 0     @ PR7 Area Setting
          mcr      p15, 0, r0, c6, c7, 1
          mov     r0, #0x80          @ PR7
          mcr     p15, 0, r0, c2, c0, 0     @ D-cache on
          mcr     p15, 0, r0, c2, c0, 1     @ I-cache on
          mcr     p15, 0, r0, c3, c0, 0     @ write-buffer on
          mov     r0, #0xc000
          mcr     p15, 0, r0, c5, c0, 1     @ I-access permission
          mcr     p15, 0, r0, c5, c0, 0     @ D-access permission
          mov     r0, #0
          mcr     p15, 0, r0, c7, c10, 4     @ drain write buffer
          mcr     p15, 0, r0, c7, c5, 0     @ flush(inval) I-Cache
          mcr     p15, 0, r0, c7, c6, 0     @ flush(inval) D-Cache
          mrc     p15, 0, r0, c1, c0, 0     @ read control reg
                              @ ...I .... ..D. WC.M
          orr     r0, r0, #0x002d          @ .... .... ..1. 11.1
          orr     r0, r0, #0x1000          @ ...1 .... .... ....
          mcr     p15, 0, r0, c1, c0, 0     @ write control reg
          mov     r0, #0
          mcr     p15, 0, r0, c7, c5, 0     @ flush(inval) I-Cache
          mcr     p15, 0, r0, c7, c6, 0     @ flush(inval) D-Cache
          mov     pc, lr
/*
* ARMv3 MPU variant of cache_on: program protection region 7 to cover the
* whole 4G space as cacheable/bufferable with access permission 0xc000,
* invalidate the cache, then set the W, C and M bits (0x000d) in the
* control register.  r0 is used as scratch.
*
* The control register is written with the value built by the orr; r0 is
* zeroed only afterwards, for the final cache invalidate.  This is the same
* ordering as __armv4_mpu_cache_on.  Zeroing r0 before the write would
* discard the enable bits and store 0 into the control register.
*/
__armv3_mpu_cache_on:
          mov     r0, #0x3f          @ 4G, the whole
          mcr     p15, 0, r0, c6, c7, 0     @ PR7 Area Setting
          mov     r0, #0x80          @ PR7
          mcr     p15, 0, r0, c2, c0, 0     @ cache on
          mcr     p15, 0, r0, c3, c0, 0     @ write-buffer on
          mov     r0, #0xc000
          mcr     p15, 0, r0, c5, c0, 0     @ access permission
          mov     r0, #0
          mcr     p15, 0, r0, c7, c0, 0     @ invalidate whole cache v3
          mrc     p15, 0, r0, c1, c0, 0     @ read control reg
                              @ .... .... .... WC.M
          orr     r0, r0, #0x000d          @ .... .... .... 11.1
          mcr     p15, 0, r0, c1, c0, 0     @ write control reg
          mov     r0, #0
          mcr     p15, 0, r0, c7, c0, 0     @ invalidate whole cache v3
          mov     pc, lr
/*
* __setup_mmu: build a 16 KiB first-level page table of 4096 one-megabyte
* section descriptors that maps the whole 4 GiB address space 1:1.
*
* On entry: r4 = kernel execution address.
* On exit:  r3 = page table base (r4 - 16384 with the low 14 bits cleared);
*           r0, r1, r2, r9, r10 are overwritten.
*/
__setup_mmu:     sub     r3, r4, #16384          @ Page directory size
          bic     r3, r3, #0xff          @ Align the pointer
          bic     r3, r3, #0x3f00
/*
* Initialise the page tables, turning on the cacheable and bufferable
* bits for the RAM area only.
*/
          mov     r0, r3
          mov     r9, r0, lsr #18
          mov     r9, r9, lsl #18          @ start of RAM  /* page table address rounded down to 256 KiB */
          add     r10, r9, #0x10000000     @ a reasonable RAM size  /* treat the next 256 MiB as RAM */
          mov     r1, #0x12             /* section descriptor: bits[1:0] = 10, bit 4 set */
          orr     r1, r1, #3 << 10     /* access permission bits[11:10] = 11 */
          add     r2, r3, #16384       /* r2 = end of the page table */
1:          cmp     r1, r9               @ if virt > start of RAM
          orrhs     r1, r1, #0x0c          @ set cacheable, bufferable
          cmp     r1, r10               @ if virt > end of RAM
          bichs     r1, r1, #0x0c          @ clear cacheable, bufferable
          str     r1, [r0], #4          @ 1:1 mapping
          add     r1, r1, #1048576                                                /* advance to the next 1 MiB section */
          teq     r0, r2                                                                  /* stop when the whole table is filled */
          bne     1b
/*
* If ever we are running from Flash, then we surely want the cache
* to be enabled also for our execution instance...  We map 2MB of it
* so there is no map overlap problem for up to 1 MB compressed kernel.
* If the execution is in RAM then we would only be duplicating the above.
*/
          mov     r1, #0x1e
          orr     r1, r1, #3 << 10     /* section descriptor with bits[4:1] and bits[11:10] set (cacheable, bufferable) */
          mov     r2, pc, lsr #20
          orr     r1, r1, r2, lsl #20    /* descriptor for the 1 MiB section containing the current pc */
          add     r0, r3, r2, lsl #2     /* r0 = address of that section's entry in the table (index * 4) */
          str     r1, [r0], #4
          add     r1, r1, #1048576
          str     r1, [r0]                  /* also map the following 1 MiB section */
          mov     pc, lr
ENDPROC(__setup_mmu)
/*
* ARMv4 MMU variant of cache_on: build the page table, drain the write
* buffer, flush the TLBs, then enable the MMU and caches through
* __common_mmu_cache_on.  lr is kept in r12 across the nested calls.
*/
__armv4_mmu_cache_on:
          mov     r12, lr
          bl     __setup_mmu          /* leaves the page table base in r3 */
          mov     r0, #0
          mcr     p15, 0, r0, c7, c10, 4     @ drain write buffer
          mcr     p15, 0, r0, c8, c7, 0     @ flush I,D TLBs
          mrc     p15, 0, r0, c1, c0, 0     @ read control reg
          orr     r0, r0, #0x5000          @ I-cache enable, RR cache replacement
          orr     r0, r0, #0x0030
          bl     __common_mmu_cache_on   /* loads the table pointer and writes the control register */
          mov     r0, #0
          mcr     p15, 0, r0, c8, c7, 0     @ flush I,D TLBs
          mov     pc, r12                             /* return through the saved lr */
/*
* ARMv7 variant of cache_on.  ID_MMFR0 is read into r11; only when its low
* four bits (tested as "VMSA") are non-zero is the page table built and the
* MMU enabled - all "ne" conditional instructions below depend on that test.
* The cache enable bits are set in either case.  lr is kept in r12.
*/
__armv7_mmu_cache_on:
          mov     r12, lr
          mrc     p15, 0, r11, c0, c1, 4     @ read ID_MMFR0
          tst     r11, #0xf          @ VMSA
          blne     __setup_mmu
          mov     r0, #0
          mcr     p15, 0, r0, c7, c10, 4     @ drain write buffer
          tst     r11, #0xf          @ VMSA  /* re-test: flags were changed inside __setup_mmu */
          mcrne     p15, 0, r0, c8, c7, 0     @ flush I,D TLBs
          mrc     p15, 0, r0, c1, c0, 0     @ read control reg
          orr     r0, r0, #0x5000          @ I-cache enable, RR cache replacement
          orr     r0, r0, #0x003c          @ write buffer
          orrne     r0, r0, #1          @ MMU enabled
          movne     r1, #-1
          mcrne     p15, 0, r3, c2, c0, 0     @ load page table pointer
          mcrne     p15, 0, r1, c3, c0, 0     @ load domain access control
          mcr     p15, 0, r0, c1, c0, 0     @ load control register
          mrc     p15, 0, r0, c1, c0, 0     @ and read it back
          mov     r0, #0
          mcr     p15, 0, r0, c7, c5, 4     @ ISB
          mov     pc, r12
/*
* ARM6 variant of cache_on: build the page table, invalidate the cache and
* TLB with the v3 operations, then enable through __common_mmu_cache_on
* starting from a control value of 0x30.  lr is kept in r12.
* Within this file it is referenced only from a commented-out ("untested")
* proc_types entry.
*/
__arm6_mmu_cache_on:
          mov     r12, lr
          bl     __setup_mmu
          mov     r0, #0
          mcr     p15, 0, r0, c7, c0, 0     @ invalidate whole cache v3
          mcr     p15, 0, r0, c5, c0, 0     @ invalidate whole TLB v3
          mov     r0, #0x30
          bl     __common_mmu_cache_on
          mov     r0, #0
          mcr     p15, 0, r0, c5, c0, 0     @ invalidate whole TLB v3
          mov     pc, r12
/*
* __common_mmu_cache_on: shared tail of the MMU cache_on variants.
* On entry: r0 = control register value prepared by the caller,
*           r3 = page table base.
* Sets the write-buffer/cache/MMU bits (0x000d) unless DEBUG is defined,
* loads the translation table base and an all-ones domain access value,
* then writes the control register.  r1 is overwritten.
*/
__common_mmu_cache_on:
#ifndef DEBUG
          orr     r0, r0, #0x000d          @ Write buffer, mmu
#endif
          mov     r1, #-1
          mcr     p15, 0, r3, c2, c0, 0     @ load page table pointer
          mcr     p15, 0, r1, c3, c0, 0     @ load domain access control
          b     1f
          .align     5               @ cache line aligned (32 bytes)
1:          mcr     p15, 0, r0, c1, c0, 0     @ load control register
          mrc     p15, 0, r0, c1, c0, 0     @ and read it back to
          sub     pc, lr, r0, lsr #32     @ properly flush pipeline  /* r0 lsr #32 is 0, so this returns to lr while depending on the read-back */
/*
* All code following this line is relocatable.  It is relocated by
* the above code to the end of the decompressed kernel image and
* executed there.  During this time, we have no stacks.
*
* r0     = decompressed kernel length
* r1-r3  = unused
* r4     = kernel execution address
* r5     = decompressed kernel start
* r6     = processor ID
* r7     = architecture ID
* r8     = atags pointer
* r9-r14 = corrupted
*
* Copies the decompressed kernel from r5 to r4, 4 x 8 words (128 bytes)
* per loop iteration, then falls through into call_kernel.
*/
          .align     5
reloc_start:     add     r9, r5, r0
          sub     r9, r9, #128          @ do not copy the stack  (r9 = end of the data to copy)
          debug_reloc_start
          mov     r1, r4                   /* r1 = destination: kernel execution address */
1:
          .rept     4
          ldmia     r5!, {r0, r2, r3, r10 - r14}     @ relocate kernel
          stmia     r1!, {r0, r2, r3, r10 - r14}
          .endr
          cmp     r5, r9
          blo     1b
          add     sp, r1, #128          @ relocate the stack
          debug_reloc_end
/*
* call_kernel: clean/flush and disable the caches, then jump to the kernel
* at r4 with r0 = 0, r1 = architecture number (from r7) and
* r2 = atags pointer (from r8).  Does not return.
*/
call_kernel:     bl     cache_clean_flush
          bl     cache_off
          mov     r0, #0               @ must be zero
          mov     r1, r7               @ restore architecture number
          mov     r2, r8               @ restore atags pointer
          mov     pc, r4               @ call kernel
/*
* Here follow the relocatable cache support functions for the
* various processors.  This is a generic hook for locating an
* entry and jumping to an instruction at the specified offset
* from the start of the block.  Please note this is all position
* independent code.
*
*  r1  = corrupted
*  r2  = corrupted
*  r3  = block offset
*  r6  = corrupted
*  r12 = corrupted
*
* Each proc_types entry is five words: ID value, ID mask, then one
* instruction each for cache on (offset 8), cache off (offset 12) and
* cache flush (offset 16).  The selected instruction is entered by adding
* to pc, so lr is untouched and the chosen routine returns directly to
* whoever called cache_on / cache_off / cache_clean_flush.
* The final table entry has a zero mask and therefore always matches.
*/
call_cache_fn:     adr     r12, proc_types                     /* r12 = run-time address of the table */
#ifdef CONFIG_CPU_CP15
          mrc     p15, 0, r6, c0, c0     @ get processor ID
#else
          ldr     r6, =CONFIG_PROCESSOR_ID               /* no CP15: use the configured processor ID */
#endif
1:          ldr     r1, [r12, #0]          @ get value
          ldr     r2, [r12, #4]          @ get mask
          eor     r1, r1, r6          @ (real ^ match)
          tst     r1, r2               @       & mask
          addeq     pc, r12, r3          @ call cache function
          add     r12, r12, #4*5                                       /* no match: advance to the next 20-byte entry */
          b     1b
/*
* Table for cache operations.  This is basically:
*   - CPU ID match
*   - CPU ID mask
*   - 'cache on' method instruction
*   - 'cache off' method instruction
*   - 'cache flush' method instruction
*
* We match an entry using: ((real_id ^ match) & mask) == 0
*
* Writethrough caches generally only need 'on' and 'off'
* methods.  Writeback caches _must_ have the flush method
* defined.
*
* Every entry is exactly five words (20 bytes); call_cache_fn relies on
* this layout.  The last entry has a zero mask and matches any CPU.
*/
          .type     proc_types,#object
proc_types:
          .word     0x41560600          @ ARM6/610
          .word     0xffffffe0
          b     __arm6_mmu_cache_off     @ works, but slow
          b     __arm6_mmu_cache_off
          mov     pc, lr
@          b     __arm6_mmu_cache_on          @ untested
@          b     __arm6_mmu_cache_off
@          b     __armv3_mmu_cache_flush
          .word     0x00000000          @ old ARM ID
          .word     0x0000f000
          mov     pc, lr
          mov     pc, lr
          mov     pc, lr
          .word     0x41007000          @ ARM7/710
          .word     0xfff8fe00
          b     __arm7_mmu_cache_off
          b     __arm7_mmu_cache_off
          mov     pc, lr
          .word     0x41807200          @ ARM720T (writethrough)
          .word     0xffffff00
          b     __armv4_mmu_cache_on
          b     __armv4_mmu_cache_off
          mov     pc, lr
          .word     0x41007400          @ ARM74x
          .word     0xff00ff00
          b     __armv3_mpu_cache_on
          b     __armv3_mpu_cache_off
          b     __armv3_mpu_cache_flush
          .word     0x41009400          @ ARM94x
          .word     0xff00ff00
          b     __armv4_mpu_cache_on
          b     __armv4_mpu_cache_off
          b     __armv4_mpu_cache_flush
          .word     0x00007000          @ ARM7 IDs
          .word     0x0000f000
          mov     pc, lr
          mov     pc, lr
          mov     pc, lr
          @ Everything from here on will be the new ID system.
          .word     0x4401a100          @ sa110 / sa1100
          .word     0xffffffe0
          b     __armv4_mmu_cache_on
          b     __armv4_mmu_cache_off
          b     __armv4_mmu_cache_flush
          .word     0x6901b110          @ sa1110
          .word     0xfffffff0
          b     __armv4_mmu_cache_on
          b     __armv4_mmu_cache_off
          b     __armv4_mmu_cache_flush
          .word     0x56050000          @ Feroceon
          .word     0xff0f0000
          b     __armv4_mmu_cache_on
          b     __armv4_mmu_cache_off
          b     __armv5tej_mmu_cache_flush
          @ These match on the architecture ID
          .word     0x00020000          @ ARMv4T
          .word     0x000f0000
          b     __armv4_mmu_cache_on
          b     __armv4_mmu_cache_off
          b     __armv4_mmu_cache_flush
          .word     0x00050000          @ ARMv5TE
          .word     0x000f0000
          b     __armv4_mmu_cache_on
          b     __armv4_mmu_cache_off
          b     __armv4_mmu_cache_flush
          .word     0x00060000          @ ARMv5TEJ
          .word     0x000f0000
          b     __armv4_mmu_cache_on
          b     __armv4_mmu_cache_off
          b     __armv5tej_mmu_cache_flush
          .word     0x0007b000          @ ARMv6
          .word     0x000ff000
          b     __armv4_mmu_cache_on
          b     __armv4_mmu_cache_off
          b     __armv6_mmu_cache_flush
          .word     0x000f0000          @ new CPU Id
          .word     0x000f0000
          b     __armv7_mmu_cache_on
          b     __armv7_mmu_cache_off
          b     __armv7_mmu_cache_flush
          .word     0               @ unrecognised type
          .word     0
          mov     pc, lr
          mov     pc, lr
          mov     pc, lr
          .size     proc_types, . - proc_types
/*
* Turn off the Cache and MMU.  ARMv3 does not support
* reading the control register, but ARMv4 does.
*
* On entry,  r6 = processor ID
* On exit,   r0, r1, r2, r3, r12 corrupted
* This routine must preserve: r4, r6, r7
*/
          .align     5
cache_off:     mov     r3, #12               @ cache_off function  (offset of the "cache off" slot in a proc_types entry)
          b     call_cache_fn
/*
* ARMv4 MPU cache off: clear control-register bits 0x000d, then drain the
* write buffer and flush both caches.  r0 is used as scratch.
*/
__armv4_mpu_cache_off:
          mrc     p15, 0, r0, c1, c0
          bic     r0, r0, #0x000d
          mcr     p15, 0, r0, c1, c0     @ turn MPU and cache off
          mov     r0, #0
          mcr     p15, 0, r0, c7, c10, 4     @ drain write buffer
          mcr     p15, 0, r0, c7, c6, 0     @ flush D-Cache
          mcr     p15, 0, r0, c7, c5, 0     @ flush I-Cache
          mov     pc, lr
/*
* ARMv3 MPU cache off: clear control-register bits 0x000d, then invalidate
* the whole cache with the v3 operation.  r0 is used as scratch.
*/
__armv3_mpu_cache_off:
          mrc     p15, 0, r0, c1, c0
          bic     r0, r0, #0x000d
          mcr     p15, 0, r0, c1, c0, 0     @ turn MPU and cache off
          mov     r0, #0
          mcr     p15, 0, r0, c7, c0, 0     @ invalidate whole cache v3
          mov     pc, lr
/*
* ARMv4 MMU cache off: clear control-register bits 0x000d, then invalidate
* the whole cache and the whole TLB.  r0 is used as scratch.
*/
__armv4_mmu_cache_off:
          mrc     p15, 0, r0, c1, c0
          bic     r0, r0, #0x000d
          mcr     p15, 0, r0, c1, c0     @ turn MMU and cache off
          mov     r0, #0
          mcr     p15, 0, r0, c7, c7     @ invalidate whole cache v4
          mcr     p15, 0, r0, c8, c7     @ invalidate whole TLB v4
          mov     pc, lr
/*
* ARMv7 MMU cache off: clear control-register bits 0x000d, flush the caches
* with __armv7_mmu_cache_flush, then invalidate the whole TLB.
* lr is kept in r12 across the nested call; r0 is used as scratch.
*/
__armv7_mmu_cache_off:
          mrc     p15, 0, r0, c1, c0
          bic     r0, r0, #0x000d
          mcr     p15, 0, r0, c1, c0     @ turn MMU and cache off
          mov     r12, lr
          bl     __armv7_mmu_cache_flush
          mov     r0, #0
          mcr     p15, 0, r0, c8, c7, 0     @ invalidate whole TLB
          mov     pc, r12
/* ARM6 cache off: load the fixed control value 0x30 and continue in __armv3_mmu_cache_off. */
__arm6_mmu_cache_off:
          mov     r0, #0x00000030          @ ARM6 control reg.
          b     __armv3_mmu_cache_off
/* ARM7 cache off: load the fixed control value 0x70 and continue in __armv3_mmu_cache_off. */
__arm7_mmu_cache_off:
          mov     r0, #0x00000070          @ ARM7 control reg.
          b     __armv3_mmu_cache_off
/*
* ARMv3 MMU cache off.  On entry r0 = the value to write to the control
* register (supplied by the ARM6/ARM7 stubs, which do not read the register
* first).  Afterwards the whole cache and TLB are invalidated with the v3
* operations.
*/
__armv3_mmu_cache_off:
          mcr     p15, 0, r0, c1, c0, 0     @ turn MMU and cache off
          mov     r0, #0
          mcr     p15, 0, r0, c7, c0, 0     @ invalidate whole cache v3
          mcr     p15, 0, r0, c5, c0, 0     @ invalidate whole TLB v3
          mov     pc, lr
/*
* Clean and flush the cache to maintain consistency.
*
* On entry,
*  r6 = processor ID
* On exit,
*  r1, r2, r3, r11, r12 corrupted
* This routine must preserve:
*  r0, r4, r5, r6, r7
*/
          .align     5
cache_clean_flush:
          mov     r3, #16          /* offset of the "cache flush" slot in a proc_types entry */
          b     call_cache_fn
/*
* ARMv4 MPU cache flush: clean and invalidate the D cache by index,
* walking 8 segments x 64 entries, then invalidate the I cache and drain
* the write buffer.  r1, r2 and r3 are overwritten.
* r2 is set to 1 on entry, so the conditional I-cache invalidate always runs.
* NOTE(review): ip is passed as the data operand of three MCRs without being
* set in this routine - presumably the value is ignored for those operations;
* confirm against the CPU manual.
*/
__armv4_mpu_cache_flush:
          mov     r2, #1
          mov     r3, #0
          mcr     p15, 0, ip, c7, c6, 0     @ invalidate D cache
          mov     r1, #7 << 5          @ 8 segments
1:          orr     r3, r1, #63 << 26     @ 64 entries
2:          mcr     p15, 0, r3, c7, c14, 2     @ clean & invalidate D index
          subs     r3, r3, #1 << 26
          bcs     2b               @ entries 63 to 0
          subs      r1, r1, #1 << 5
          bcs     1b               @ segments 7 to 0
          teq     r2, #0
          mcrne     p15, 0, ip, c7, c5, 0     @ invalidate I cache
          mcr     p15, 0, ip, c7, c10, 4     @ drain WB
          mov     pc, lr
/*
* ARMv6 cache flush: clean+invalidate the D cache, invalidate I cache and
* BTB, clean+invalidate the unified cache, then drain the write buffer.
* r1 is zeroed and used as the operand for every operation.
*/
__armv6_mmu_cache_flush:
          mov     r1, #0
          mcr     p15, 0, r1, c7, c14, 0     @ clean+invalidate D
          mcr     p15, 0, r1, c7, c5, 0     @ invalidate I+BTB
          mcr     p15, 0, r1, c7, c15, 0     @ clean+invalidate unified
          mcr     p15, 0, r1, c7, c10, 4     @ drain WB
          mov     pc, lr
/*
* ARMv7 cache flush.  ID_MMFR1 bits [19:16] select the method:
*   non-zero -> a single clean+invalidate-D operation;
*   zero     -> "hierarchical": walk every cache level reported by CLIDR
*               and clean+invalidate each data/unified cache by set/way.
* Both paths finish at iflush (invalidate I+BTB, drain write buffer).
* The hierarchical path saves and restores r0-r5, r7 and r9-r11 on the
* stack, so a valid sp is required; r10 is zero at iflush on both paths.
*/
__armv7_mmu_cache_flush:
          mrc     p15, 0, r10, c0, c1, 5     @ read ID_MMFR1
          tst     r10, #0xf << 16          @ hierarchical cache (ARMv7)
          beq     hierarchical
          mov     r10, #0
          mcr     p15, 0, r10, c7, c14, 0     @ clean+invalidate D
          b     iflush
hierarchical:
          stmfd     sp!, {r0-r5, r7, r9-r11}
          mrc     p15, 1, r0, c0, c0, 1     @ read clidr
          ands     r3, r0, #0x7000000     @ extract loc from clidr
          mov     r3, r3, lsr #23          @ left align loc bit field  (flags from ands are preserved)
          beq     finished          @ if loc is 0, then no need to clean
          mov     r10, #0               @ start clean at cache level 0
loop1:
          add     r2, r10, r10, lsr #1     @ work out 3x current cache level
          mov     r1, r0, lsr r2          @ extract cache type bits from clidr
          and     r1, r1, #7          @ mask of the bits for current cache only
          cmp     r1, #2               @ see what cache we have at this level
          blt     skip               @ skip if no cache, or just i-cache
          mcr     p15, 2, r10, c0, c0, 0     @ select current cache level in cssr
          mcr     p15, 0, r10, c7, c5, 4     @ isb to sync the new cssr&csidr
          mrc     p15, 1, r1, c0, c0, 0     @ read the new csidr
          and     r2, r1, #7          @ extract the length of the cache lines
          add     r2, r2, #4          @ add 4 (line length offset)
          ldr     r4, =0x3ff
          ands     r4, r4, r1, lsr #3     @ find maximum number on the way size
          clz     r5, r4               @ find bit position of way size increment
          ldr     r7, =0x7fff
          ands     r7, r7, r1, lsr #13     @ extract max number of the index size
loop2:
          mov     r9, r4               @ create working copy of max way size
loop3:
          orr     r11, r10, r9, lsl r5     @ factor way and cache number into r11
          orr     r11, r11, r7, lsl r2     @ factor index number into r11
          mcr     p15, 0, r11, c7, c14, 2     @ clean & invalidate by set/way
          subs     r9, r9, #1          @ decrement the way
          bge     loop3
          subs     r7, r7, #1          @ decrement the index
          bge     loop2
skip:
          add     r10, r10, #2          @ increment cache number
          cmp     r3, r10
          bgt     loop1
finished:
          mov     r10, #0               @ switch back to cache level 0
          mcr     p15, 2, r10, c0, c0, 0     @ select current cache level in cssr
          ldmfd     sp!, {r0-r5, r7, r9-r11}
iflush:
          mcr     p15, 0, r10, c7, c5, 0     @ invalidate I+BTB
          mcr     p15, 0, r10, c7, c10, 4     @ drain WB
          mov     pc, lr
/*
* ARMv5TEJ cache flush: repeat the "test, clean and invalidate D cache"
* operation until it reports completion through the condition flags
* (the MRC targets r15, so only the flags are updated), then flush the
* I cache and drain the write buffer.
* NOTE(review): r0 is used as the MCR operand without being set here -
* presumably the value is ignored for these operations; confirm.
*/
__armv5tej_mmu_cache_flush:
1:          mrc     p15, 0, r15, c7, c14, 3     @ test,clean,invalidate D cache
          bne     1b
          mcr     p15, 0, r0, c7, c5, 0     @ flush I cache
          mcr     p15, 0, r0, c7, c10, 4     @ drain WB
          mov     pc, lr
/*
* ARMv4 MMU cache flush, done in software: read through a memory window
* of twice the D-cache size, one load per cache line, starting at the
* current pc rounded down to 64 bytes; then flush the I and D caches and
* drain the write buffer.
*
* Size and line length default to 64 KiB (2 x 32 KiB) and 32 bytes.  If the
* cache type register read differs from the processor ID in r6 (i.e. the
* register is implemented), both are derived from its fields instead.
* r1, r2, r3 and r11 are overwritten.
*/
__armv4_mmu_cache_flush:
          mov     r2, #64*1024          @ default: 32K dcache size (*2)
          mov     r11, #32          @ default: 32 byte line size
          mrc     p15, 0, r3, c0, c0, 1     @ read cache type
          teq     r3, r6               @ cache ID register present?
          beq     no_cache_id
          mov     r1, r3, lsr #18
          and     r1, r1, #7
          mov     r2, #1024
          mov     r2, r2, lsl r1          @ base dcache size *2
          tst     r3, #1 << 14          @ test M bit
          addne     r2, r2, r2, lsr #1     @ +1/2 size if M == 1
          mov     r3, r3, lsr #12
          and     r3, r3, #3
          mov     r11, #8
          mov     r11, r11, lsl r3     @ cache line size in bytes
no_cache_id:
          bic     r1, pc, #63          @ align to longest cache line
          add     r2, r1, r2
1:          ldr     r3, [r1], r11          @ s/w flush D cache
          teq     r1, r2
          bne     1b
          mcr     p15, 0, r1, c7, c5, 0     @ flush I cache
          mcr     p15, 0, r1, c7, c6, 0     @ flush D cache
          mcr     p15, 0, r1, c7, c10, 4     @ drain WB
          mov     pc, lr
/*
* ARMv3 (MMU and MPU) cache flush: invalidate the whole cache.
* r1 is zeroed and used as the MCR operand.  r0 is left untouched, as the
* cache_clean_flush contract requires (r0 preserved, r1 may be corrupted).
*/
__armv3_mmu_cache_flush:
__armv3_mpu_cache_flush:
          mov     r1, #0
          mcr     p15, 0, r1, c7, c0, 0     @ invalidate whole cache v3
          mov     pc, lr
/*
* Various debugging routines for printing hex characters and
* memory, which again must be relocatable.
*/
#ifdef DEBUG
          .type     phexbuf,#object             /*phexbuf是一个对象*/
phexbuf:     .space     12                       /*保留12字节长度*/
          .size     phexbuf, . – phexbuf        /*phexbuf大小为当前位置减掉phexbuf起始位置,即12字节*/
phex:          adr     r3, phexbuf              /*输出寄存器数据,参数r0: 需要输出的值,r1:为输出字节长度,一般为8字节,比如输出ddddbbbb这样子的16进制数据就是8字节字符长*/
          mov     r2, #0
          strb     r2, [r3, r1]                     /*变成字符串,就是字符数组尾部加个0*/
1:          subs     r1, r1, #1                  /*长度减1*/
          movmi     r0, r3                        /*长度为负,则处理完成,把r3即phexbuf的地址存入r0,调用puts进行输出*/
          bmi     puts
          and     r2, r0, #15                    /*长度还有,则r2 = r0 & 0x0f,即处理最低4位*/
          mov     r0, r0, lsr #4                 /*r0 = r0 >> 4*/
          cmp     r2, #10                        /*r2就是最低4位大于10,即0x0a,0x0b,…,0x0f,则先加一个7,因为’A’的asc码是65,而’0’是48,两者相差17。*/
          addge     r2, r2, #7
          add     r2, r2, #’0′                    /*把数字加上’0’,变成ASCII码*/
          strb     r2, [r3, r1]                   /*存到phexbuf的尾部,就是从后面往前依次写入传入的r0的地位到高位,这样输出时就会正过来。*/
          b     1b                                   /*继续处理*/
/*
 * puts: write a NUL-terminated string to the debug output.
 *   In:  r0 = address of the string
 *   Clobbers r0-r3 and the condition flags.  Returns through lr.
 *
 * Every newline character is followed by a carriage return.
 * Local label 2 is also the entry point used by putc, which arrives with
 * r2 = character, r3 = output base (from loadsp) and r0 = 0; the final
 * test on r0 makes that single-character case return instead of looping.
 */
puts:          loadsp     r3                    @ r3 = output port base (see loadsp macro)
1:          ldrb     r2, [r0], #1               @ r2 = *r0++
          teq     r2, #0                         @ end of string?
          moveq     pc, lr
2:          writeb     r2, r3                    @ emit the character in r2
          mov     r1, #0x00020000                @ crude busy-wait to let the character go out
3:          subs     r1, r1, #1
          bne     3b
          teq     r2, #'\n'                      @ was that a newline?
          moveq     r2, #'\r'                    @ then also emit a carriage return
          beq     2b
          teq     r0, #0                         @ r0 == 0 only when entered from putc
          bne     1b                             @ string mode: fetch the next character
          mov     pc, lr                         @ single-character mode: done
/*
 * putc: write a single character to the debug output.
 *   In:  r0 = character
 * Shares the output code of puts by jumping to its local label 2 with
 * r0 = 0, which makes that code return after one character.
 */
putc:                                             /* output one character */
          mov     r2, r0                       /* r2 = character to output */
          mov     r0, #0                      /* r0 = 0 marks single-character mode */
          loadsp     r3                         /* r3 = output port base (see loadsp macro) */
          b     2b                                /* continue at local label 2 inside puts */
/*
 * memdump: hex-dump 256 bytes of memory to the debug output.
 *   In:  r0 = start address
 *   Clobbers r0-r3 (through phex/putc), r10, r11, r12, lr and the flags.
 *   Returns through the lr value saved in r10.
 *
 * Output is 8 rows of 8 words.  Each row is: the row address as 8 hex
 * digits, a colon, then the 8 words as 8 hex digits each, every word
 * preceded by one space and with one extra space after the fourth word,
 * e.g.
 *   00AA00BB: 89ABCDEF 00000000 89ABCDEF 00000000  89ABCDEF 22221111 ...
 * Each row ends with a newline (puts/putc add the carriage return).
 */
memdump:     mov     r12, r0         @ r12 = start address
          mov     r10, lr                     @ keep the return address; bl below overwrites lr
          mov     r11, #0                     @ r11 = word index, 0..63
2:          mov     r0, r11, lsl #2       @ byte offset of the first word in this row
          add     r0, r0, r12                @ r0 = address of this row
          mov     r1, #8
          bl     phex                           @ print the row address
          mov     r0, #':'
          bl     putc
1:          mov     r0, #' '
          bl     putc                           @ separator before each word
          ldr     r0, [r12, r11, lsl #2]   @ r0 = word at start + 4 * index
          mov     r1, #8
          bl     phex                           @ print the word as 8 hex digits
          and     r0, r11, #7            @ position of this word within the row
          teq     r0, #3
          moveq     r0, #' '             @ after the fourth word: one extra space
          bleq     putc
          and     r0, r11, #7            @ recompute, putc clobbers r0
          add     r11, r11, #1
          teq     r0, #7                 @ was that the eighth word of the row?
          bne     1b
          mov     r0, #'\n'            @ end of row
          bl     putc
          cmp     r11, #64           @ 64 words = 256 bytes
          blt     2b
          mov     pc, r10                @ return through the saved lr
#endif
          .ltorg
reloc_end:
          .align
          .section “.stack”, “w”          /*可写的栈段,大小为4KB*/
user_stack:     .space     4096
1,571次阅读

Post navigation

前一篇:

mini6410板uboot的Armlinux.c

后一篇:

OpenStack 安装和使用(一)

发表回复 取消回复

要发表评论,您必须先登录。

个人介绍

需要么,有事情这里找联系方式:关于天楚锐齿

=== 美女同欣赏,好酒共品尝 ===

微信扫描二维码赞赏该文章:

扫描二维码分享该文章:

分类

  • Linux&Android (81)
  • Uncategorized (1)
  • 下载 (28)
  • 云计算 (38)
  • 人工智能 (9)
  • 大数据 (35)
  • 嵌入式 (34)
  • 杂七杂八 (35)
  • 物联网 (65)
  • 网络 (25)
  • 通信 (22)

归档

近期文章

  • 飞书机器人发送卡片interactive消息
  • Springboot JPA实现对数据库表统一的增删改查
  • WEB的内容安全策略CSP(Content-Security-Policy)
  • CSS利用@media和viewport实现响应式布局自动适配手机电脑等
  • VUE前端增加国际化支持

近期评论

  • linux爱好者 发表在《Linux策略路由及iptables mangle、ip rule、ip route关系及一种Network is unreachable错误》
  • maxshu 发表在《使用Android的HIDL+AIDL方式编写从HAL层到APP层的程序》
  • Ambition 发表在《使用Android的HIDL+AIDL方式编写从HAL层到APP层的程序》
  • Ambition 发表在《使用Android的HIDL+AIDL方式编写从HAL层到APP层的程序》
  • maxshu 发表在《Android9下用ethernet 的Tether模式来做路由器功能》

阅读量

  • 使用Android的HIDL+AIDL方式编写从HAL层到APP层的程序 - 23,802次阅读
  • 卸载深信服Ingress、SecurityDesktop客户端 - 18,505次阅读
  • 车机技术之车规级Linux-Automotive Grade Linux(AGL) - 10,562次阅读
  • linux下的unbound DNS服务器设置详解 - 9,319次阅读
  • 在Android9下用ndk编译vSomeIP和CommonAPI以及使用例子 - 9,132次阅读
  • linux的tee命令导致ssh客户端下的shell卡住不动 - 8,635次阅读
  • Linux策略路由及iptables mangle、ip rule、ip route关系及一种Network is unreachable错误 - 8,122次阅读
  • 车机技术之360°全景影像(环视)系统 - 8,083次阅读
  • 车机技术之Android Automotive - 7,937次阅读
  • Windows下安装QEMU并在qemu上安装ubuntu和debian - 7,838次阅读

其他操作

  • 注册
  • 登录
  • 条目 feed
  • 评论 feed
  • WordPress.org

联系方式

地址
深圳市科技园

时间
周一至周五:  9:00~12:00,14:00~18:00
周六和周日:10:00~12:00

标签

android AT命令 CAN centos docker Hadoop hdfs ip java kickstart linux mapreduce mini6410 modem nova OAuth openstack os python socket ssh uboot 内核 协议 安装 嵌入式 性能 报表 授权 操作系统 数据 数据库 月报 模型 汽车 深信服 源代码 统计 编译 脚本 虚拟机 调制解调器 车机 金融 鉴权
© 2025 天楚锐齿