blob: 6bafae7f2da4e33eb9db22067753224533dacc0c [file] [log] [blame]
1;; -*- fundamental -*-
2;; -----------------------------------------------------------------------
3;;
4;; Copyright 1994-2008 H. Peter Anvin - All Rights Reserved
5;; Copyright 2009 Intel Corporation; author: H. Peter Anvin
6;;
7;; This program is free software; you can redistribute it and/or modify
8;; it under the terms of the GNU General Public License as published by
9;; the Free Software Foundation, Inc., 53 Temple Place Ste 330,
10;; Boston MA 02111-1307, USA; either version 2 of the License, or
11;; (at your option) any later version; incorporated herein by reference.
12;;
13;; -----------------------------------------------------------------------
14
15;;
16;; init16.asm
17;;
18;; Routine to initialize and to trampoline into 32-bit
19;; protected memory. This code is derived from bcopy32.inc and
20;; com32.inc in the main SYSLINUX distribution.
21;;
22
23%include '../version.gen'
24
; Build-time layout constants.
; MY_CS is the real-mode segment that 'start' copies the image to and then
; runs from; CS_BASE is the same location expressed as a linear address.
25MY_CS equ 0x0800 ; Segment address to use
26CS_BASE equ (MY_CS << 4) ; Corresponding address
27
28; Low memory bounce buffer
; (its linear address is handed to the 32-bit code by call32_call_start,
; which also puts its temporary stack top 0x10000 bytes above that address)
29BOUNCE_SEG equ (MY_CS+0x1000)
30
; When nonzero, try_wbinvd is assembled and called from enable_a20/disable_a20
31%define DO_WBINVD 0
32
; Section declarations, each requesting 16-byte alignment
33 section .rodata align=16
34 section .data align=16
35 section .bss align=16
36 section .stack align=16 nobits
; 512 bytes of real-mode stack; stack_end is the address just past it and is
; what 'start' loads into ESP
37stack resb 512
38stack_end equ $
39
40;; -----------------------------------------------------------------------
41;; Kernel image header
42;; -----------------------------------------------------------------------
43
; First 512 bytes of the image: a 497-byte command-line buffer followed by
; the legacy header fields and the 0xAA55 signature word
; (497 + 1 + 7*2 = 512 bytes). copy_cmdline fills 'cmdline' at run time.
44 section .text ; Must be first in image
45 bits 16
46
47cmdline times 497 db 0 ; We put the command line here
48setup_sects db 0 ; 'start' copies (setup_sects + 1) * 512 bytes
49root_flags dw 0
50syssize dw 0
51swap_dev dw 0
52ram_size dw 0
53vid_mode dw 0
54root_dev dw 0
55boot_flag dw 0xAA55
56
; Entry point: immediately follows the 512-byte area above and jumps over
; the remaining header fields to the real startup code.
57_start: jmp short start
58
59 db "HdrS" ; Header signature
60 dw 0x0203 ; Header version number
61
62realmode_swtch dw 0, 0 ; default_switch, SETUPSEG
63start_sys_seg dw 0x1000 ; obsolete
; NOTE(review): the -0x200 presumably makes the pointer relative to the
; start of this second sector (_start) rather than the image -- confirm
64version_ptr dw memdisk_version-0x200 ; version string ptr
65type_of_loader db 0 ; Filled in by boot loader
66loadflags db 1 ; Please load high
67setup_move_size dw 0 ; Unused
68code32_start dd 0x100000 ; 32-bit start address
69ramdisk_image dd 0 ; Loaded ramdisk image address
70ramdisk_size dd 0 ; Size of loaded ramdisk
71bootsect_kludge dw 0, 0
72heap_end_ptr dw 0
73pad1 dw 0
; Linear address of the boot loader's command line; 0 means none
; (see copy_cmdline)
74cmd_line_ptr dd 0 ; Command line
75ramdisk_max dd 0xffffffff ; Highest allowed ramdisk address
76
77;
78; These fields aren't real setup fields, they're poked in by the
79; 32-bit code.
80;
; They are consumed after init32 returns: ES:DI, EDX and SS:SP are loaded
; from them and control is transferred with a far jump through b_csip.
81b_esdi dd 0 ; ES:DI for boot sector invocation
82b_edx dd 0 ; EDX for boot sector invocation
83b_sssp dd 0 ; SS:SP on boot sector invocation
84b_csip dd 0 ; CS:IP on boot sector invocation
85
; NUL-terminated version banner, referenced by version_ptr in the header.
; VERSION_STR and DATE are not defined in this file (presumably they come
; from the included version.gen or the assembler command line -- confirm).
86 section .rodata
87memdisk_version:
88 db "MEMDISK ", VERSION_STR, " ", DATE, 0
89
90;; -----------------------------------------------------------------------
91;; End kernel image header
92;; -----------------------------------------------------------------------
93
94;
95; Move ourselves down into memory to reduce the risk of conflicts;
96; then canonicalize CS to match the other segments.
97;
; start: relocate the real-mode image to MY_CS:0 and continue running there.
;
; Copies (setup_sects + 1) sectors from DS:0 (DS as left by the boot loader)
; to MY_CS:0, then points DS and SS at MY_CS, loads ESP with stack_end and
; far-jumps so that CS == MY_CS as well. ES stays at MY_CS and FS is 0
; afterwards.
98 section .text
99 bits 16
100start:
101 mov ax,MY_CS
102 mov es,ax ; ES = destination segment
103 movzx cx,byte [setup_sects]
104 inc cx ; Add one for the boot sector
105 shl cx,7 ; Convert to dwords
; (512 bytes per sector / 4 bytes per dword = 128 = 1 << 7)
106 xor si,si
107 xor di,di
108 mov fs,si ; fs <- 0
109 cld
110 rep movsd ; DS:SI -> ES:DI, CX dwords
111 mov ds,ax
112 mov ss,ax
113 mov esp,stack_end ; Loaded right after SS; upper half of ESP is set too
114 jmp MY_CS:.next ; Reload CS; execution continues in the copy
115.next:
116
;
; copy_cmdline: copy the boot loader's command line into our own buffer.
;
; In:   DS = ES = our segment (as left by 'start');
;       [cmd_line_ptr] = linear address of a NUL-terminated string, or 0
; Out:  NUL-terminated copy at ES:0 (the 'cmdline' area): at most 496
;       characters followed by the terminator
; Uses: EAX, CX, SI, DI, GS, flags
;
; Execution falls through into the code that follows.
;
copy_cmdline:
        xor     di, di                  ; ES:DI -> offset 0 of our segment
        mov     eax, [cmd_line_ptr]
        test    eax, eax
        jz      .terminate              ; no command line: store "" only

        ; Split the linear address into GS:SI with SI in 0..15
        mov     si, ax
        and     si, 0x000F              ; offset within the paragraph
        shr     eax, 4                  ; paragraph number
        mov     gs, ax

        mov     cx, 496                 ; upper bound on characters copied
.next_byte:
        gs lodsb                        ; AL = next source character
        test    al, al
        jz      .terminate              ; end of string reached
        stosb
        loop    .next_byte

.terminate:
        xor     al, al
        stosb                           ; always NUL-terminate the copy
139
140;
141; Now jump to 32-bit code
142;
; init32 switches to protected mode and runs the 32-bit program; it comes
; back here in real mode via call32_done.
143 sti
144 call init32
145;
146; When init32 returns, we have been set up, the new boot sector loaded,
147; and we should go and run the newly loaded boot sector.
148;
149; The setup function will have poked values into the setup area.
150;
; The b_* fields are read through CS because DS is about to be zeroed.
151 movzx edi,word [cs:b_esdi] ; EDI = offset part, zero-extended
152 mov es,word [cs:b_esdi+2] ; ES = segment part
153 mov edx,[cs:b_edx]
154
155 cli
156 xor esi,esi ; No partition table involved
157 mov ds,si ; Make all the segments consistent
158 mov fs,si
159 mov gs,si
160 lss sp,[cs:b_sssp] ; Load SS:SP together from the saved far pointer
161 movzx esp,sp ; Clear the upper half of ESP
162 jmp far [cs:b_csip] ; Transfer control to the loaded boot code
163
164;
165; We enter protected mode, set up a flat 32-bit environment, run rep movsd
166; and then exit. IMPORTANT: This code assumes cs == MY_CS.
167;
168; This code is probably excessively anal-retentive in its handling of
169; segments, but this stuff is painful enough as it is without having to rely
170; on everything happening "as it ought to."
171;
; Base address used for the dummy TSS descriptor in the GDT below
172DummyTSS equ 0x580 ; Hopefully safe place in low memory
173
174 section .data
175
; Emit one 8-byte segment descriptor.
;   %1 = 32-bit base, %2 = 20-bit limit, %3 = flags word
; First dword:  limit[15:0] in the low half, base[15:0] in the high half.
; Second dword: base[23:16] in bits 0-7, the low byte of %3 in bits 8-15,
;               limit[19:16] in bits 16-19, the top nibble of %3 in
;               bits 20-23, base[31:24] in bits 24-31.
176 ; desc base, limit, flags
177%macro desc 3
178 dd (%2 & 0xffff) | ((%1 & 0xffff) << 16)
179 dd (%1 & 0xff000000) | (%2 & 0xf0000) | ((%3 & 0xf0ff) << 8) | ((%1 & 0x00ff0000) >> 16)
180%endmacro
181
; The GDT. Slot 0 (selector 0000) is reused to hold the 6-byte operand for
; LGDT (16-bit limit, 32-bit linear base) plus a padding word, so
; 'lgdt [call32_gdt]' loads the table that contains it.
; NOTE(review): .adj1 looks like a marker for a field that must be fixed up
; if the real-mode code is relocated -- confirm against the 32-bit code.
182 align 8, db 0
183call32_gdt: dw call32_gdt_size-1 ; Null descriptor - contains GDT
184.adj1: dd call32_gdt+CS_BASE ; pointer for LGDT instruction
185 dw 0
186
187 ; 0008: Dummy TSS to make Intel VT happy
188 ; Should never be actually accessed...
; (call32_enter_pm rewrites byte 5 of this entry to 89h before each LTR)
189 desc DummyTSS, 103, 0x8089
190
191 ; 0010: Code segment, use16, readable, dpl 0, base CS_BASE, 64K
192 desc CS_BASE, 0xffff, 0x009b
193
194 ; 0018: Data segment, use16, read/write, dpl 0, base CS_BASE, 64K
195 desc CS_BASE, 0xffff, 0x0093
196
197 ; 0020: Code segment, use32, readable, dpl 0, base 0, 4G
198 desc 0, 0xfffff, 0xc09b
199
200 ; 0028: Data segment, use32, read/write, dpl 0, base 0, 4G
201 desc 0, 0xfffff, 0xc093
202
203call32_gdt_size: equ $-call32_gdt
204
; NUL-terminated message printed by print_err when A20 cannot be enabled
205err_a20: db 'ERROR: A20 gate not responding!',13,10,0
206
; Uninitialized state used by the mode-switch and A20 code
207 section .bss
208 alignb 4
; NOTE(review): Return is not referenced anywhere in this file
209Return resd 1 ; Return value
; Real-mode SP, written by call32_enter_pm and reloaded by call32_enter_rm;
; call32_syscall/call32_sys_resume adjust it to carve out a stack frame
210SavedSP resw 1 ; Place to save SP
; Retry counter for enable_a20 (initialised to 255 there)
211A20Tries resb 1
212
213 section .data
214 align 4, db 0
; NOTE(review): Target/Target_Seg are not referenced in this file; 20h is
; the selector of the flat 32-bit code segment in call32_gdt
215Target dd 0 ; Target address
216Target_Seg dw 20h ; Target CS
217
; One of the A20_* method codes; used as an index into A20List/A20DList
218A20Type dw 0 ; Default = unknown
219
220 section .text
221 bits 16
222;
223; Routines to enable and disable (yuck) A20. These routines are gathered
224; from tips from a couple of sources, including the Linux kernel and
225; http://www.x86.org/. The need for the delay to be as large as given here
226; is indicated by Donnie Barnes of RedHat, the problematic system being an
227; IBM ThinkPad 760EL.
228;
229; We typically toggle A20 twice for every 64K transferred.
230;
; 'io_delay' expands to a call of _io_delay (two writes to IO_DELAY_PORT)
231%define io_delay call _io_delay
232%define IO_DELAY_PORT 80h ; Invalid port (we hope!)
; Maximum number of a20_test polls in a20d_snooze
233%define disable_wait 32 ; How long to wait for a disable
234
; A20 control method codes stored in A20Type; they index the tables below
235%define A20_DUNNO 0 ; A20 type unknown
236%define A20_NONE 1 ; A20 always on?
237%define A20_BIOS 2 ; A20 BIOS enable
238%define A20_KBC 3 ; A20 through KBC
239%define A20_FAST 4 ; A20 through port 92h
240
; Jump tables indexed by A20Type (word entries): A20List is used by
; try_enable_a20, A20DList by disable_a20. Note that they live in .text.
241 align 2, db 0
242A20List dw a20_dunno, a20_none, a20_bios, a20_kbc, a20_fast
243A20DList dw a20d_dunno, a20d_none, a20d_bios, a20d_kbc, a20d_fast
; Number of word entries in both tables combined (10).
; NOTE(review): presumably the count of pointers to fix up on relocation --
; confirm against the consumer.
244a20_adjust_cnt equ ($-A20List)/2
245
;
; slow_out:  write AL to port DX, then perform an I/O delay.
; _io_delay: short delay implemented as two dummy writes of AL to
;            IO_DELAY_PORT.
;
; Neither entry point changes any register.
;
slow_out:
        out     dx, al
        ; deliberately no RET: continue into _io_delay

_io_delay:
        out     IO_DELAY_PORT, al
        out     IO_DELAY_PORT, al
        ret
251
; enable_a20: make sure the A20 address line is enabled.
;
; Saves all general registers with PUSHAD; the success exits (a20_done /
; a20_done_pop) restore them and return. On failure after all retries the
; code prints err_a20 and halts in 'die' instead of returning.
252enable_a20:
253 pushad
254 mov byte [A20Tries],255 ; Times to try to make this work
255
; Retry entry point: re-dispatches on the current A20Type
256try_enable_a20:
257
258;
259; Flush the caches
260;
261%if DO_WBINVD
262 call try_wbinvd
263%endif
264
265;
266; If the A20 type is known, jump straight to type
267;
; A20_DUNNO (0) selects a20_dunno, which runs the methods in order.
268 mov bp,[A20Type]
269 add bp,bp ; Convert to word offset
; BP-based addressing defaults to SS, so this relies on SS addressing the
; same segment as the table (call32_enter_rm and 'start' set SS = CS).
270.adj4: jmp word [bp+A20List]
271
272;
273; First, see if we are on a system with no A20 gate
274;
; Method 1: assume there is no gate and simply test. A20Type is updated
; before each attempt so that it names the method that finally succeeded.
275a20_dunno:
276a20_none:
277 mov byte [A20Type], A20_NONE
278 call a20_test
279 jnz a20_done ; ZF = 0: A20 is already on
280
281;
282; Next, try the BIOS (INT 15h AX=2401h)
283;
; The BIOS return status is ignored; success is judged solely by a20_test.
284a20_bios:
285 mov byte [A20Type], A20_BIOS
286 mov ax,2401h
287 pushf ; Some BIOSes muck with IF
288 int 15h
289 popf
290
291 call a20_test
292 jnz a20_done
; otherwise fall through to the next method
293
294;
295; Enable the keyboard controller A20 gate
296;
; Method 3: keyboard controller (ports 60h/64h). Falls through to a20_fast
; if A20 still tests as off after polling.
297a20_kbc:
298 mov dl, 1 ; Allow early exit
299 call empty_8042 ; returns ZF = 0 if A20 was seen enabled
300 jnz a20_done ; A20 live, no need to use KBC
301
302 mov byte [A20Type], A20_KBC ; Starting KBC command sequence
303
304 mov al,0D1h ; Write output port
305 out 064h, al
306 call empty_8042_uncond
307
308 mov al,0DFh ; A20 on
309 out 060h, al
310 call empty_8042_uncond
311
312 ; Apparently the UHCI spec assumes that A20 toggle
313 ; ends with a null command (assumed to be for synchronization?)
314 ; Put it here to see if it helps anything...
315 mov al,0FFh ; Null command
316 out 064h, al
317 call empty_8042_uncond
318
319 ; Verify that A20 actually is enabled. Do that by
320 ; observing a word in low memory and the same word in
321 ; the HMA until they are no longer coherent. Note that
322 ; we don't do the same check in the disable case, because
323 ; we don't want to *require* A20 masking (SYSLINUX should
324 ; work fine without it, if the BIOS does.)
; CX starts at 0, so LOOP runs the test 65536 times before giving up.
; CX is pushed here; the success exit a20_done_pop pops it again.
325.kbc_wait: push cx
326 xor cx,cx
327.kbc_wait_loop:
328 call a20_test
329 jnz a20_done_pop
330 loop .kbc_wait_loop
331
332 pop cx
333;
334; Running out of options here. Final attempt: enable the "fast A20 gate"
335;
; Method 4: "fast A20" via port 92h: set bit 1, keep bit 0 clear.
336a20_fast:
337 mov byte [A20Type], A20_FAST ; Haven't used the KBC yet
338 in al, 092h
339 or al,02h
340 and al,~01h ; Don't accidentally reset the machine!
341 out 092h, al
342
; Poll up to 65536 times (CX starts at 0); CX is popped by a20_done_pop on
; success or by the 'pop cx' below on timeout.
343.fast_wait: push cx
344 xor cx,cx
345.fast_wait_loop:
346 call a20_test
347 jnz a20_done_pop
348 loop .fast_wait_loop
349
350 pop cx
351
352;
353; Oh bugger. A20 is not responding. Try frobbing it again; eventually give up
354; and report failure to the user.
355;
356
; Note: A20Type is A20_FAST at this point, so the dispatch in
; try_enable_a20 re-enters a20_fast on every retry.
357 dec byte [A20Tries]
358 jnz try_enable_a20
; retries exhausted: fall through to the error message code
359
360
; A20 could not be enabled: report it and stop.
;
; print_err writes the NUL-terminated string at DS:SI to the screen one
; character at a time with INT 10h, AH=0Eh (BX=7), then continues at 'die'.
; 'die' enables interrupts and halts forever; it never returns.
        mov     si, err_a20

print_err:
        lodsb                           ; AL = next character of the message
        test    al, al
        jz      die                     ; terminator reached
        mov     ah, 0xe
        mov     bx, 7                   ; reloaded for every character
        int     10h
        jmp     print_err


die:
        sti                             ; let interrupts wake the HLT below
.halt:
        hlt
        jmp     short .halt
377
;
; Success exits of enable_a20: A20 is confirmed enabled.
;
; a20_done_pop is used from the wait loops, which still have CX pushed;
; a20_done restores the registers saved by enable_a20's PUSHAD and returns
; to enable_a20's caller.
;
a20_done_pop:
        pop     cx                      ; discard the wait loop's saved CX

a20_done:
        popad
        ret
384
385;
386; This routine tests if A20 is enabled (ZF = 0). This routine
387; must not destroy any register contents.
388;
; Returns ZF = 1 if the two probe locations stayed identical for all
; attempts, i.e. A20 appears to be masked. DS, ES, CX and EAX are saved and
; restored; only the flags carry the result.
389
390; This is the INT 1Fh vector, which on standard PCs is used by the
391; BIOS when the screen is in graphics mode. Even if it is, it points to
392; data, not code, so it should be safe enough to fiddle with.
393A20Test equ (1Fh*4)
394
395a20_test:
396 push ds
397 push es
398 push cx
399 push eax
400 xor ax,ax
401 mov ds,ax ; DS == 0
402 dec ax
403 mov es,ax ; ES == 0FFFFh
; ES:A20Test+10h is linear 0FFFF0h + 7Ch + 10h = 10007Ch, exactly 1 MB
; above DS:A20Test (linear 7Ch); the two alias each other when A20 is off.
404 mov cx,32 ; Loop count
405 mov eax,[A20Test]
406 cmp eax,[es:A20Test+10h]
407 jne .a20_done ; Different contents: A20 is on (ZF = 0)
408 push eax ; Keep the original vector for restoring
409.a20_wait:
410 inc eax
411 mov [A20Test],eax ; Change the low copy ...
412 io_delay
413 cmp eax,[es:A20Test+10h] ; ... and see whether the high one follows
414 loopz .a20_wait ; Repeat while still equal and CX not exhausted
415 pop dword [A20Test] ; Restore original value
; The pops below do not alter ZF, so the last comparison is the result.
416.a20_done:
417 pop eax
418 pop cx
419 pop es
420 pop ds
421 ret
422
; disable_a20: undo enable_a20 using the method recorded in A20Type.
;
; Saves all general registers with PUSHAD; every path ends at the POPAD/RET
; at a20d_dunno/a20d_none. For A20_DUNNO and A20_NONE nothing is done.
423disable_a20:
424 pushad
425;
426; Flush the caches
427;
428%if DO_WBINVD
429 call try_wbinvd
430%endif
431
432 mov bp,[A20Type]
433 add bp,bp ; Convert to word offset
; BP-based addressing defaults to SS; see the matching jump in
; try_enable_a20.
434.adj5: jmp word [bp+A20DList]
435
; Disable A20 through the BIOS (INT 15h AX=2400h); the BIOS status is
; ignored and a20d_snooze polls for the effect.
436a20d_bios:
437 mov ax,2400h
438 pushf ; Some BIOSes muck with IF
439 int 15h
440 popf
441 jmp short a20d_snooze
442
443;
444; Disable the "fast A20 gate"
445;
; Clears bits 0 and 1 of port 92h.
446a20d_fast:
447 in al, 092h
448 and al,~03h
449 out 092h, al
450 jmp short a20d_snooze
451
452;
453; Disable the keyboard controller A20 gate
454;
; Same keyboard-controller command sequence as a20_kbc, but writing 0DDh
; (A20 off) to the output port. Falls through into a20d_snooze.
455a20d_kbc:
456 call empty_8042_uncond
457
458 mov al,0D1h
459 out 064h, al ; Write output port
460 call empty_8042_uncond
461
462 mov al,0DDh ; A20 off
463 out 060h, al
464 call empty_8042_uncond
465
466 mov al,0FFh ; Null command/synchronization
467 out 064h, al
468 call empty_8042_uncond
469
470 ; Wait a bit for it to take effect
; Polls a20_test at most disable_wait times and stops early once A20 tests
; as off (ZF = 1). A timeout is not treated as an error.
471a20d_snooze:
472 push cx
473 mov cx, disable_wait
474.delayloop: call a20_test
475 jz .disabled
476 loop .delayloop
477.disabled: pop cx
; Common exit for all disable_a20 paths: restore registers and return
478a20d_dunno:
479a20d_none:
480 popad
481 ret
482
483;
484; Routine to empty the 8042 KBC controller. If dl != 0
485; then we will test A20 in the loop and exit if A20 is
486; suddenly enabled.
487;
; empty_8042_uncond: drain the keyboard controller unconditionally
;                    (forces DL = 0).
; empty_8042:        as above, but if DL != 0 return early as soon as
;                    a20_test reports A20 enabled.
;
; Out:  ZF = 0 on the early "A20 is on" exit; ZF = 1 when the controller
;       was drained (result of the final 'test al,2'; the I/O delay that
;       follows executes only OUT instructions).
; Uses: AL (and DL for the _uncond entry)
488empty_8042_uncond:
489 xor dl,dl
490empty_8042:
491 call a20_test
; Despite the label name, this branch is taken when A20 tests as OFF (ZF = 1)
492 jz .a20_on
493 and dl,dl ; A20 is on: early exit requested?
494 jnz .done ; Yes; returns with ZF = 0
495.a20_on: io_delay
496 in al, 064h ; Status port
497 test al,1 ; Bit 0 set: a byte is waiting to be read
498 jz .no_output
499 io_delay
500 in al, 060h ; Read input
501 jmp short empty_8042 ; Discard it and start over
502.no_output:
503 test al,2 ; Bit 1 set: controller still busy with input
504 jnz empty_8042
505 io_delay
506.done: ret
507
508;
509; Execute a WBINVD instruction
510;
; Only assembled when DO_WBINVD is nonzero. Despite the name, no check of
; CPU support is performed here: the instruction is executed unconditionally.
511%if DO_WBINVD
512try_wbinvd:
513 wbinvd
514 ret
515%endif
516
517 section .bss
518 alignb 4
; Saved by call32_enter_rm when leaving protected mode and reloaded into ESP
; by call32_enter_pm on the way back in
519PMESP resd 1 ; Protected mode %esp
520
; Page-aligned 4K area reserved for the 32-bit code's interrupt table.
; It is not referenced by name anywhere else in this file.
521 section .idt nobits align=4096
522 alignb 4096
523pm_idt resb 4096 ; Protected-mode IDT, followed by interrupt stubs
524
525
526
527
; Linear address of the 32-bit program called from call32_call_start
; (same value as code32_start in the header)
528pm_entry: equ 0x100000
529
; LIDT operand restoring the real-mode interrupt vector table:
; base 0, limit 0FFFFh
530 section .rodata
531 align 2, db 0
532call32_rmidt:
533 dw 0ffffh ; Limit
534 dd 0 ; Address
535
; call32_pmidt: 6-byte LIDT operand for the protected-mode IDT.
;
; Loaded by call32_enter_pm on every entry to protected mode;
; call32_enter_rm stores the then-current IDTR back here with SIDT before
; leaving, so whatever IDT the 32-bit code installs is preserved across
; mode switches.
;
; The limit field holds the offset of the last valid byte, so a table of
; 256 eight-byte gates is described by 8*256-1.  Padding is emitted with
; 'align ..., db 0' because this is an initialized section, matching the
; other .data alignments in this file.
        section .data
        align   2, db 0
call32_pmidt:
        dw      8*256-1                 ; Limit (256 gates * 8 bytes, minus one)
        dd      0                       ; Address (entered later)
541
542 section .text
543;
544; This is the main entrypoint in this function
545;
; init32: first entry into protected mode; continues at call32_call_start.
; Called from real mode with CS == MY_CS. Control comes back to the caller
; through call32_done once the 32-bit program has returned.
546init32:
547 mov bx,call32_call_start ; Where to go in PM
548
549;
550; Enter protected mode. BX contains the entry point relative to the
551; real-mode CS.
552;
; Jumped to (never called): SP as found here is saved in SavedSP and is
; what call32_enter_rm restores later.
; On arrival at the target: flat 32-bit CS (20h) and DS/ES/SS (28h),
; FS = GS = 0, EBP = linear base of the real-mode segment, ESP = [PMESP],
; interrupts disabled, direction flag clear.
553call32_enter_pm:
554 mov ax,cs
555 mov ds,ax
556 movzx ebp,ax
557 shl ebp,4 ; EBP <- CS_BASE
558 movzx ebx,bx
559 add ebx,ebp ; entry point += CS_BASE
560 cli
561 mov [SavedSP],sp
562 cld
563 call enable_a20 ; Does not return if A20 cannot be enabled
; Byte 5 of the descriptor at selector 08h is its access byte
564 mov byte [call32_gdt+8+5],89h ; Mark TSS unbusy
565 o32 lgdt [call32_gdt] ; Set up GDT
566 o32 lidt [call32_pmidt] ; Set up IDT
567 mov eax,cr0
568 or al,1
569 mov cr0,eax ; Enter protected mode
570 jmp 20h:strict dword .in_pm+CS_BASE
; .pm_jmp is the address of the 6-byte far pointer (32-bit offset, 16-bit
; selector) encoded in the jump above; call32_call_start passes its linear
; address to the 32-bit code.
; NOTE(review): presumably so the offset can be patched on relocation.
571.pm_jmp equ $-6
572
573
574 bits 32
575.in_pm:
576 xor eax,eax ; Available for future use...
577 mov fs,eax
578 mov gs,eax
579 lldt ax ; Null LDT
580
581 mov al,28h ; Set up data segments
582 mov es,eax
583 mov ds,eax
584 mov ss,eax
585
586 mov al,08h
587 ltr ax ; Load the dummy TSS
588
; EBP-relative because DS is now flat and EBP holds the segment base
589 mov esp,[ebp+PMESP] ; Load protmode %esp if available
590 jmp ebx ; Go to where we need to go
591
592;
593; This is invoked before first dispatch of the 32-bit code, in 32-bit mode
594;
; call32_call_start: first code run in 32-bit mode (reached from init32).
; Pushes eight dword arguments and calls the 32-bit program at pm_entry;
; when that returns, execution falls through into call32_exit.
; The last value pushed (call32_syscall) is the one nearest the return
; address, i.e. the callee's first stack argument.
595call32_call_start:
596 ;
597 ; Set up a temporary stack in the bounce buffer;
598 ; start32.S will override this to point us to the real
599 ; high-memory stack.
600 ;
601 mov esp, (BOUNCE_SEG << 4) + 0x10000
602
; Addresses of code and data in this image are converted to linear
; addresses by adding CS_BASE
603 push dword call32_enter_rm.rm_jmp+CS_BASE
604 push dword call32_enter_pm.pm_jmp+CS_BASE
605 push dword stack_end ; RM size
606 push dword call32_gdt+CS_BASE
607 push dword call32_handle_interrupt+CS_BASE
608 push dword CS_BASE ; Segment base
609 push dword (BOUNCE_SEG << 4) ; Bounce buffer address
610 push dword call32_syscall+CS_BASE ; Syscall entry point
611
; The call is relative, and this code runs CS_BASE above its assembled
; address, so the target is written as pm_entry-CS_BASE to land on linear
; pm_entry.
612 call pm_entry-CS_BASE ; Run the program...
613
614 ; ... fall through to call32_exit ...
615
; call32_exit: reached when the 32-bit program returns; switches back to
; real mode and continues at call32_done.
616call32_exit:
617 mov bx,call32_done ; Return to command loop
618
; call32_enter_rm: leave protected mode.
; In:   BX = real-mode offset to continue at (relative to the real-mode CS)
; Out:  real mode, all segment registers = CS, SP = [SavedSP], interrupts
;       disabled; the protected-mode IDTR and ESP are saved in
;       call32_pmidt and PMESP for the next call32_enter_pm.
; Jumped to, never called. EDX and the high half of other registers are
; left untouched (call32_int_rm relies on EDX).
619call32_enter_rm:
620 ; Careful here... the PM code may have relocated the
621 ; entire RM code, so we need to figure out exactly
622 ; where we are executing from. If the PM code has
623 ; relocated us, it *will* have adjusted the GDT to
624 ; match, though.
625 call .here
626.here: pop ebp
627 sub ebp,.here ; EBP = run-time address minus assembled address
628 o32 sidt [ebp+call32_pmidt] ; Remember the IDT the 32-bit code is using
629 cli
630 cld
631 mov [ebp+PMESP],esp ; Save exit %esp
632 xor esp,esp ; Make sure the high bits are zero
633 jmp 10h:.in_pm16 ; Return to 16-bit mode first
634
635 bits 16
636.in_pm16:
637 mov ax,18h ; Real-mode-like segment
638 mov es,ax
639 mov ds,ax
640 mov ss,ax
641 mov fs,ax
642 mov gs,ax
643
644 lidt [call32_rmidt] ; Real-mode IDT (rm needs no GDT)
645 mov eax,cr0
646 and al,~1
647 mov cr0,eax
648 jmp MY_CS:.in_rm
; .rm_jmp is the address of the segment word encoded in the far jump above;
; call32_call_start passes its linear address to the 32-bit code.
; NOTE(review): presumably so the segment can be patched on relocation.
649.rm_jmp equ $-2
650
651.in_rm: ; Back in real mode
652 mov ax,cs
653 mov ds,ax
654 mov es,ax
655 mov fs,ax
656 mov gs,ax
657 mov ss,ax
658 mov sp,[SavedSP] ; Restore stack
659 jmp bx ; Go to wherever we need to go...
660
;
; call32_done: real-mode continuation after the 32-bit program has returned
; (selected by call32_exit).
;
; Disables A20 again, re-enables interrupts and returns.  SP was restored
; from SavedSP by call32_enter_rm, so this RET goes back to whoever called
; init32.
;
call32_done:
        call    disable_a20
        sti
        ret
665
666;
667; 16-bit support code
668;
669 bits 16
670
671;
672; 16-bit interrupt-handling code
673;
; call32_int_rm: real-mode half of interrupt reflection.
; In:   EDX = segment:offset of the handler, as read from the IVT by
;       call32_handle_interrupt
; Builds the frame an INT instruction would have pushed (flags, CS, IP of
; .cont), then "returns" into the handler with RETF; the handler's IRET
; resumes at .cont, which re-enters protected mode at call32_int_resume.
674call32_int_rm:
675 pushf ; Flags on stack
676 push cs ; Return segment
677 push word .cont ; Return address
678 push dword edx ; Segment:offset of IVT entry
679 retf ; Invoke IVT routine
680.cont: ; ... on resume ...
681 mov bx,call32_int_resume
682 jmp call32_enter_pm ; Go back to PM
683
684;
685; 16-bit system call handling code
686;
; call32_sys_rm: real-mode half of a syscall made through call32_syscall.
; On entry SP points at the 54-byte frame built by call32_syscall:
;   44 bytes of registers (GS, FS, ES, DS, PUSHAD block, EFLAGS),
;   then the target far pointer, then return offset, segment and flags.
; The registers are loaded, RETF jumps to the target, and the target is
; expected to come back to .return (the frame ends with an IRET-style
; offset/segment/flags triple).
687call32_sys_rm:
688 pop gs
689 pop fs
690 pop es
691 pop ds
692 popad
693 popfd
694 retf ; Invoke routine
; Push the resulting registers in the same 44-byte layout, then go back to
; protected mode; call32_sys_resume copies them out.
695.return:
696 pushfd
697 pushad
698 push ds
699 push es
700 push fs
701 push gs
702 mov bx,call32_sys_resume
703 jmp call32_enter_pm
704
705;
706; 32-bit support code
707;
708 bits 32
709
710;
711; This is invoked on getting an interrupt in protected mode. At
712; this point, we need to context-switch to real mode and invoke
713; the interrupt routine.
714;
715; When this gets invoked, the registers are saved on the stack and
716; AL contains the interrupt number.
717;
; AL is used as an index into the real-mode interrupt vector table at
; linear address 0 (4 bytes per entry); the entry is carried to
; call32_int_rm in EDX.
718call32_handle_interrupt:
719 movzx eax,al
720 xor ebx,ebx ; Actually makes the code smaller
721 mov edx,[ebx+eax*4] ; Get the segment:offset of the routine
722 mov bx,call32_int_rm ; Real-mode continuation for call32_enter_rm
723 jmp call32_enter_rm ; Go to real mode
724
; Back in protected mode after the real-mode handler has run: restore the
; registers saved before call32_handle_interrupt was entered and return
; from the interrupt.
725call32_int_resume:
726 popad
727 iret
728
729;
730; Syscall invocation. We manifest a structure on the real-mode stack,
731; containing the call32sys_t structure from <call32.h> as well as
732; the following entries (from low to high address):
733; - Target offset
734; - Target segment
735; - Return offset
736; - Return segment (== real mode cs)
737; - Return flags
738;
; call32_syscall: entry point called by the 32-bit program.
; Stack arguments as used below and in call32_sys_resume (after the
; PUSHFD/PUSHAD here, i.e. 9 dwords plus the return address):
;   [esp+10*4] interrupt number (low byte used)
;   [esp+11*4] pointer to the 44-byte input register block
;   [esp+12*4] pointer to the output register block, or NULL
; Builds the 54-byte real-mode frame described above, then switches to real
; mode and continues in call32_sys_rm.
739call32_syscall:
740 pushfd ; Save IF among other things...
741 pushad ; We only need to save some, but...
742 cld
743 call .here
744.here: pop ebp
745 sub ebp,.here ; EBP = linear base of the real-mode segment
746
747 movzx edi,word [ebp+SavedSP]
; 54 = 44 (registers) + 4 (target) + 2 + 2 + 2 (return offset/segment/flags)
748 sub edi,54 ; Allocate 54 bytes
749 mov [ebp+SavedSP],di ; call32_enter_rm will load SP from here
750 add edi,ebp ; Create linear address
751
752 mov esi,[esp+11*4] ; Source regs
753 xor ecx,ecx
754 mov cl,11 ; 44 bytes to copy
755 rep movsd
756
757 movzx eax,byte [esp+10*4] ; Interrupt number
758 ; ecx == 0 here; adding it to the EA makes the
759 ; encoding smaller
760 mov eax,[ecx+eax*4] ; Get IVT entry
761 stosd ; Save in stack frame
762 mov ax,call32_sys_rm.return ; Return offset
763 stosw ; Save in stack frame
764 mov eax,ebp
765 shr eax,4 ; Return segment
766 stosw ; Save in stack frame
; EDI is now 52 bytes into the frame, so [edi-12] is the EFLAGS dword at
; offset 40 of the register block. The mask clears, among others, TF
; (bit 8) and IF (bit 9).
767 mov eax,[edi-12] ; Return flags
768 and eax,0x200cd7 ; Mask (potentially) unsafe flags
769 mov [edi-12],eax ; Primary flags entry
770 stosw ; Return flags
771
772 mov bx,call32_sys_rm
773 jmp call32_enter_rm ; Go to real mode
774
775 ; On return, the 44-byte return structure is on the
776 ; real-mode stack. call32_enter_pm will leave ebp
777 ; pointing to the real-mode base.
; call32_sys_resume: protected-mode continuation after a syscall.
; ESP is the value saved when call32_syscall left protected mode, so the
; stack offsets refer to the same frame as in call32_syscall. Copies the
; 44-byte result block from the real-mode stack to the caller's output
; buffer (if any), releases it, and returns to the 32-bit caller.
778call32_sys_resume:
779 movzx esi,word [ebp+SavedSP]
780 mov edi,[esp+12*4] ; Dest regs
781 add esi,ebp ; Create linear address
782 and edi,edi ; NULL pointer?
783 jnz .do_copy
784.no_copy: mov edi,esi ; Do a dummy copy-to-self
785.do_copy: xor ecx,ecx
786 mov cl,11 ; 44 bytes
787 rep movsd ; Copy register block
788
; call32_sys_rm consumed the 54-byte frame and pushed 44 bytes of results,
; so 44 is what remains to be released here.
789 add word [ebp+SavedSP],44 ; Remove from stack
790
791 popad ; Registers and flags saved at call32_syscall entry
792 popfd
793 ret ; Return to 32-bit program