; This program is made by Daniel Horchner.
; email: dbjh@gmx.net
;
; This is a _basic_ multitasking _example_ using the hardware of the 386.
; Bit 4 in byte 6 of the task's descriptor is used as a flag. It will be
; referred to hereafter as the 'active' bit. This bit is available for use,
; it is not reserved by Intel. The scheduler checks only one bit to determine
; if a task can be multitasked; the Busy bit. The busy bit is set by the
; processor if a task is running and cleared if not or if the task hasn't
; started yet. This program starts a task by setting the Busy bit itself and
; so makes the task active. This is done by the function 'activate_task'. The
; function 'suspend_task' clears the Busy bit. It is the responsibility of
; these two functions to check if the Busy bit can be set or reset. They use
; the active bit in addition to the Busy bit to be sure whether the Busy bit
; was reset by suspend_task or was reset by the processor because the task
; ended. They both return an error if something went wrong by setting the
; Carry Flag. The active bit in the Task State Segment descriptors is 0 at
; startup. An active bit value of 0 is interpreted by activate_task as if the
; task has been suspended, so the tasks are initially made active by calling
; activate_task (no extra function 'start_task' is needed). activate_task
; only sets the active and Busy bits in the task's descriptor. In order for
; the task to actually multitask with other tasks, the task's selector must
; also be present in TSSarray.

segment code32 public align=16 use32

%include "raw32.inc"

;32-bit data
; Four Task State Segments, each immediately followed by a 2000h byte area
; that serves as the I/O permission bitmap (all zero bits).
TSS1            TSS
                times 2000h db 0        ; 1 bit for each port; 64K / 8 = 8K
TSS2            TSS
                times 2000h db 0
TSS3            TSS
                times 2000h db 0
schedulerTSS    TSS
                times 2000h db 0

; Descriptor templates; main fills in the base fields and installs them with
; setdsc. The limit 2067h = 68h (task state) + 2000h (I/O map) - 1.
LDTdsc          seg_descriptor  0,0,0, 82h, 0           ; dsc type 2=LDT
LDTsel          dw      0               ; selector allocated for the LDT
TSS1dsc         seg_descriptor  2067h,0,0, 89h, 0       ; dsc type 9=TSS
TSS2dsc         seg_descriptor  2067h,0,0, 89h, 0       ; all dscs: active=0
TSS3dsc         seg_descriptor  2067h,0,0, 89h, 0       ; 68h bytes for task
schedulerTSSdsc seg_descriptor  2067h,0,0, 89h, 0       ;  state + 2000h
TSS1sel         dw      0                               ;  bytes for I/O map
TSS2sel         dw      0               ; The four TSS selectors are
TSS3sel         dw      0               ;  consecutive (8 bytes apart); they
schedulerTSSsel dw      0               ;  are assigned in main

mPICmask        db      0               ; original master PIC mask
sPICmask        db      0               ; original slave PIC mask
org_tr          dw      0               ; original Task Register contents
gdtr            dw      0,0,0           ; 16-bit limit, 32-bit base
LDT             dd      0               ; pointer to mem allocated for LDT
org_IRQ0        dd      0               ; offset and selector of original
                dw      0               ;  IRQ 0 handler
MAX_N_TASKS     equ     3               ; (max) number of tasks to multitask
TSSarray        times MAX_N_TASKS dw 0  ; array of selectors of tasks; a zero
                                        ;  entry is skipped by the scheduler
runTSS          dd      0               ; index in TSSarray of task to run
timercntr       dd      0               ; counter incremented on every IRQ 0
DELAYCNT        equ     91              ; number of IRQ 0 ticks main waits in
                                        ;  each phase (about 5 seconds if the
                                        ;  timer runs at its default ~18.2 Hz)
TASK2CNTR       equ     0800000h        ; loop iterations task2 runs
TASK3CNTR       equ     0100000h        ; loop iterations task3 runs

; msg0 is '$'-terminated (printed with dosprint); the others are
; zero-terminated (printed with putstr).
msg0            db      'Not running at Privilege Level 0.','$'
msg1            db      'Message from task1! ',0
msg2            db      'Message from task2! ',0
msg3            db      'Message from task3! ',0
msg4            db      "Can'",'t activate task; it already ended.',0
msg5            db      'Press a key to continue.',0
msg6            db      'task2 continued beyond its designed end!',0
msg7            db      'task3 continued beyond its designed end!',0

;32-bit code
;
; main
; Sets up hardware multitasking and runs the demonstration:
;   1. Refuses to run unless the code segment has DPL 0.
;   2. Copies the GDT, marks every copied descriptor DPL 3 and installs the
;      copy as the LDT.
;   3. Allocates and installs four TSS descriptors (task 1..3 + scheduler).
;   4. Turns the IDT entry of IRQ 0 into a task gate for the scheduler and
;      masks every other IRQ.
;   5. Initialises the TSS's, registers the tasks in TSSarray and activates
;      task2 and task3.
;   6. Lets the tasks run, suspends task2, re-activates it, then falls
;      through into cleanup.
; The code from 'cli' onwards relies on gs = zerosel (set by the extender,
; see the commented-out 'mov gs,[zerosel]').
;
main:
                                        ; First, check if running at PL 0
        mov ebx,cs
        lar ecx,ebx                     ; ecx=access rights of the cs dsc
        and ecx,6000h                   ; Only the DPL bits are needed
        jz short .PL0
        mov edx,msg0
        call dosprint
        jmp @exit
.PL0:
;
                                        ; Make a copy of the GDT to create a
        sgdt [gdtr]                     ;  LDT; Make all descriptors PL 3
        movzx ecx,word [gdtr]
        mov [LDTdsc.limit0_15],cx       ; ecx=GDT limit in bytes
        inc ecx                         ; ecx=GDT size in bytes
        mov eax,ecx
        call getlomem                   ; LOmem -> Handy when using TD ;)
                                        ; NOTE(review): the result of getlomem
                                        ;  is not checked for failure, unlike
                                        ;  the gethimem calls below - confirm
                                        ;  whether it can fail
        mov [LDT],eax                   ; Save ptr to mem allocated for LDT
        mov edi,eax
        push ds
        pop es                          ; es=ds -> es:edi=destination
        mov esi,[gdtr+2]                ; esi=linear base address of the GDT
        mov ds,[zerosel]                ; ds:esi=source (GDT)
        rep movsb
        mov ds,[cs:data32sel]           ; Restore ds
                                        ; Change the DPL field of the
        movzx ecx,word [gdtr]           ;  descriptors to PL 3
        inc ecx
        shr ecx,3                       ; ecx=number of selectors in GDT/LDT
        mov edi,[LDT]
next_descriptor:                        ; Walks the copy from the last
        mov ebx,ecx                     ;  descriptor down to the first
        dec ebx                         ; ebx=index of descriptor to patch
        or byte [edi+ebx*8+5],3 << 5
        loop next_descriptor            ; PL 3; DPL bits are bits 5 and 6

        mov cx,1                        ; Allocate 1 descriptor for LDT
        call getdsc
        jc near @exit
        mov [LDTsel],ax

        mov eax,[code32a]               ; Base of LDT = code32a + [LDT]
        add eax,[LDT]
        mov [LDTdsc.base0_15],ax
        shr eax,16
        mov [LDTdsc.base16_23],al
        mov [LDTdsc.base24_31],ah

        mov edi,LDTdsc                  ; es:edi=pointer to 8 byte dsc buffer
        mov bx,[LDTsel]                 ; bx=selector
        call setdsc
        jc near @exit
                                        ; Store new LDTR in current TSS ->
        xor ebx,ebx                     ;  avoids problems with 386SWAT
        str bx
        add ebx,[gdtr+2]
        sub ebx,[code32a]               ; ebx=address of TSS descriptor

        mov dh,[ebx+7]                  ; Assemble the 32-bit base address
        mov dl,[ebx+4]                  ;  from the three base fields of the
        shl edx,16                      ;  descriptor
        mov dx,[ebx+2]
        sub edx,[code32a]               ; edx=address of TSS

        mov ax,[LDTsel]
        mov [edx+60h],ax                ; LDTR at TSS[60h]

        lldt ax
;
        mov cx,4                        ; Allocate 4 descriptors for TSS's
        call getdsc
        jc near @exit
        mov [TSS1sel],ax                ; The four selectors are taken to be
        add ax,8                        ;  consecutive: each next one is the
        mov [TSS2sel],ax                ;  previous one + 8
        add ax,8
        mov [TSS3sel],ax
        add ax,8
        mov [schedulerTSSsel],ax

        mov eax,[code32a]               ; Fill in the base address fields of
        add eax,TSS1                    ;  the four TSS descriptors
        mov [TSS1dsc.base0_15],ax
        shr eax,16
        mov [TSS1dsc.base16_23],al
        mov [TSS1dsc.base24_31],ah
;
        mov eax,[code32a]
        add eax,TSS2
        mov [TSS2dsc.base0_15],ax
        shr eax,16
        mov [TSS2dsc.base16_23],al
        mov [TSS2dsc.base24_31],ah
;
        mov eax,[code32a]
        add eax,TSS3
        mov [TSS3dsc.base0_15],ax
        shr eax,16
        mov [TSS3dsc.base16_23],al
        mov [TSS3dsc.base24_31],ah
;
        mov eax,[code32a]
        add eax,schedulerTSS
        mov [schedulerTSSdsc.base0_15],ax
        shr eax,16
        mov [schedulerTSSdsc.base16_23],al
        mov [schedulerTSSdsc.base24_31],ah

        mov edi,TSS1dsc                 ; Install the four TSS descriptors
        mov bx,[TSS1sel]                ; bx=selector
        call setdsc
        jc near @exit
        mov edi,TSS2dsc
        mov bx,[TSS2sel]
        call setdsc
        jc near @exit
        mov edi,TSS3dsc
        mov bx,[TSS3sel]
        call setdsc
        jc near @exit
        mov edi,schedulerTSSdsc
        mov bx,[schedulerTSSsel]
        call setdsc
        jc near @exit

        movzx ebx,byte [IRQ0_vect]      ; IRQ 0 (timer) is mapped to
        call getvect                    ;  interrupt IRQ0_vect
        mov [org_IRQ0+4],cx             ; cx:edx=addr of exception handler
        mov [org_IRQ0],edx
                                        ; Change int gate dsc for timer
        cli                             ;  handler into a task gate -> The
        sub esp,6                       ;  scheduler runs in its own context
        sidt [esp]                      ; idtr=16-bit limit, 32-bit base
        mov edx,[esp+2]                 ; edx=linear address of IDT
;       mov gs,[zerosel]                ; Already done in extender
        mov ax,[schedulerTSSsel]        ; The timer calls the scheduler...
        mov [gs:edx+ebx*8+2],ax         ; Set selector field
        mov al,[gs:edx+ebx*8+5]         ; Get access rights byte
        and al,0e0h                     ; Clear 5 type bits
        or al,5                         ; dsc type 5=task gate
        mov [gs:edx+ebx*8+5],al         ; Set access rights byte
        add esp,6                       ; Free stack space

        in al,21h                       ; Save PIC masks
        mov [mPICmask],al
        in al,0a1h
        mov [sPICmask],al
        mov al,0ffh                     ; Mask all IRQs...
        out 0a1h,al
        and al,~ 1                      ; ...except IRQ 0 (timer)
        out 21h,al

        str [org_tr]                    ; Save original Task Register

        mov eax,cr3                     ; CR3 and LDTR are loaded but not
        mov [TSS1.@cr3],eax             ;  stored on a task switch
        mov [TSS2.@cr3],eax
        mov [TSS3.@cr3],eax
        mov [schedulerTSS.@cr3],eax
        sldt [TSS1.ldtr]
        sldt [TSS2.ldtr]
        sldt [TSS3.ldtr]
        sldt [schedulerTSS.ldtr]
        ltr [TSS1sel]                   ; TSS1sel = current task

        pushfd                          ; IF is cleared -> right value;
        pop eax                         ;  scheduler must not be interrupted
        mov [schedulerTSS.eflags],eax
        mov [schedulerTSS.@ds],ds
        mov [schedulerTSS.@es],es
        mov [schedulerTSS.@fs],fs
        mov [schedulerTSS.@gs],gs       ; Be sure gs=zerosel!
        mov [schedulerTSS.@ss],ss
        mov [schedulerTSS.ss0],ss       ; Save PL 0 ss and esp (not necessary
        mov [schedulerTSS.esp0],esp     ;  if scheduler is PL 0)
        mov eax,200h                    ; Allocate a 200h stack
        call gethimem
        jc near cleanup
        add eax,200h                    ; Stack grows down...
        mov [schedulerTSS.@esp],eax
        mov [schedulerTSS.@cs],cs
        mov dword [schedulerTSS.eip],scheduler

        sti                             ; From now the scheduler is called
        pushfd                          ; eflags with IF set -> the tasks run
        pop eax                         ;  with interrupts enabled
        mov [TSS2.eflags],eax           ; Change all segment registers for
        mov eax,ds                      ;  their PL 3 equivalent in the LDT
        or eax,7                        ;  If they are not made PL 3 all
        mov [TSS2.@ds],eax              ;  these regs will be set to zero
        mov eax,es
        or eax,7                        ; Set Table Indicator bit; Set RPL to
        mov [TSS2.@es],eax              ;  3
        mov eax,fs
        or eax,7
        mov [TSS2.@fs],eax
        mov eax,gs
        or eax,7
        mov [TSS2.@gs],eax
        mov eax,ss
        or eax,7
        mov [TSS2.@ss],eax
        mov [TSS2.ss0],ss               ; Save PL 0 ss
        mov [TSS2.esp0],esp             ; Save PL 0 esp
        mov eax,200h                    ; Allocate a 200h stack
        call gethimem
        jc near cleanup
        add eax,200h                    ; Stack grows down...
        mov [TSS2.@esp],eax
        mov eax,cs
        or eax,7                        ; Set TI bit; Set RPL to 3
        mov [TSS2.@cs],eax
        mov dword [TSS2.eip],task2
;       call far [TSS2sel-4]            ; Switch to another task ('task2:')
                                        ; Provide a proper ending for task2
        mov ax,[TSS1sel]                ; Set backlink (necessary if switch
        mov [TSS2.backlink],ax          ;  to TSS2 is commented out)
        or dword [TSS2.eflags],4000h    ; Set NT flag -> backlink will be used

        pushfd
        pop eax
        mov [TSS3.eflags],eax
        mov eax,ds                      ; NOTE(review): unlike TSS2, every
        or eax,7                        ; Set TI bit; Set RPL to 3
        mov [TSS3.@ds],eax              ;  segment register of TSS3 (gs
        mov [TSS3.@es],eax              ;  included) gets the selector derived
        mov [TSS3.@fs],eax              ;  from ds - confirm this is intended
        mov [TSS3.@gs],eax
        mov [TSS3.@ss],eax
        mov [TSS3.ss0],ss               ; Save PL 0 ss
        mov [TSS3.esp0],esp             ; Save PL 0 esp
        mov eax,200h                    ; Allocate a 200h stack
        call gethimem
        jc near cleanup
        add eax,200h                    ; Stack grows down...
        mov [TSS3.@esp],eax
        mov eax,cs
        or eax,7                        ; Set TI bit; Set RPL to 3
        mov [TSS3.@cs],eax
        mov dword [TSS3.eip],task3
;       call far [TSS3sel-4]            ; Switch to another task ('task3:')
                                        ; Provide a proper ending for task3
        mov ax,[TSS1sel]                ; Set backlink (necessary if switch
        mov [TSS3.backlink],ax          ;  to TSS3 is commented out)
        or dword [TSS3.eflags],4000h    ; Set NT flag -> backlink will be used
;
        mov ax,[TSS1sel]                ; Register the three tasks with the
        mov [TSSarray],ax               ;  scheduler
        mov ax,[TSS2sel]
        mov [TSSarray+1*2],ax
        mov ax,[TSS3sel]
        mov [TSSarray+2*2],ax
                                        ; Start multitasking the current task
        mov bx,[TSS2sel]                ;  with task2 and task3; The current
        call activate_task              ;  task is already active
        mov bx,[TSS3sel]
        call activate_task

        mov esi,msg1
        @rlp edi,0b8000h+1*160
        mov bl,1fh
        call putstr
        @rlp edi,0b8000h+1*160+19*2
        mov dword [timercntr],0         ; Reset counter
.wait:                                  ; Phase 1: all three tasks run
        inc byte [edi]                  ; Show activity
        cmp dword [timercntr],DELAYCNT
        jb short .wait

        mov bx,[TSS2sel]                ; Suspend multitasking task2
        call suspend_task               ; (a possible error is ignored here)

        mov dword [timercntr],0
.wait2:                                 ; Phase 2: task2 is suspended
        inc byte [edi]
        cmp dword [timercntr],DELAYCNT
        jb short .wait2
                                        ; Continue multitasking task2 with
        mov bx,[TSS2sel]                ;  other tasks (if not already ended)
        call activate_task
        jnc short .wait_more
        mov esi,msg4                    ; CF=1 -> task2 already ended
        @rlp edi,0b8000h+6*160
        mov bl,1fh
        call putstr
        jmp cleanup

.wait_more:
        mov dword [timercntr],0
.wait3:                                 ; Phase 3: task2 runs again
        inc byte [edi]
        cmp dword [timercntr],DELAYCNT
        jb short .wait3
;
;
; cleanup
; Undoes the changes main made to the system and leaves the program:
;   - restores the original IRQ 0 handler and turns the IDT entry for IRQ 0
;     back into an interrupt gate,
;   - reloads the original Task Register,
;   - restores both PIC masks,
;   - waits for a key and jumps to the extender's exit routine.
; In:
;   ds = data32sel
;   gs = zerosel
;   gdtr, org_tr, org_IRQ0, mPICmask, sPICmask = values saved by main
; Note:
;   Must only be reached after main has saved org_tr and org_IRQ0.
;
cleanup:
        cli                             ; Restore original timer handler
        movzx ebx,byte [IRQ0_vect]      ; Read as a byte, exactly like main
                                        ;  does -> a word read could pull a
                                        ;  neighbouring byte into the vector
                                        ;  number and the IDT index below
        mov cx,[org_IRQ0+4]             ; cx:edx=addr of exception handler
        mov edx,[org_IRQ0]
        call setvect
                                        ; Change task gate dsc for timer
        sub esp,6                       ;  handler back into an int gate dsc
        sidt [esp]                      ; idtr=16-bit limit, 32-bit base
        mov edx,[esp+2]                 ; edx=linear address of IDT
        mov al,[gs:edx+ebx*8+5]         ; Get access rights byte
        and al,0e0h                     ; Clear 5 type bits
        or al,0eh                       ; dsc type eh=interrupt gate
        mov [gs:edx+ebx*8+5],al         ; Set access rights byte
        add esp,6                       ; Free stack space

        mov eax,[gdtr+2]                ; eax=linear base address of the GDT
        movzx ebx,word [org_tr]
        and ebx,~ 3                     ; ebx=offset of TSS dsc in GDT
        and byte [gs:eax+ebx+5],~ 2     ; Clear Busy bit -> ltr requires an
                                        ;  available (non-busy) TSS
        ltr [org_tr]                    ; Restore original Task Register
        mov al,[mPICmask]               ; Restore original PIC masks
        out 21h,al
        mov al,[sPICmask]
        out 0a1h,al
        sti

        mov esi,msg5
        @rlp edi,0b8000h+7*160
        mov bl,1fh
        call putstr
        mov byte [v86r_ah],0            ; ah=0 -> Wait for key and read char
        mov al,16h                      ; Real mode interrupt 16h (keyboard)
        int RMCALL_VECT

@exit:
        jmp exit                        ; Return to real/V86 mode

;
; Scheduler
; Runs as a task of its own (schedulerTSS); it is entered through the task
; gate that main installs for IRQ 0.
; In:
;   ds = data32sel
;   gs = zerosel
;   gdtr = value of the Global Descriptor Table Register
;
; Note:
;   Starting at the slot after runTSS, the scheduler looks round-robin
;   through TSSarray for a non-zero selector whose descriptor has the Busy
;   bit set. The first one found is written into the scheduler's backlink
;   field, so the IRETD switches to it. If the search arrives back at the
;   starting slot, the backlink is left alone and the IRETD returns to the
;   task that was interrupted.
;
scheduler:
;        @rlp edi,0b8000h+1*160+158
;        inc byte [edi]                  ; Show activity
        inc dword [timercntr]           ; Count this timer tick

        mov edx,[runTSS]                ; edx=slot the search started from
        mov edi,edx                     ; edi=slot currently examined
        mov esi,[gdtr+2]                ; esi=linear base address of the GDT
.next_slot:
        inc edi                         ; Advance to the next slot...
        cmp edi,MAX_N_TASKS
        jb short .in_range
        xor edi,edi                     ; ...wrapping around to slot 0
.in_range:
        mov [runTSS],edi                ; Remember the slot for the next tick
        cmp edi,edx
        je short .send_eoi              ; Full circle -> no other busy task
        movzx ebx,word [TSSarray+edi*2]
        test ebx,ebx
        jz short .next_slot             ; Empty slot
        and bl,~ 3                      ; Clear RPL -> ebx=offset of dsc
        test byte [gs:esi+ebx+5],2      ; Busy bit clear -> task is suspended,
        jz short .next_slot             ;  ended or never activated
        mov [schedulerTSS.backlink],bx  ; IRETD will switch to this task
.send_eoi:

        mov al,20h                      ; 20h=non-specific EOI cmd for IC reg
        out 20h,al                      ; Send EOI to master PIC
        iretd                           ; Switch to the task in the backlink
        jmp scheduler                   ; The next tick resumes the scheduler
                                        ;  right here -> start over
;
;
; task2
; Prints msg2 on screen row 2, then increments one screen byte TASK2CNTR
; times to show activity and ends with IRETD. Main sets NT in this task's
; eflags and TSS1sel as its backlink, so that IRETD is meant to switch back
; to task 1. If the task is ever resumed after that IRETD, it reports the
; fact with msg6.
;
task2:
        @rlp edi,0b8000h+2*160          ; Destination: start of row 2
        mov esi,msg2
        mov bl,1fh                      ; Same attribute as all other messages
        call putstr
;        mov eax,cr0                     ; Exception 13 if this is PL > 0 code
        @rlp edi,0b8000h+2*160+19*2     ; Character cell used as activity mark
        mov ecx,TASK2CNTR               ; ecx=number of iterations left
.spin:
        inc byte [edi]                  ; Show activity
        dec ecx
        jnz short .spin
        iretd                           ; Designed end of task2

        @rlp edi,0b8000h+4*160          ; Only reached if the task is resumed
        mov esi,msg6                    ;  after it ended
        mov bl,1fh
        call putstr
        iretd
;
;
; task3
; Prints msg3 on screen row 3, then increments one screen byte TASK3CNTR
; times to show activity and ends with IRETD. Main sets NT in this task's
; eflags and TSS1sel as its backlink, so that IRETD is meant to switch back
; to task 1. If the task is ever resumed after that IRETD, it reports the
; fact with msg7.
;
task3:
        @rlp edi,0b8000h+3*160          ; Destination: start of row 3
        mov esi,msg3
        mov bl,1fh                      ; Same attribute as all other messages
        call putstr
;        mov eax,cr0                     ; Exception 13 if this is PL > 0 code
        @rlp edi,0b8000h+3*160+19*2     ; Character cell used as activity mark
        mov ecx,TASK3CNTR               ; ecx=number of iterations left
.spin:
        inc byte [edi]                  ; Show activity
        dec ecx
        jnz short .spin
        iretd                           ; Designed end of task3

        @rlp edi,0b8000h+5*160          ; Only reached if the task is resumed
        mov esi,msg7                    ;  after it ended
        mov bl,1fh
        call putstr
        iretd

;
; Activate task
; In:
;   ds = data32sel
;   gs = zerosel
;   bx = selector of task to activate
;   gdtr = value of the Global Descriptor Table Register
; Out:
;   CF=0 -> Busy bit and active bit are set -> task activated (or it was
;           already active)
;   CF=1 -> active bit was set but Busy bit was clear -> task ended
;   All registers are preserved.
;
; Note:
;   activate_task sets the Busy bit only if the active bit is clear. In order
;   for the task to actually multitask with other tasks, the task's selector
;   must be present in TSSarray.
;   TEST and OR always leave CF cleared, so only the error path has to touch
;   the Carry Flag explicitly.
;
activate_task:
        push esi
        push ebx
        mov esi,[gdtr+2]                ; esi=linear base address of the GDT
        and ebx,0fffch                  ; ebx=offset of dsc (RPL cleared)

        test byte [gs:esi+ebx+6],10h    ; Active bit clear?
        jz short .resume                ;  -> task is suspended/not started
                                        ; Active bit set: did the task end?
        test byte [gs:esi+ebx+5],2
        jnz short .done                 ; Busy set too -> already active, CF=0
        stc                             ; Task ended, can't be activated ->
        jmp short .done                 ;  set Carry Flag

.resume:
        or byte [gs:esi+ebx+6],10h      ; Set active bit
        or byte [gs:esi+ebx+5],2        ; Set Busy bit -> possible to switch
                                        ;  to this task using IRETD
.done:
        pop ebx
        pop esi
        ret

;
; Suspend task
; In:
;   ds = data32sel
;   gs = zerosel
;   bx = selector of task to suspend
;   gdtr = value of the Global Descriptor Table Register
; Out:
;   CF=0 -> Busy bit and active bit are cleared -> task suspended (or it was
;           already suspended)
;   CF=1 -> active bit was set but Busy bit was clear -> task already ended
;        -> active bit was clear but Busy bit was set (happens only if code
;           modifies the descriptor fields itself without using activate_task
;           or suspend_task)
;   All registers are preserved.
;
; Note:
;   A task that ended keeps its active bit set. activate_task sets the Busy
;   bit only if the active bit is clear -> The scheduler won't switch to a
;   task that has ended and of which the eip contains an invalid value.
;   TEST and AND always leave CF cleared, so only the error path has to touch
;   the Carry Flag explicitly.
;
suspend_task:
        push esi
        push ebx
        mov esi,[gdtr+2]                ; esi=linear base address of the GDT
        and ebx,0fffch                  ; ebx=offset of dsc (RPL cleared)

        test byte [gs:esi+ebx+6],10h    ; Active bit set?
        jnz short .was_active
                                        ; Active bit clear -> Task (should
        test byte [gs:esi+ebx+5],2      ;  be) suspended
        jz short .done                  ; Busy clear too -> nothing to do
        jmp short .fail                 ; active clear but Busy set -> ?

.was_active:
        test byte [gs:esi+ebx+5],2      ; Test Busy bit
        jz short .fail                  ; Task ended, don't clear active bit
                                        ;  -> activate_task can detect that
                                        ;  task ended (active!=Busy)
                                        ; Task active -> Suspend it
        and byte [gs:esi+ebx+6],~ 10h   ; Clear active bit
        and byte [gs:esi+ebx+5],~ 2     ; Clear Busy bit -> scheduler skips
        jmp short .done                 ;  this task

.fail:
        stc                             ; Report the inconsistency
.done:
        pop ebx
        pop esi
        ret
