	TITLE CrLf Unix<>DOS EOL converter

Comment	~
Usage:  CRLF [-r] file1 [>output]
-r	Convert from DOS CR/LF End Of Line (EOL) to Unix LF EOL.
Default output is STDOUT (i.e., redirectable).

Written to replace the C version by Steve Creps (which was a slug)
Given to the public domain, 7 Jul 89

David Kirschbaum
Toad Hall
kirsch@braggvax.ARPA

v1.1, 3 Aug 89
 - Bug report from Erich Neuwirth <A4422DAB%AWIUNI11.BITNET@CUNYVM.CUNY.EDU>
   Unix -> DOS conversion breaks down at the BUFFSIZE boundary.
   Found bug when writing output buffer .. a constant CR or LF (in AH)
   was being lost by destroying AX during the output write.
   Fixed.  Thanks, Erich.

 - While I was at it, relocated dynamic file buffers
   to overlay the startup code and messages.

 - Changed input and output buffer sizes to eliminate the multiple tests
   of the ES:DI output buffer pointer during conversion.

   Now we read an input buffer-full, process it to the output buffer,
   and then write the full output buffer .. not worrying about overrunning
   the output buffer end (and our stack!)

   This overrun problem would only arise during Unix --> DOS conversions
   (where output is longer than input).
   There's no telling just HOW much longer the output will be than the
   input .. worst case is input * 2 (a file of all LFs converted to
   a file of all CR/LFs).

   Assuming worst case, we're making our output buffer TWICE the size
   of the input buffer (most unlikely .. but still ..).

   Surprisingly, we've gained no significant speed!  The more frequent
   input file reads offset the increase in processing speed .. sigh ...

   Toad Hall

Comment	ends	~


;ASCII end-of-line characters
CR	EQU	13			;carriage return (0DH)
LF	EQU	10			;line feed (0AH)

;Boolean values.  TRUE has every bit set, so byte-sized uses should
;take LOW(TRUE).
FALSE	EQU	0
TRUE	EQU	NOT FALSE

;DOS standard file handles
STDOUT	EQU	1			;standard output
STDERR	EQU	2			;standard error

;Input buffer size in bytes (was 30000 before v1.1).
;The output buffer area is allowed twice this much.
BUFFSIZE EQU	20000

;Everything lives in the single segment CSEG; CS, DS and ES are all
;assumed to address it (presumably the program is built as a .COM file).
CSEG	SEGMENT PARA PUBLIC 'CODE'
	ASSUME	CS:CSEG,DS:CSEG,ES:CSEG

	org	80H			;PSP command line: length byte first
cmdline	label	byte

	org	100H			;program code begins at offset 100H

CrLf	proc	near
	jmp	Start			;hop over the resident data below

;--	Run-time state

handle	dw	0			;handle of the opened input file

eofFlag	db	FALSE			;becomes non-0 once a read comes up short
addcr	db	LOW(TRUE)		;TRUE: LF -> CR/LF (default)
					;FALSE: CR/LF -> LF (-r switch)

;--	Conversion banners, shown right after the filename

unix$	db	' Unix --> DOS',CR,LF,0
dos$	db	' DOS --> Unix',CR,LF,0

;--	Warnings raised by the first-buffer check

u2d_warn$ db	'Warning: Unix file has CRs!',CR,LF,0	;Unix -> DOS, CR found
d2u_warn$ db	'Warning: DOS file has no CRs!',CR,LF,0	;DOS -> Unix, no CR found

;--	Error, prompt and abort messages (all AsciiZ)

openErr$ db	'Can''t open target file',0	;input file could not be opened
readErr$ db	'Input file read error',0	;input file read failed
outErr$ db	'Output error',0		;write to StdOut failed
switchErr$ db	'Unknown switch',0		;switch other than -r
prompt$	db	'Continue? [Y/N]: ',0		;asked after a warning
abort$	db	'User abort',0			;user did not answer Y

CrLf	endp


;--	Program main line.
;	Parses the command line, opens the input file, then converts it
;	one buffer at a time, writing each converted buffer to StdOut.
;	Leaves through DOS function 4CH with the ERRORLEVEL in AL:
;	  0  success (an empty input file counts as success)
;	  1  usage request or bad switch
;	  otherwise the low byte of AX as returned by the failing DOS call.

Start	proc	near

	call	Parse_CmdLine		;get any switches,
					;prepare target filename
	mov	al,1			;ERRORLEVEL 1 if parsing failed
					;(MOV leaves CF untouched)
	jc	Msg_Exit		;no action, DX -> error msg

;else DX -> target filename's first character

	mov	ax,3D00H		;open file, read only
	int	21H
	mov	dx,offset openErr$	;'Can't open target file'
	jc	Msg_Exit		;open failed, die (AX = DOS error)

	mov	handle,ax		;save input file handle

	call	Test_Buffer		;initial input file read,
					;test for funny input file EOLs
	jnc	Read_1			;ok, skip the read/EOF test seq

;Test_Buffer returned CF set: AX = 0 means the file was simply empty,
;anything else is the DOS error code from the failed read.

	or	ax,ax			;empty file?
	jnz	Read_Error		;no, the first read really failed
	jmp	short Terminate		;nothing to convert: quiet exit, AL=0

;We loop here, processing each buffer full, until EOF or file error.

Read_Lup:
	mov	si,offset INBUFF	;DS:SI -> input buffer base

	cmp	eofFlag,FALSE		;hit input EOF yet?
	jnz	Flush			;yep, flush any remaining processed
					;chars, terminate.

	mov	dx,si			;read into input buffer (DS:SI)
	mov	cx,BUFFSIZE		;try for a full buffer's worth
	mov	bx,handle		;input file handle
	mov	ah,3FH			;read from file/device
	int	21H
	jc	Read_Error		;read failed

	or	ax,ax			;read anything?
	jz	Flush			;nope, flush any remaining processed
					;chars, terminate.

	cmp	ax,cx			;read all we requested?
	adc	eofFlag,0		;will make flag non-0 if EOF
	mov	cx,ax			;CX = input buffer count

Read_1:
	call	Process_Buff		;convert input buffer EOLs
	call	Write_Output		;write output buffer bytes
	jnb	Read_Lup		;went ok
	jmp	short Write_Error	;CF means output write failed

Flush:
	call	Write_Output		;write any output buffer bytes
	jc	Write_Error		;failed
	xor	ax,ax			;ERRORLEVEL=0
	jmp	short Terminate


Read_Error:
	mov	dx,offset readErr$	;'Input file read error'
	jmp	short Msg_Exit		;terminate

Write_Error:
	mov	dx,offset outErr$	;'Output error'
					;fall thru to...

;Come here with any messages in DX and the ERRORLEVEL in AL
Msg_Exit:
	push	ax			;Write_StdErr destroys AX
	call	Write_StdErr		;output to StdErr (console)
	pop	ax

Terminate:
	mov	ah,4CH			;terminate (errorlevel in AL)
	int	21H			;we let DOS close the input file.

Start	endp


;--	First read of the input file, plus a sanity check of its EOLs.
;	Reads up to BUFFSIZE bytes into INBUFF and looks for a CR.
;	Finding a CR while converting Unix -> DOS, or finding none while
;	converting DOS -> Unix, earns the user a warning and a
;	'Continue? [Y/N]' prompt.  Any answer but Y/y drops our return
;	address and leaves through Msg_Exit with ERRORLEVEL 1.
;	Returns CF clear: CX = bytes read, DS:SI -> INBUFF,
;			  ES:DI -> OUTBUFF, eofFlag bumped on a short read.
;	Returns CF set:   AX = DOS error code (read failed),
;			  or AX = 0 (file is empty).

Test_Buffer	proc	near

	mov	ah,3FH			;DOS read from file/device
	mov	bx,handle		;  from the input file
	mov	cx,BUFFSIZE		;  a full buffer if possible
	mov	dx,offset INBUFF	;  into the input buffer
	int	21H
	jc	TB_Done			;read failed, CF goes back as is

	test	ax,ax			;anything at all?
	jz	TB_Empty		;no, report an empty file

	cmp	ax,cx			;short read?
	adc	eofFlag,0		;then that was the last buffer
	mov	cx,ax			;CX = bytes actually read

	mov	si,dx			;DS:SI -> input data for the caller
	mov	di,dx			;ES:DI -> input data for the scan
	mov	al,CR			;hunting for a CR
	push	cx			;the scan eats CX
	repne	scasb			;ZF set if a CR turned up
	pop	cx			;POP leaves ZF alone
	jz	TB_HasCR

;No CR in the first buffer.  Fine for Unix input, odd for DOS input.

	cmp	addCr,FALSE		;DOS -> Unix?
	jnz	TB_Ready		;no, Unix -> DOS: as expected
	mov	dx,offset d2u_warn$	;'Warning: DOS file has no CRs!'
	jmp	short TB_Ask

;A CR in the first buffer.  Fine for DOS input, odd for Unix input.

TB_HasCR:
	cmp	addCr,FALSE		;DOS -> Unix?
	jz	TB_Ready		;yes: as expected
	mov	dx,offset u2d_warn$	;'Warning: Unix file has CRs!'

;Show the warning in DX and let the user decide.

TB_Ask:
	call	Write_StdErr		;the warning itself
	mov	dx,offset prompt$	;'Continue? [Y/N]: '
	call	Write_StdErr
	mov	ax,0C08H		;flush keyboard, read key w/o echo
	int	21H
	and	al,5FH			;fold to upper case
	cmp	al,'Y'
	jz	TB_Ready		;user wants to go on

	pop	ax			;discard our return address
	mov	al,1			;ERRORLEVEL 1
	mov	dx,offset abort$	;'User abort'
	jmp	Msg_Exit		;display, terminate

TB_Ready:
	mov	di,offset OUTBUFF	;ES:DI -> output buffer base
	clc				;success
TB_Done:
	ret

TB_Empty:
	stc				;empty file: CF set with AX = 0
	ret

Test_Buffer	endp


;--	Picks the conversion routine for the current mode and jumps to it.
;	Registers are passed through untouched apart from BX:
;	DS:SI -> input data, ES:DI -> next free output byte, CX = byte count.
;	This is a jump, not a call, so the converter's RET returns
;	straight to our caller.
;	Destroys BX (plus whatever the converter destroys).

Process_Buff	proc	near

	mov	bx,offset Unix_To_Dos	;assume Unix -> DOS conversion
	cmp	addcr,LOW(TRUE)		;adding CRs?  (Unix -> DOS)
					;addcr is a byte, so compare with
					;LOW(TRUE) as everywhere else
	jz	PB_Jump			;yep
	mov	bx,offset Dos_To_Unix	;nope, CR/LF to LF conversion
PB_Jump:
	jmp	bx			;return from whichever procedure

Process_Buff	endp


;--	DOS -> Unix: copies the input buffer to the output buffer,
;	leaving out every CR on the way.
;	Enter with DS:SI -> input data
;		   ES:DI -> next free output byte
;		   CX = number of input bytes (must not be 0)
;	Returns SI just past the input, DI just past the bytes stored,
;	CX = 0.  Destroys AX.
;	No output overrun test is needed: output never exceeds input.
;	NOTE: relies on the direction flag being clear - nothing in this
;	file sets it.

Dos_To_Unix	proc	near

	mov	ah,CR			;keep the CR handy for compares

D2U_Lup:
	lodsb				;next input byte
	cmp	al,ah			;is it a CR?
	jne	D2U_Keep		;no, it gets copied
	loop	D2U_Lup			;yes, drop it and carry on
	ret

D2U_Keep:
	stosb				;ordinary char or LF goes out
	loop	D2U_Lup
	ret

Dos_To_Unix	endp


;--	Unix -> DOS: copies the input buffer to the output buffer,
;	writing CR/LF wherever the input has an LF.
;	Enter with DS:SI -> input data
;		   ES:DI -> next free output byte
;		   CX = number of input bytes (must not be 0)
;	Returns SI just past the input, DI just past the bytes stored,
;	CX = 0.  Destroys AX.
;	Output can grow to twice the input; no overrun test is made here,
;	the output area is sized for that worst case.
;	NOTE: relies on the direction flag being clear - nothing in this
;	file sets it.

Unix_To_Dos	proc	near

	mov	ah,LF			;AH = LF for compares, and it is also
					;the high byte of the CR/LF word

U2D_Lup:
	lodsb				;next input byte
	cmp	al,ah			;Unix EOL?
	jne	U2D_Plain		;no, copy it as is

	mov	al,CR			;AX = LF:CR ..
	stosw				;.. stored as the bytes CR, LF
	jmp	short U2D_Next

U2D_Plain:
	stosb				;ordinary char goes out unchanged

U2D_Next:
	loop	U2D_Lup			;until the input is used up
	ret

Unix_To_Dos	endp


;--	Writes whatever has accumulated in the output buffer to StdOut.
;	Enter with ES:DI -> one past the last byte stored in OUTBUFF.
;	The byte count is DI - OUTBUFF; it can be larger (Unix -> DOS)
;	or smaller (DOS -> Unix) than the input count.
;	Returns CF set on a write error, with DOS's result in AX.
;	With nothing to write, returns AX = 0 and CF clear.
;	After a write DI is reset to OUTBUFF.  CX is preserved.
;	Destroys AX,BX,DX

Write_Output	proc	near

	mov	dx,offset OUTBUFF	;DS:DX -> data to write
	mov	ax,di
	sub	ax,dx			;AX = bytes waiting in the buffer
	jbe	WO_Nothing		;buffer is empty

	push	cx			;caller's CX must survive
	mov	bx,STDOUT		;handle: StdOut
	mov	cx,ax			;byte count
	mov	ah,40H			;write to file/device
	int	21H
	mov	di,dx			;ES:DI -> output buffer start again
	pop	cx			;(MOV and POP leave CF alone)
	ret				;CF set if write error

WO_Nothing:
	xor	ax,ax			;AX = 0; XOR also leaves CF clear
	ret

Write_Output	endp


;--	Writes an AsciiZ message to StdErr.
;	Enter with DS:DX -> message, terminated by a 0 byte.
;	The terminating 0 itself is NOT written.
;	An empty message writes nothing.
;	Preserves CX, DX, DI.
;	Destroys AX,BX
;	NOTE: the length scan goes through ES:DI, so this assumes ES = DS
;	(as the ASSUME for CSEG states) and a clear direction flag.

Write_StdErr	proc	near

	push	di
	push	cx

	xor	al,al			;scan for AsciiZ 0
	mov	cx,0FFFFH		;max scan
	mov	di,dx			;ES:DI -> message's first char
	repne	scasb			;find AsciiZ 0
	not	cx			;CX = bytes scanned, the 0 included
	dec	cx			;leave the 0 out: CX = msg length
	jcxz	WSE_Done		;empty message, nothing to write

	mov	bx,STDERR		;write to StdErr
	mov	ah,40H			;write to file/device
	int	21H

WSE_Done:
	pop	cx
	pop	di
	ret

Write_StdErr	endp


;The run-time file buffers begin here.  Once the command line has been
;parsed, the usage message and Parse_CmdLine below are no longer needed,
;so the buffers simply overwrite them.

	EVEN					;word-align the buffers

INBUFF		label	byte			;input buffer, BUFFSIZE bytes
OUTBUFF		EQU	INBUFF + BUFFSIZE	;output buffer follows it
;v1.1	The output buffer is allowed about BUFFSIZE*2 bytes;
;	no end-of-buffer marker is kept any more.

usage$	db 'CRLF v1.1 - Convert Unix LF line endings to DOS CR/LF endings.',CR,LF
	db 'Usage:  CRLF [-r] filename.typ [>output]',CR,LF
	db 'Where',CR,LF
	db ' -r              reverses the operation (CR/LF to LF)',CR,LF
	db ' filename.typ    is the target filename',CR,LF
	db 'Default output is to STDOUT (redirect to any file/device).',CR,LF
	db 0					;AsciiZ terminator


;--	Parse PSP command line for -r switch and target filename.
;	Accepts  [-r | /r] filename ; a leading '?' asks for the usage text.
;	The filename ends at the first space, tab or CR and is turned
;	into an AsciiZ string in place; anything after it is ignored.
;	The filename and the conversion direction are echoed to StdErr.
;	Returns CF clear: DS:DX -> AsciiZ filename,
;			  addCr flipped if -r was given.
;	Returns CF set:   DS:DX -> message to display (usage or
;			  'Unknown switch').
;	Destroys AX,BX,CX,SI

Parse_CmdLine	proc	near

	mov	si,offset cmdline	;PSP cmdline length byte
	xor	ah,ah			;clear msb
	lodsb				;snarf length byte
	mov	cx,ax			;CX=cmdline length
	mov	dx,offset usage$	;assume no cmdline
	jcxz	PC_Bad			;return CF set

	call	Next_Char		;gobble any spaces, tabs
	jcxz	PC_Bad			;nothing but white space

;AL = first real cmdline char
;SI -> next cmdline char
;CX = non-0 (Next_Char only counts the white space it skips)

	cmp	al,'-'			;got a switch?
	jz	PC_Switch		;yep
	cmp	al,'/'			;be nice, test for other switch
	jz	PC_Switch
	cmp	al,'?'			;asking for help?
	jnz	PC_FileName		;no, must be the filename's first char
					;yes, DX -> usage msg, fall thru to...

;Common failure exit.  It sits here, in the middle of the procedure,
;so that every short jump to it stays comfortably in range.
PC_Bad:
	stc				;return CF set for failure
	ret				;DX -> error msg

;We got a switch
PC_Switch:
	call	Next_Char		;get next char
	jcxz	PC_Bad			;usage, die

	mov	dx,offset switchErr$	;'Unknown switch'
	and	al,5FH			;uppercase
	cmp	al,'R'			;we only take 'R' switches for now
	jnz	PC_Bad			;bad

	mov	dx,offset usage$	;if no filename, usage msg
	not	addCr			;flip flag to CR/LF -> LF conversion
	call	Next_Char		;filename should be next
	jcxz	PC_Bad

;AL = filename's first char, SI -> the char after it, DX -> usage$
PC_FileName:
	dec	si			;back up to filename's first char
	mov	cx,offset cmdline+80H	;first offset past the cmdline area
	sub	cx,si			;CX = bytes we may scan
	jbe	PC_Bad			;ran off the PSP: treat as usage error
	mov	dx,si			;remember filename start in DX

PC_FNLup:
	lodsb				;snarf next char
	cmp	al,CR			;end of cmdline?
	jz	PC_FNEnd
	cmp	al,' '			;space ends the filename too
	jz	PC_FNEnd
	cmp	al,9			;and so does a tab
	jz	PC_FNEnd
	loop	PC_FNLup		;stay inside the cmdline area

	mov	dx,offset usage$	;no terminator found: malformed
	jmp	short PC_Bad

PC_FNEnd:
	mov	byte ptr [si-1],0	;AsciiZe it, over the terminator

	call	Write_StdErr		;display filename
	push	dx			;save filename ptr
	mov	dx,offset unix$		;assume 'Unix -> DOS' EOL conversion
	cmp	addCr,LOW(TRUE)		;true?
	jz	PC_1			;yep
	mov	dx,offset dos$		;'DOS -> Unix'
PC_1:
	call	Write_StdErr		;display msg
	pop	dx			;restore filename ptr
	clc				;return CF clear
	ret


;Parse_CmdLine subroutine
;Skips spaces and tabs starting at DS:SI.
;Enter with CX = non-0 budget of white-space chars that may be skipped.
;Returns CX non-0: AL = first char that is not white space,
;		   SI -> the char after it.
;Returns CX = 0:   the command line ended first (CR reached, or the
;		   budget ran out).

Next_Char:
	jcxz	NC_Ret			;cmdline zeroed out, return

NC_Lup:
	lodsb				;snarf cmdline char
	cmp	al,CR			;CR terminates the cmdline:
	jz	NC_End			;  never read beyond it
	cmp	al,' '			;space?
	jz	NC_ReLup		;yep, gobble
	cmp	al,9			;tab?
	jnz	NC_Ret			;normal char, return it (CX non-0)

NC_ReLup:
	loop	NC_Lup
NC_Ret:
	ret

NC_End:
	xor	cx,cx			;tell the caller nothing was found
	ret


Parse_CmdLine	endp

CSEG	ENDS				;end of the program's only segment
	END	CrLf			;execution starts at CrLf
