Discussion:
Awk program to uppercase Oberon keywords
(too old to reply)
William James
2004-10-26 19:29:17 UTC
Permalink
# Awk program to convert Oberon keywords that are in lowercase
# to uppercase. Handles nested comments.
# Caution: if you create identifiers that contain only lowercase
# letters and that are spelled the same as an Oberon keyword,
# they will be uppercased. (Obviously.)

BEGIN {
    # Regular expressions for the delimiters the scanner reacts to.
    # Bracket expressions keep "(", ")" and "*" literal.
    open_comment  = "[(][*]"
    close_comment = "[*][)]"
    quote         = "\""

    # Outside strings and comments, a quote or a comment opener is sought.
    openers = quote "|" open_comment
    sought  = openers

    # Fill words[] with the lowercase spellings to be uppercased.
    setup_words()
}

## ----- Main loop. -----

{
    # Consume the record one delimiter at a time.  Match() splits $0 into
    # the text before the delimiter (RLEFT), the delimiter (RMATCH) and the
    # remainder (RRIGHT), and returns RSTART, which is nonzero after a
    # successful match().
    while ( match( $0, sought ) && Match( $0 ) ) {
        matched = RMATCH
        $0 = RRIGHT
        # RLEFT belongs to the state that was in force before the delimiter,
        # so it is handled before change_state() runs.
        process( RLEFT )
        output( matched )
        change_state( matched )
    }

    # Text after the last delimiter, then terminate the output line.
    process( $0 )
    output( "\n" )
}

## ----- Functions. -----

# Update the scanner state after delimiter s has been written out.
#   s - the delimiter text just matched by the main loop
# Reads open_comment, close_comment, quote and openers; updates in_comment,
# in_quote and sought.
function change_state( s )
{
    # Comment opener: go one nesting level deeper and look only for
    # further comment delimiters.
    if ( s ~ open_comment ) {
        in_comment++
        sought = open_comment "|" close_comment
        return
    }

    # Comment closer: leave one level; resume normal scanning once the
    # outermost comment has ended.
    if ( s ~ close_comment ) {
        in_comment--
        if ( in_comment == 0 )
            sought = openers
        return
    }

    # Anything else is a quote, which toggles the string state.
    in_quote = !in_quote
    sought = in_quote ? quote : openers
}

# Write s to the output, uppercasing every identifier that is listed in
# words[].  Inside a comment or a string s is written unchanged.
#   s     - text containing no quote or comment delimiter
#   accum - local: the rewritten text built so far
function process( s, accum )
{
    if ( in_comment || in_quote ) {
        output( s )
        return
    }

    accum = ""
    # Match whole identifiers (a letter or "_" followed by letters, digits
    # or "_").  Stopping at the first digit would turn "end1" into "END1".
    while ( match( s, /[a-zA-Z_][a-zA-Z_0-9]*/ ) ) {
        Match( s )
        if ( RMATCH in words )
            RMATCH = toupper( RMATCH )
        accum = accum RLEFT RMATCH
        s = RRIGHT
    }
    # Whatever is left holds no further identifier.
    output( accum s )
}

# Write s to standard output exactly as given; no newline is appended.
function output( s )
{
    printf( "%s", s )
}

# Fill the global array words[] with the lowercase spellings that are to be
# uppercased.  Only the keys matter; every value is 1.
#   s, temp, i - locals
function setup_words( s,temp,i )
{
    # "by" and "for" are included so that a FOR ... TO ... BY ... DO
    # statement is uppercased as a whole rather than only its TO and DO.
    s = "array,begin,by,case,const,div,do,else,elsif,end,exit,for,if" \
        ",import,in,is,loop,mod,module,nil,of,or,pointer,procedure" \
        ",record,repeat,return,then,to,type,until,var,while,with,abs" \
        ",ash,boolean,cap,char,chr,copy,dec,entier,excl,false,halt,inc" \
        ",incl,integer,len,long,longint,longreal,max,min,new,odd,ord" \
        ",real,set,short,shortint,size,true"

    split( s, temp, /,/ )
    for ( i = 1; i in temp; i++ )
        words[ temp[i] ] = 1
}

# Split s around the most recent match(), using RSTART and RLENGTH.
# Sets the globals RLEFT (text before the match), RMATCH (the matched text)
# and RRIGHT (text after it).  When RSTART is 0, RLEFT and RMATCH are empty
# and RRIGHT is all of s.  Returns RSTART.
function Match( s )
{
    if ( !RSTART ) {
        RLEFT  = ""
        RMATCH = ""
        RRIGHT = s
        return RSTART
    }

    RLEFT  = substr( s, 1, RSTART - 1 )
    RMATCH = substr( s, RSTART, RLENGTH )
    RRIGHT = substr( s, RSTART + RLENGTH )
    return RSTART
}
William James
2004-10-27 20:21:39 UTC
Permalink
New:
- Handles strings delimited by single quotes (').
- Checks for unclosed comments and strings.

-----------------------------------------------------------------

# Awk program to convert Oberon keywords that are in lowercase
# to uppercase. Handles nested comments.
# Caution: if you create identifiers that contain only lowercase
# letters and that are spelled the same as an Oberon keyword,
# they will be uppercased. (Obviously.)
# Run with
# awk -f ucaseOberon.awk infile >outfile

BEGIN {
    # Diagnostics go to /dev/stderr when a ComSpec or COMSPEC environment
    # variable is set (presumably a DOS/Windows system -- not confirmed
    # here), and to /dev/tty otherwise.
    err_device = ( ENVIRON["ComSpec"] || ENVIRON["COMSPEC"] ) ? "/dev/stderr" : "/dev/tty"

    # Regular expressions for the delimiters; bracket expressions keep
    # "(", ")" and "*" literal.  A string may open with either quote mark.
    open_comment  = "[(][*]"
    close_comment = "[*][)]"
    quote         = "\"|'"

    # Outside strings and comments, a quote or a comment opener is sought.
    openers = quote "|" open_comment
    sought  = openers

    setup_words()
}

## ----- Main loop. -----

{
    # Each pass handles one delimiter: the text in front of it, the
    # delimiter itself, then the state change it causes.  Match() returns
    # RSTART, which is nonzero whenever match() succeeded.
    while ( match( $0, sought ) && Match( $0 ) ) {
        matched = RMATCH
        $0 = RRIGHT
        # RLEFT is passed before change_state() runs, so it is treated
        # according to the state that preceded the delimiter.
        process( RLEFT )
        output( matched )
        change_state( matched )
    }

    # Remainder of the record, followed by the line terminator.
    process( $0 )
    output( "\n" )
}

## ----- After end of file has been reached. -----

END {
    # If an error was already reported, do not add end-of-file diagnostics;
    # just pass the error count on as the exit status.
    if ( errors )
        exit errors

    # Both conditions are checked, so both messages may be printed.
    if ( in_quote )
        error( "File ended before literal string was closed." )
    if ( in_comment )
        error( "Comment that was begun in line " comment_start " never ended." )

    exit errors
}


## ----- Functions. -----

# Update the scanner state after delimiter s has been written out.
#   s - the delimiter text just matched by the main loop
# Reads open_comment, close_comment and openers; updates in_comment,
# comment_start, in_quote, quote_start and sought.
function change_state( s )
{
    # Comment opener.
    if ( s ~ open_comment ) {
        # Only the outermost opener's line is kept, for error reporting.
        if ( !in_comment )
            comment_start = FNR
        in_comment++
        sought = open_comment "|" close_comment
        return
    }

    # Comment closer: resume normal scanning once the outermost level ends.
    if ( s ~ close_comment ) {
        in_comment--
        if ( in_comment == 0 )
            sought = openers
        return
    }

    # Opening quote: from here on only the same quote character is sought.
    if ( !in_quote ) {
        quote_start = FNR
        sought = s
        in_quote = 1
        return
    }

    # Closing quote: it must be on the line where the string began.
    if ( quote_start != FNR ) {
        error( "Quote that started in line " quote_start " ended in line " FNR "." )
        exit
    }
    in_quote = 0
    sought = openers
}

# Write s to the output, uppercasing every identifier that is listed in
# words[].  Inside a comment or a string s is written unchanged.
#   s     - text containing no quote or comment delimiter
#   accum - local: the rewritten text built so far
function process( s, accum )
{
    if ( in_comment || in_quote ) {
        output( s )
        return
    }

    accum = ""
    # Match whole identifiers (a letter or "_" followed by letters, digits
    # or "_").  Stopping at the first digit would turn "end1" into "END1".
    while ( match( s, /[a-zA-Z_][a-zA-Z_0-9]*/ ) ) {
        Match( s )
        if ( RMATCH in words )
            RMATCH = toupper( RMATCH )
        accum = accum RLEFT RMATCH
        s = RRIGHT
    }
    # Whatever is left holds no further identifier.
    output( accum s )
}

# Write s to standard output exactly as given; no newline is appended.
function output( s )
{
    printf( "%s", s )
}

# Fill the global array words[] with the lowercase spellings that are to be
# uppercased.  Only the keys matter; every value is 1.
#   s, temp, i - locals
function setup_words( s,temp,i )
{
    # "by" and "for" are included so that a FOR ... TO ... BY ... DO
    # statement is uppercased as a whole rather than only its TO and DO.
    s = "array,begin,by,case,const,div,do,else,elsif,end,exit,for,if" \
        ",import,in,is,loop,mod,module,nil,of,or,pointer,procedure" \
        ",record,repeat,return,then,to,type,until,var,while,with,abs" \
        ",ash,boolean,cap,char,chr,copy,dec,entier,excl,false,halt,inc" \
        ",incl,integer,len,long,longint,longreal,max,min,new,odd,ord" \
        ",real,set,short,shortint,size,true" \
        ",assert,system,byte,ptr,adr,bit,cc,lsh,rot,val,get,put" \
        ",getreg,putreg,move"

    split( s, temp, /,/ )
    for ( i = 1; i in temp; i++ )
        words[ temp[i] ] = 1
}

# Split s around the most recent match(), using RSTART and RLENGTH.
# Sets the globals RLEFT (text before the match), RMATCH (the matched text)
# and RRIGHT (text after it).  When RSTART is 0, RLEFT and RMATCH are empty
# and RRIGHT is all of s.  Returns RSTART.
function Match( s )
{
    if ( !RSTART ) {
        RLEFT  = ""
        RMATCH = ""
        RRIGHT = s
        return RSTART
    }

    RLEFT  = substr( s, 1, RSTART - 1 )
    RMATCH = substr( s, RSTART, RLENGTH )
    RRIGHT = substr( s, RSTART + RLENGTH )
    return RSTART
}

# Report an error and count it.
#   s - the message text
# Writes a header naming FILENAME and FNR, then the message, and increments
# the global errors.  Both lines go to err_device so that one diagnostic is
# never split between two output devices.
function error( s )
{
    printf( "\nError in file %s, line %d:\n", FILENAME, FNR ) > err_device
    print " " s > err_device
    errors++
}
William James
2004-10-29 18:14:31 UTC
Permalink
Final version, I hope.

Added support for qualified identifiers and for identifiers written
with a mixture of upper and lower case (e.g., Out.LongReal).

------------------------------------------------------------------

# Awk program to convert Oberon keywords that are in lowercase
# to uppercase. Handles nested comments.
# Caution: if you create identifiers that contain only lowercase
# letters and that are spelled the same as an Oberon keyword,
# they will be uppercased. (Obviously.)
# Run with
# awk -f ucaseOberon.awk infile >outfile

BEGIN {
    # Pick the device for diagnostics: /dev/stderr when a ComSpec or
    # COMSPEC environment variable is set (presumably a DOS/Windows
    # system -- not confirmed here), /dev/tty otherwise.
    err_device = ( ENVIRON["ComSpec"] || ENVIRON["COMSPEC"] ) ? "/dev/stderr" : "/dev/tty"

    # Delimiter patterns.  Bracket expressions keep "(", ")" and "*"
    # literal; a string may open with either quote mark.
    quote         = "\"|'"
    open_comment  = "[(][*]"
    close_comment = "[*][)]"

    # Initial state: outside strings and comments.
    openers = quote "|" open_comment
    sought  = openers

    setup_words()
}

## ----- Main loop. -----

{
    # Work through the record delimiter by delimiter.  After a successful
    # match(), Match() fills RLEFT, RMATCH and RRIGHT and returns the
    # nonzero RSTART.
    while ( match( $0, sought ) && Match( $0 ) ) {
        matched = RMATCH
        $0 = RRIGHT
        # The text before the delimiter is handled under the old state;
        # the state changes only after the delimiter has been written.
        process( RLEFT )
        output( matched )
        change_state( matched )
    }

    # No delimiter is left in $0: handle the rest and end the line.
    process( $0 )
    output( "\n" )
}

## ----- After end of file has been reached. -----

END {
    # An earlier error is reported as the exit status without adding
    # end-of-file diagnostics.
    if ( errors )
        exit errors

    # Both checks run, so an open string and an open comment are each
    # reported.
    if ( in_quote )
        error( "File ended before literal string was closed." )
    if ( in_comment )
        error( "Comment that was begun in line " comment_start " never ended." )

    exit errors
}


## ----- Functions. -----

# Update the scanner state after delimiter s has been written out.
#   s - the delimiter text just matched by the main loop
# Reads open_comment, close_comment and openers; updates in_comment,
# comment_start, in_quote, quote_start and sought.
function change_state( s )
{
    # "(*": enter a comment, or nest one level deeper.
    if ( s ~ open_comment ) {
        # Remember where the outermost comment began, for error reporting.
        if ( !in_comment )
            comment_start = FNR
        in_comment++
        sought = open_comment "|" close_comment
        return
    }

    # "*)": leave one level; normal scanning resumes at level zero.
    if ( s ~ close_comment ) {
        in_comment--
        if ( in_comment == 0 )
            sought = openers
        return
    }

    # A quote while no string is open starts one; only the same quote
    # character can end it.
    if ( !in_quote ) {
        quote_start = FNR
        sought = s
        in_quote = 1
        return
    }

    # A quote while a string is open ends it, but only on the same line.
    if ( quote_start != FNR ) {
        error( "Quote that started in line " quote_start " ended in line " FNR "." )
        exit
    }
    in_quote = 0
    sought = openers
}

# Write s to the output, rewriting every plain or qualified identifier that
# is listed in ucased_words[] (uppercased) or mixed_case_words[] (replaced
# by the stored spelling).  Inside a comment or a string s is written
# unchanged.
#   s     - text containing no quote or comment delimiter
#   accum - local: the rewritten text built so far
function process( s, accum )
{
    if ( in_comment || in_quote ) {
        output( s )
        return
    }

    accum = ""
    # A token is an identifier (a letter or "_" followed by letters, digits
    # or "_"), optionally continued by ".identifier" parts.  Digits are part
    # of the token so that "end1" is not turned into "END1"; a dot joins two
    # identifiers only when an identifier follows it directly, so in
    # "min..max" both words are still looked up on their own.
    while ( match( s, /[a-zA-Z_][a-zA-Z_0-9]*([.][a-zA-Z_][a-zA-Z_0-9]*)*/ ) ) {
        Match( s )
        if ( RMATCH in ucased_words )
            RMATCH = toupper( RMATCH )
        else if ( RMATCH in mixed_case_words )
            RMATCH = mixed_case_words[ RMATCH ]
        accum = accum RLEFT RMATCH
        s = RRIGHT
    }
    # Whatever is left holds no further identifier.
    output( accum s )
}

# Write s to standard output exactly as given; no newline is appended.
function output( s )
{
    printf( "%s", s )
}

# Build the two lookup tables used by process().
#   ucased_words[w]     = 1 for every lowercase spelling w that is to be
#                         written in upper case
#   mixed_case_words[w] = the mixed-case spelling that replaces the
#                         all-lowercase spelling w
#   s, temp, i - locals
function setup_words( s,temp,i )
{
    # Spellings that are uppercased as a whole.
    s = "array,begin,by,case,const,div,do,else,elsif,end,exit,for,if" \
        ",import,in,is,loop,mod,module,nil,of,or,pointer,procedure" \
        ",record,repeat,return,then,to,type,until,var,while,with,abs" \
        ",ash,boolean,cap,char,chr,copy,dec,entier,excl,false,halt,inc" \
        ",incl,integer,len,long,longint,longreal,max,min,new,odd,ord" \
        ",real,set,short,shortint,size,true,assert,system" \
        ",system.byte,system.ptr,system.adr,system.bit" \
        ",system.cc,system.lsh,system.rot,system.val,system.get" \
        ",system.put,system.getreg,system.putreg,system.move"
    split( s, temp, /,/ )
    i = 1
    while ( i in temp ) {
        ucased_words[ temp[i] ] = 1
        i++
    }

    # Qualified identifiers with a fixed mixed-case spelling, keyed by
    # their lowercase form.
    s = "Out.String,Out.Ln,Out.Int,Out.Char" \
        ",Out.Real,Out.LongReal,Out.Fixed,In.Done,In.Open,In.Char" \
        ",In.Int,In.Real,In.LongReal,In.Line"
    split( s, temp, /,/ )
    i = 1
    while ( i in temp ) {
        mixed_case_words[ tolower( temp[i] ) ] = temp[i]
        i++
    }
}

# Split s around the most recent match(), using RSTART and RLENGTH.
# Sets the globals RLEFT (text before the match), RMATCH (the matched text)
# and RRIGHT (text after it).  When RSTART is 0, RLEFT and RMATCH are empty
# and RRIGHT is all of s.  Returns RSTART.
function Match( s )
{
    if ( !RSTART ) {
        RLEFT  = ""
        RMATCH = ""
        RRIGHT = s
        return RSTART
    }

    RLEFT  = substr( s, 1, RSTART - 1 )
    RMATCH = substr( s, RSTART, RLENGTH )
    RRIGHT = substr( s, RSTART + RLENGTH )
    return RSTART
}

# Report an error and count it.
#   s - the message text
# Writes a header naming FILENAME and FNR, then the message, and increments
# the global errors.  Both lines go to err_device so that one diagnostic is
# never split between two output devices.
function error( s )
{
    printf( "\nError in file %s, line %d:\n", FILENAME, FNR ) > err_device
    print " " s > err_device
    errors++
}

Loading...