% From the book
% PROLOG PROGRAMMING IN DEPTH
% by Michael A. Covington, Donald Nute, and Andre Vellino
% (Prentice Hall, 1997).
% Copyright 1997 Prentice-Hall, Inc.
% For educational use only

% File READCHAR.PL
% Michael A. Covington
% Natural Language Processing for Prolog Programmers
% (Prentice-Hall)
% Appendix B

% For all Prologs except Arity.  See text.


% read_charlists(-Charlists)
%  Reads a line of text from the current input stream, breaking it
%  into a list of lists of one-character atoms, [[l,i,k,e],[t,h,i,s]].
%  Upper-case letters are translated to lower case.  Runs of letters
%  and digits form one list; every special character forms a list of
%  its own; blanks and control characters only separate lists.
%  Makes no attempt to recognize numbers: digits are returned as
%  one-character atoms ('1'), never as integers.

read_charlists(Charlists) :-
   read_char(FirstC,FirstT),
   complete_line(FirstC,FirstT,Charlists).


% read_char(-Char,-Type)
%  Reads a character code with get0/1 and runs it through char_type/3.
%  Char is the (possibly translated) code, Type its classification.

read_char(Char,Type) :-
   get0(C),
   char_type(C,Type,Char).


% complete_line(+FirstC,+FirstT,-Charlists)
%  Given FirstC (the first character) and FirstT (its type),
%  reads and tokenizes the rest of the line into charlists.

complete_line(_,end,[]) :- !.                  % stop at end

complete_line(_,blank,Charlists) :-            % skip blanks
   !,
   read_charlists(Charlists).

complete_line(FirstC,special,[[A]|Rest]) :-    % special char
   !,
   one_char_atom(FirstC,A),
   read_charlists(Rest).

complete_line(FirstC,alpha,[Word|Rest]) :-     % begin word
   complete_word(FirstC,alpha,Word,NextC,NextT),
   % complete_word has already consumed the character that follows
   % the word, so continue with that one instead of reading again.
   complete_line(NextC,NextT,Rest).


% complete_word(+FirstC,+FirstT,-List,-FollC,-FollT)
%  Given FirstC (the first character) and FirstT (its type),
%  reads the rest of a word, putting its characters into List
%  as one-character atoms.  FollC and FollT are the code and type
%  of the first character that does not belong to the word.

complete_word(FirstC,alpha,[C|List],FollC,FollT) :-
   !,
   one_char_atom(FirstC,C),
   read_char(NextC,NextT),
   complete_word(NextC,NextT,List,FollC,FollT).

complete_word(FirstC,FirstT,[],FirstC,FirstT).
   % where FirstT is not alpha


% one_char_atom(+Code,-Atom)
%  Converts a character code to the corresponding one-character atom.
%  char_code/2 is used rather than name/2 because name/2 turns a
%  digit code into an integer (name(X,[53]) gives X = 5), which
%  would put numbers, not atoms, into the charlists.

one_char_atom(Code,Atom) :-
   char_code(Atom,Code).


% char_type(+Code,-Type,-NewCode)
%  Given an ASCII code, classifies the character as
%  'end' (of line/file), 'blank', 'alpha'(numeric), or 'special',
%  and changes it to a potentially different character (NewCode).
%
%  Type must be unbound on call: each cut is reached only after the
%  clause head has unified, so a bound Type can select a later clause
%  (e.g. char_type(10,blank,X) succeeds through the =< 32 clause).
%  Clause order matters: the 'end' codes 10, 13 and -1 would all
%  satisfy Code =< 32 and must be tested before the 'blank' clause.
%
%  NOTE(review): 13 is itself classified as 'end', so on a stream
%  that delivers both CR and LF the LF is presumably left unread and
%  the next call to read_charlists/1 returns [] -- confirm against
%  the Prolog system and input source in use.

char_type(10,end,10) :- !.         % UNIX end of line mark
char_type(13,end,13) :- !.         % DOS end of line mark
char_type(-1,end,-1) :- !.         % get0 end of file code

char_type(Code,blank,32) :-        % blanks, other ctrl codes
   Code =< 32,
   !.

char_type(Code,alpha,Code) :-      % digits
   48 =< Code, Code =< 57,
   !.

char_type(Code,alpha,Code) :-      % lower-case letters
   97 =< Code, Code =< 122,
   !.

char_type(Code,alpha,NewCode) :-   % upper-case letters
   65 =< Code, Code =< 90,
   !,
   NewCode is Code + 32.           %  (translate to lower case)

char_type(Code,special,Code).      % all others