*=*=*=*= ToTag.html =*=*=*=*
Integer Function ToTag(length,word,tag) implicit integer(a-z) parameter (mxnoun=1000,mxlen=50,fnice=1.0) common /nouns/ nnoun,len_noun,weight_noun,freq_noun,noun character*(mxlen) noun(mxnoun) character*(mxlen) clower,clotag integer len_noun(mxnoun),weight_noun(mxnoun) integer freq_noun(mxnoun) character*(*) word,tag c totag = length tag = word if(length.eq.0) return clotag = clower(word,length) do 1 i=1,nnoun if(len_noun(i).ne.length) goto 1 if(noun(i)(:length).ne.clotag(:length)) goto 1 c c if first occurence, make reference, otherwise link c if(weight_noun(i).eq.0) then weight_noun(i) = 1 tag = 'clotag(:length)// & '">'//word(:length)//'' totag = 15 + 2*length else if(weight_noun(i).eq.1) then c c link c tag = 'clotag(:length)// & '">'//word(:length)//'' totag = 16 + 2*length else if(weight_noun(i).eq.2) then c c external filename c tag = clotag(:length)// & '">'//word(:length)//'' totag = 15 + 2*length endif return 1 continue end
*=*=*=*= Txt2HTML.html =*=*=*=*
Program Txt2HTML implicit integer(a-z) parameter (mxlin=250,mxlen=50,mxnoun=1000,mxpar=20000) parameter (mxpass=2,mxtag=100) parameter (fnice=1.0) common /nouns/ nnoun,len_noun,weight_noun,freq_noun,noun c character*(mxtag) tag character*(mxlin) cline character*(mxlen) noun(mxnoun) character*(mxlen) word,lastword,nextword,sfile character*(mxpar) para,temp,chtml character*16 cpunct /'-:;,/.?!"''(){}[]'/ character*10 tchar character*3 ccthree character*1 c,cc,cclast c integer len_noun(mxnoun),weight_noun(mxnoun) integer freq_noun(mxnoun) c real f c logical lpunct,lalpha,lnumbr,lclaus c external lenocc,totag c c statement functions c lclaus(c) = index(cpunct,c).gt.5 lalpha(c) = (lge(c,'a').and.lle(c,'z')).or. & (lge(c,'A').and.lle(c,'Z')) lnumbr(c) = lge(c,'0').and.lle(c,'9') c c call getarg(1,sfile,status) c if(status.le.0) goto 900 c open(1,file=sfile(:status),status='old',err=900) open(1,status='old',readonly,err=900) c open(7,file='user',title='Candidate Nouns',iofocus=.true.) c nnoun = 0 lpar = 0 pass = 1 c c Read the file and accumulate the paragraphs c 1 read(1,'(a)',end=2,err=2) cline c ll = lenocc(cline) c if(ll.le.0.and.lpar.ne.0) then c c empty line ... process words in preceding paragraph c lhtml = 0 lwordl = 0 lwordn = 0 lword = 0 lastword = ' ' nextword = ' ' word = ' ' mark = 1 markl = 0 markn = 0 ipos = 0 cc = char(1) 3 ipos = ipos+1 cclast = cc if(ipos.gt.lpar) goto 4 cc = para(ipos:ipos) if(cc.eq.char(9)) cc = ' ' good3 = 1 if(ipos.gt.1.and.ipos.lt.lpar) then ccthree=para(ipos-1:ipos+1) if(lalpha(cclast).or.lnumbr(cclast)) good3=2 if(lalpha(para(ipos+1:ipos+1)).or. & lnumbr(para(ipos+1:ipos+1))) good3=good3+1 endif c c add characters to nextword c if(lalpha(cc).or.lnumbr(cc)) then if(lwordn.lt.mxlen) then lwordn = lwordn+1 nextword(lwordn:lwordn) = cc endif goto 3 c c special for files ... assumes structure like A.B ! c else if(cc.eq.'.'.and.good3.eq.3) then if(lwordn.lt.mxlen) then lwordn = lwordn+1 nextword(lwordn:lwordn) = cc endif goto 3 else if(ccthree(2:3).eq.':\'.and.lalpha(cclast)) then if(lwordn.lt.mxlen-1) then lwordn = lwordn+2 nextword(lwordn-1:lwordn) = ':\' ipos = ipos+1 endif goto 3 else if(cc.eq.'\'.and.index(nextword(:lwordn),'\').ne.0) then if(lwordn.lt.mxlen) then lwordn = lwordn+1 nextword(lwordn:lwordn) = cc endif goto 3 else if(pass.lt.mxpass) then markn = 0 if(lclaus(cc)) markn = 1 c c end of word. c if(lword*lwordn.ne.0.and.pass.lt.mxpass) then call context(lastword(:lwordl), & word(:lword), & nextword(:lwordn),markl) c shift words left endif if(lwordn.eq.0) goto 3 lastword = word lwordl = lword word = nextword lword = lwordn lwordn = 0 c c mark signifies whether "word" was preceded by a clause end c markl = mark mark = markn goto 3 else if(pass.eq.mxpass) then c c Last pass .... pipe out HTML c lastword = word lwordl = lword word = nextword lword = lwordn lwordn = 0 markl = mark mark = markn c c check for tag c ltag = 0 if(lword.ne.0) ltag = totag(lword,word,tag) temp = chtml if(lhtml+ltag+1.ge.70) then write(6,*) chtml(:lhtml) lhtml = 0 endif c c check for translation chars c tchar(1:1) = cc lt = 1 if(cc.eq.'<') then tchar = '<' lt = 5 else if(cc.eq.'>') then tchar = '>' lt = 5 else if(cc.eq.'&') then tchar = '&' lt = 5 endif if(ltag.eq.0) then chtml = temp(:lhtml)//tchar(:lt) lhtml = lhtml+lt else if(cc.ne.' '.or.cclast.ne.cc) then chtml = temp(:lhtml)//tag(:ltag)//tchar(:lt) lhtml = lhtml+ltag+lt else chtml = temp(:lhtml)//tag(:ltag) lhtml = lhtml+ltag endif goto 3 endif 4 lpar = 0 if(lwordl*lword.ne.0.and.pass.lt.mxpass) & call context(lastword(:lwordl),word(:lword),' ',markl) if(pass.eq.mxpass.and.ll.eq.0) then write(6,*) chtml(:lhtml) lhtml = 0 write(6,*) '
' endif else c c add line to para c temp = para para = temp(:lpar)//' '//cline(:ll)
lpar = lpar + ll
+ 1 endif goto 1 c c End of pass ... c 2 continue if(pass.lt.mxpass) then write(7,*) ' A total of ',nnoun,'
candidate nouns at pass ',pass write(7,'(1x,a20,f10.5)') (noun(i)(:len_noun(i)), & real(weight_noun(i))/real(freq_noun(i)),i=1,nnoun) endif c pass
= pass+1 c c Prepare final weights for tagging pass c if(pass.eq.mxpass.and.nnoun.gt.0)
then inoun = 1 6 if(inoun.le.nnoun) then f = real(weight_noun(inoun))/real(freq_noun(inoun)) weight_noun(inoun)
= 0 c c For filenames, set the weight to be 2 c if(index(noun(inoun),'.').ne.0)
weight_noun(inoun)=2 if(index(noun(inoun),'\').ne.0) weight_noun(inoun)=2 if(f.lt.fnice)
then c c remove from list c write(7,*) ' Rejecting ',noun(inoun),f do i=inoun+1,nnoun noun(i-1) = noun(i) weight_noun(i-1) = weight_noun(i)
freq_noun(i-1) = freq_noun(i) len_noun(i-1) =
len_noun(i) enddo nnoun
= nnoun-1 else inoun = inoun + 1 endif goto 6 endif write(7,*) ' A total of ',nnoun,' filtered nouns' write(7,'(1x,a20,f10.5)') (noun(i)(:len_noun(i)), & real(weight_noun(i))/real(freq_noun(i)),i=1,nnoun) endif c if(pass.le.mxpass) then lpar = 0 rewind(1) goto 1
endif write(6,*) chtml(:lhtml) stop 900
write(6,*) ' Error opening input file' end *=*=*=*= context.html =*=*=*=*
subroutine context(lastword,word,nextword,mark) implicit integer(a-z) parameter (mxnoun=1000,mxlen=50) common /nouns/ nnoun,len_noun,weight_noun,freq_noun,noun character*(mxlen) lowlastword,lowword,lownextword character*(mxlen) noun(mxnoun) character*(mxlen) clower character*(*) lastword,word,nextword character*(1) c external clower logical lcapit integer len_noun(mxnoun),weight_noun(mxnoun),freq_noun(mxnoun) c lcapit(c) = lge(c,'A').and.lle(c,'Z') c if (nnoun.ge.mxnoun) return if (word.eq.' ') return lw = lenocc(word) lowword = clower(word,lw) c c Get index of this noun, if it exists c ifound = 0 do 1 i=1,nnoun if(noun(i).eq.lowword) then ifound = i goto 2 endif 1 continue 2 continue c c Check the candidate word as a plural c if(lowword(lw:lw).eq.'s'.and.lw.gt.2) then weight = 4 do i=1,nnoun if(lowword(:lw-1).eq.noun(i)) then weight_noun(i) = weight_noun(i)+1 freq_noun(i) = freq_noun(i)+1 goto 100 endif enddo endif lwl = lenocc(lastword) lwn = lenocc(nextword) lownextword = clower(nextword,lwn) lowlastword = clower(lastword,lwl) c c If word contains . or \ then it is a file c weight = 50 if(index(word(:lw),'.').ne.0.or.index(word(:lw),'\')) goto 100 c c If not start of clause, and first letter is capital, then noun c weight = 10 if(mark.eq.0.and.lcapit(word(1:1))) goto 100 c c If start of clause, and first two letters are capital, then noun c weight = 5 if(mark.ne.0.and.lw.gt.1.and.lcapit(word(2:2))) goto 100 c c c Check if word on right is a noun c weight = 0 do i=1,nnoun if(lownextword.eq.noun(i)) then weight = -2 goto 100 endif enddo weight = 2 if (lowlastword.eq.'the') goto 100 weight = 1 if (lowlastword.eq.'a') goto 100 c c probably not a noun c weight = -1 c 100 continue if(ifound.ne.0) then weight_noun(ifound)=weight_noun(ifound)+weight freq_noun(ifound)=freq_noun(ifound)+1 endif c c reduce weight on left if that is supposed to be a noun, too c do i=1,nnoun if(lowlastword.eq.noun(i)) then weight_noun(i) = weight_noun(i)-2 endif end do if(weight.le.0.or.ifound.ne.0) return nnoun = nnoun+1 noun(nnoun) = lowword len_noun(nnoun) = lw weight_noun(nnoun) = weight freq_noun(nnoun) = 1 end
c *=*=*=*= Lenocc.html =*=*=*=*
Integer Function Lenocc(c) Character*(*) c do lenocc=len(c),1,-1 if(c(lenocc:lenocc).ne.' ') return enddo lenocc = 0 end
*=*=*=*= clower.html =*=*=*=*
character*(*) function clower(cword,l) character*(*) cword character*(26) lc /'abcdefghijklmnopqrstuvwxyz'/ character*(26) uc /'ABCDEFGHIJKLMNOPQRSTUVWXYZ'/ clower = cword do i=1,l ip = index(uc,cword(i:i)) if(ip.ne.0) clower(i:i) = lc(ip:ip) enddo end