Hello all
I Have problem about , How i can compute accuracy to unigram,bigram and trigram
and how i can change the size to iteration separate from 1 to 10 in each stage
from iteration train take 90% and training 10%.
thank you to read my message
import codecs
import nltk
from nltk import*
outfile = codecs.open('unigram_tagged_sents_out.txt','w','utf-8')
outfile2 = codecs.open('bigram_tagged_sents_out.txt','w','utf-8')
outfile3 = codecs.open('trigram_tagged_sents_out.txt','w','utf-8')
File1=codecs.open('C:\project\Corpus_word.txt','r','utf_8').readlines()
word_pos_list = []
tokens=[]
train_sents=[]
test_tagged_sents=[]
all_test_sents = []
n=10
for line in File1:
tokens = line.split('\t')
#print '%s\t%s\t%s' % (tokens[0], tokens[1], tokens[2])
word_pos_list.append((tokens[0], tokens[1]))
all_test_sents.append(tokens[0])
for t in range(10):
size=int(len(word_pos_list)*(0.9))
#print size
train_sents.append(word_pos_list[:size])
test_tagged_sents.append(word_pos_list[size:])
test_sents=all_test_sents[size:]
print "unigram tagger"
#Unigram tagger
unigram_tagger = nltk.UnigramTagger(train_sents)
tagged_unigram_sents = unigram_tagger.tag(test_sents)
print unigram_tagger.evaluate(test_tagged_sents)
for (word, tag) in tagged_unigram_sents:
print>>outfile, '%s\t%s' % (word, tag)
print nltk.accuracy(tagged_unigram_sents,test_sents)
#bigram tagger
print "Bigram Tagger"
bigram_tagger = nltk.BigramTagger(train_sents,backoff= unigram_tagger)
tagged_bigram_sents=bigram_tagger.tag(test_sents)
print bigram_tagger.evaluate(test_tagged_sents)
for (word, tag) in tagged_bigram_sents:
print>>outfile2, '%s\t%s' % (word, tag)
#Trigram tagger
print "Trigram Tagger"
trigram_tagger=nltk.TrigramTagger(train_sents,backoff= bigram_tagger)
tagged_trigram_sents=trigram_tagger.tag(test_sents)
print trigram_tagger.evaluate(test_tagged_sents)
for (word, tag) in tagged_trigram_sents:
print>>outfile3, '%s\t%s' % (word, tag)
outfile.close()
outfile2.close()
outfile3.close()
print 'Done!'
#accuracy = unigram_tagger.evaluate(tagged_test_sents)
#print 'accuracy = ', accuracy
#train_sents.append((word_pos_list[:size]))
#print train_sents
#test_sents.append(word_pos_list[size:])
#print test_sents
#bigram_tagger=nltk.BigramTagger(train_sents)
#print bigram_tagger.tag(tokens[:size])
#print bigram_tagger._train(train_sents,cutoff=size)
#print bigram_tagger.evaluate(test_sents)
--
https://mail.python.org/mailman/listinfo/python-list