CppBuzz.com
  
Home C C++ Java Python Perl PHP Spring SQL Javascript Linux Forum

You are here : Home » Solved Programs on C++ » C++ program on text analysis

C++ program on text analysis

#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <map>
#include <iomanip>
#include <iostream>

using namespace std;

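/*
Note: The input file must be named PROS.txt; it is opened by that relative
name, so it has to be in the directory the program is run from.
*/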
// Forward declarations for the word-index pass over the input file.
bool buildMap();
void writeIndex();
void AddWordtoMap(std::string);
unsigned int avgWordLength();

// Maps each distinct word to the number of times AddWordtoMap() saw it.
map <string, int> wordMap;
// Maps each one-character string passed to AddLettertoMap() to its count.
map <string, int> SingleLetterMap;
// Maps each string passed to AddDoubleLettertoMap() to its count.
map <string, int> DoubleLetterMap;

// Total number of AddWordtoMap() calls, i.e. words including repeats.
unsigned long int countWordsInFile = 0;
// Incremented once per input line read; the program treats a line as a sentence.
unsigned long int numberOfSentences = 0;
// Integer quotient countWordsInFile / numberOfSentences, assigned in main().
unsigned long int avgNumberOfWordsPerSentences = 0;

// Forward declarations for the export, letter and two-letter-word passes.
void saveWordsInFile();
void readLettersFromFile();
void read2LettersWordFromFile();
void print2LetterWords();
void AddLettertoMap(string);
void printLetterMap();
void AddDoubleLettertoMap(string);
// Repeats the declaration a few lines above.
void print2LetterWords();

/*
 * Program entry point.
 *
 * Builds the word index with buildMap(). When the input file could be opened
 * it prints the per-word counts and the summary statistics, then runs the
 * word-list export, the single-character pass and the two-letter-word pass.
 * When the file could not be opened nothing further is done (buildMap() has
 * already printed the error).
 *
 * argc / argv are accepted but not used. Always returns 0.
 */
int main(int argc, char** argv) {

    if (!buildMap()) {
        return 0;
    }

    cout << "\n Assuming File Contains only one sentence in a line";

    // wordMap holds one entry per distinct word, so this is the distinct count.
    cout << "\n The file has " << wordMap.size() << " words ";
    writeIndex();
    cout << "\n\n Average Word Length is : " << avgWordLength();
    cout << "\n Total Number of Words in file : " << countWordsInFile;
    cout << "\n Total Number of Sentences in file : " << numberOfSentences;

    // An empty input file produces zero sentences; integer division by zero
    // is undefined behaviour, so report an average of 0 in that case.
    if (numberOfSentences != 0) {
        avgNumberOfWordsPerSentences = countWordsInFile / numberOfSentences;
    } else {
        avgNumberOfWordsPerSentences = 0;
    }
    cout << "\n Average Number of Words in one sentence : " << avgNumberOfWordsPerSentences;

    saveWordsInFile();
    readLettersFromFile();
    read2LettersWordFromFile();

    return 0;
}

bool buildMap(){
const string filename ="PROS.txt";

ifstream inputFile(filename.c_str());
string line;

if(inputFile.is_open()){
cout<<"File opened Successfully...\n";

while(getline(inputFile, line)){
numberOfSentences++;
std::size_t prev =0, pos;

while((pos =  line.find_first_of
("~`=!@#$%^&*)/\?-_|[,. }] (_-+{;':""></", prev)) != std::string::npos){
	if(pos>prev){
	AddWordtoMap(line.substr(prev, pos-prev));
	}
	prev= pos+1;
}
	if(prev< line.length()){
	AddWordtoMap(line.substr(prev, std::string::npos));
	}
}

inputFile.close();

}
else
{
	cout<<"\n-Unable to open file-"<<filename<<"\n"<<endl;
	return false;
}

return true;
}

/*
 * Records one occurrence of str in wordMap and bumps the running total of
 * words seen (countWordsInFile).
 */
void AddWordtoMap(std:: string str){
    // operator[] creates the entry with a count of 0 when the word is new,
    // so a single increment covers both the first and any later occurrence.
    ++wordMap[str];

    // every call accounts for exactly one word read from the file
    ++countWordsInFile;
}

void writeIndex(){
for(map<string, int> ::iterator itr = wordMap.begin(); itr!=wordMap.end(); ++itr){
std::cout<<"\n  "<< setw(30) <<itr->first<< setw(10)<<itr->second;
}	
}

unsigned int avgWordLength(){
unsigned int calculatelength = 0;
for(map<string, int> ::iterator itr = wordMap.begin(); itr!=wordMap.end(); ++itr){
calculatelength +=  (itr->first).length();
}	

if(calculatelength)
	return calculatelength/wordMap.size();
else
return 0;	
}

void saveWordsInFile(){
 //temporary map to sort words alphabeticaly
 multimap<int ,string,greater<int> >  MMtable;
     map<string,int>::iterator iter; 
     for(iter=wordMap.begin(); iter!=wordMap.end(); ++iter){
         MMtable.insert(make_pair(iter->second,iter->first));
     }

cout<<"\n";
        
multimap<int, string>::iterator iter1;
for(iter1=MMtable.begin(); iter1!=MMtable.end(); ++iter1){
//std::cout << " " << setw(10)  << iter1->second << setw(10)<<  iter1->first << std::endl;
//save this data into text file
string outputFile = "WORDLIST.TXT";
ofstream file(outputFile.c_str());
file<<setw(10)  << iter1->second << setw(10)<<  iter1->first << std::endl;
file.close();
}
}

void readLettersFromFile()
{
const string filename ="PROS.txt";
ifstream inputFile(filename.c_str());
string line;

if(inputFile.is_open()){
{

char temp = NULL;
while(getline(inputFile, line)){

     for(int i=0; i<line.length(); i++){
         temp = line[i];
     	 AddLettertoMap(string(1, temp));
     }
	}
}

inputFile.close();
}
else
{
	cout<<"\n-Unable to open file-"<<filename<<"\n"<<endl;
}    
  printLetterMap();
}

void read2LettersWordFromFile()
{
const string filename ="PROS.txt";

ifstream inputFile(filename.c_str());
string line;

if(inputFile.is_open()){
while(getline(inputFile, line)){
numberOfSentences++;

std::size_t prev =0, pos;

while((pos =  line.find_first_of
("~`=!@#$%^&*)/\?-_|[,. }] (_-+{;':""></", prev)) != std::string::npos){
	{
	if(pos>prev){
	    	string temp  = line.substr(prev, pos-prev);  
	    		if(temp.length()==2)
	AddDoubleLettertoMap(line.substr(prev, pos-prev));
	}
	prev= pos+1;
     }
	if(prev< line.length()){
	    	    	string temp  = line.substr(prev, pos-prev);  
	    		if(temp.length()==2)
	AddDoubleLettertoMap(line.substr(prev, std::string::npos));
	}
    }
}

inputFile.close();
}
else
{
	cout<<"\n-Unable to open file-"<<filename<<"\n"<<endl;
}  
print2LetterWords();
}

void printLetterMap(){
cout<<"\n Printing 1 Letter Frequancy : ";
 for(map<string, int> ::iterator itr = SingleLetterMap.begin(); itr!=SingleLetterMap.end(); ++itr){
std::cout<<"\n  "<< setw(30) <<itr->first<< setw(10)<<itr->second;   
}
}

void print2LetterWords(){
cout<<"\n\n Printing 2 Letters Frequancy : ";
for(map<string, int> ::iterator itr = DoubleLetterMap.begin(); itr!=DoubleLetterMap.end(); ++itr){
std::cout<<"\n  "<< setw(30) <<itr->first<< setw(10)<<itr->second; 
}
}

/*
 * Records one occurrence of ch in SingleLetterMap.
 */
void AddLettertoMap(string ch){
    // operator[] creates a missing entry with a count of 0, so the increment
    // yields 1 for a new key and count + 1 for an existing one.
    ++SingleLetterMap[ch];
}

void AddDoubleLettertoMap(std::string char2){
map<string, int> ::iterator it = DoubleLetterMap.find(char2);

if(it!=DoubleLetterMap.end()){
    it->second = it->second + 1;
}
else{
    DoubleLetterMap.insert(std::make_pair(char2, 1));
}
}

C++ code for text analysis