C++Primer学习笔记之剖析tquery.cpp程序的方法

来源：www.45fan.com 2016-09-01 18:38:37

tquery.c

我用的编译器是gnu c++ 和 vc2003，为了此程序能够执行需做以下的修改：

1 tquery.c-> tquery.cpp

2 <iostream.h>-> <iostream>, <fstream.h>-> <fstream> , <stddef.h> -><cstddef>

3 增加 #include <iterator> 和 using namespace std;

4 删除 allocator 和它前面的“，”号，注意要在两个 > 之间留一个空格（写成 > >），因为编译器不是神仙，它会把>>当成操作符

5 删除250行的diff_type，对于现在的编译器，它已经过时了

6 对于gnu c++ 执行：g++ -o tquery.exe tquery.cpp 〔ENTER〕

7 对于vc2003 执行：cl tquery.cpp 〔ENTER〕

#include <algorithm> //提供泛型算法的接口，比如copy

#include <string>

#include <vector>

#include <utility> //pair的接口

#include <map>

#include <set>

#include <iostream>

#include <fstream>

#include <cstddef> //定义了NULL，size_t等类型

#include <cstdlib> // exit

#include <ctype.h> //为了大小写字母的处理

#include <iterator> //因为运用了ostream_iterator模板对象所以这是必须的，而原程序没有包含

using namespace std; //为了方便，所以显式指定名字空间是标准名字空间

typedef pair<short,short> location; // one word position as a pair of shorts; the alias exists purely for readability

typedef vector<location> loc; // a sequence of word positions

typedef vector<string> text; // a sequence of strings

typedef pair<text*,loc*> text_loc; // full type: pair< vector<string>*, vector< pair<short,short> >* >

// Annotator's note: the four aliases above could have been lined up in a single column;

// declaring them one by one in dependency order keeps the definitions from being confused with each other.

class TextQuery {

public:

TextQuery() { memset( this, 0, sizeof( TextQuery )); } /*我们知道memset是一个C语言的函数，这句把this指向的那块内存大小为sizeof（TextQuery）个单元初值设置为0，我对这个构造函数的理解是：因为此类的成员函数操纵的是一些string，所以把内存做这样的解释是为了提高string处理的效率，因为memset虽然返回的是void*指针，但是却被编译器解释成char*指针*/

static void filter_elements( string felems ) { filt_elems = felems; }//静态成员函数说：“我是为了filt_elems而存在”

void query_text();

void display_map_text();

void display_text_locations();

void doit() {

retrieve_text();

separate_words();

filter_text();

suffix_text();

strip_caps();

build_word_map();

}

private:

void retrieve_text();

void separate_words();

void filter_text();

void strip_caps();

void suffix_text();

void suffix_s( string& );

void build_word_map();

private:

vector<string> *lines_of_text;

text_loc *text_locations;

map<string,loc*,less<string> > *word_map;

static string filt_elems; //我是属于类的，所以我属于所有对象，但我与它们一点关系也没有，即//使它们不存在，我也存在, 而且我是唯一的，不会每个对象都有一份，所以我节省空间，注意不要在类里初始化我，//为什么？去问Bjarne Stroustrup，那是俺爹

};

// Out-of-class definition of the static filter-character string.
// The characters are: double quote, comma, period, semicolon, colon,
// exclamation mark, question mark, both parentheses, backslash and slash.
// (The published text had every backslash turned into '/', which left an
// unterminated/ill-formed literal; the escapes are restored here.)
string TextQuery::filt_elems( "\",.;:!?)(\\/" );

int main()

{

TextQuery tq;

tq.doit();

tq.query_text();

tq.display_map_text();

return 0;

}

//获得文本

/*我先大体的叙述一下retrieve_text函数是做什么和怎么做的，此函数读取输入的文本文件中的每一行，一行一行地记录其内容，为什么这么说呢，你想想看: 它把输入的文本文件一行一行的读出来，看做string，用push_back放在了vector中，简言之vector中的0号元素就是text文件的第一行，以此类推。
它是如何做的呢？首先它构建了一个ifstream对象，这个对象的本身代表输入文本文件自己，接着getline登场，注意getline有两个：一个是流自己提供的成员函数getline，一个是在<string>中为string提供的getline，此处用的是后者。getline顾名思义，就是得到文件的一行，其实那倒也不一定，取决于getline的第三个参数，默认的参数就是'\n'，'\n'意味着getline就是名副其实的get line，接下来vector的push_back粉墨登场，它把从getline获取的每一行做为string放进vector容器中，此时vector的大小会自动的加1。最终的结果就是，输入的文本被放在了一个string vector容器中，而前面的类的私有成员：vector<string> *lines_of_text就是指向这个容器的指针，抢答：(*lines_of_text)[ 0 ]代表什么？欧，是的，它代表输入文本第一行，如果你要求严格一点，它代表输入文本第一行的一个拷贝。其它的部分很简单，就此略过。*/

// Prompt on stdout for a file name, read it from stdin, and load that file
// line by line into a freshly allocated vector<string> stored in
// lines_of_text (element 0 is the first line of the file, and so on).
//
// If the file cannot be opened, an error message is written to cerr and the
// program terminates via exit( -1 ).
//
// Fix relative to the published text: the newline escapes had been mangled
// into "/n" and '/n'. '/n' is a multi-character literal, so getline() was
// not splitting on line ends, and the messages printed a literal "/n".
void
TextQuery::
retrieve_text()
{
    string file_name;

    cout << "please enter file name: ";
    cin >> file_name;

    // Open for input; the constructor takes a const char*, hence c_str().
    ifstream infile( file_name.c_str(), ios::in );
    if ( !infile ) {
        cerr << "oops! unable to open file "
             << file_name << " -- bailing out!\n";
        exit( -1 );
    }
    else cout << "\n";

    lines_of_text = new vector<string>;
    string textline;

    // The non-member getline from <string>: reads up to (and discards) '\n'.
    while ( getline( infile, textline, '\n' ))
        lines_of_text->push_back( textline ); // store a copy of the line
}

//分离出单个的词并捕获其位置（？行，？列）

问题1：如何分离出单个词？

答：根据词间的空格

问题2：如何迭代？

答：for循环迭代行，while循环寻找每一行的空格

void

TextQuery::

separate_words()

{

vector<string> *words = new vector<string>;

vector<location> *locations = new vector<location>;

for ( short line_pos = 0; line_pos < lines_of_text->size(); line_pos++ ) /*对于书中给出的输入文本文件text_file->size()应该是6，因为string vector就是6行所以对于for循环就是for（short line_pos ＝ 0；line_pos < 6; line_pos++）,共循环6次来迭代每一行*/

{

short word_pos = 0;

string textline = (*lines_of_text)[ line_pos ];

string::size_type eol = textline.length();

string::size_type pos = 0, prev_pos = 0;

while (( pos = textline.find_first_of( ' ', pos )) != string::npos ) /*例如：对于第一行：

Alice Emma has long flowing red hair. Her Daddy says eol: 52 pos: 5 line: 0 word: 0 substring: Alice 意思是说，数到52,第一行结束，数到第5时出现个空格（记住Lippman先生说过要从0开始数），是第0行，此时标记出第一个词，位置是0，内容是Alice，再把Alice放进words中，words是一个string vector，想想为什么要放进vector中，因为要给单独的词一个代号比如0号，1号，2号.......*/

{

words->push_back( textline.substr( prev_pos, pos - prev_pos ));

locations->push_back( make_pair( line_pos, word_pos )); /*locations是一个pair vector，执行完此句locations就变成｛word[0] =（0, 0）......｝了*/

word_pos++; pos++; prev_pos = pos;

}

words->push_back( textline.substr( prev_pos, pos - prev_pos ));

locations->push_back( make_pair( line_pos, word_pos ));

}

text_locations = new text_loc( words, locations );

text_loc完整的类型是：

pair< vector<string>*, vector< pair<short,short> >* >,

就像这样:（word[0]，(0,0)[0]）,(word[1], (0,1)[1]) 。。。。。。

//滤去标点符号

void

TextQuery::

filter_text()

{

if ( filt_elems.empty() )

路由器

电脑

手机

网络

C++Primer学习笔记之剖析tquery.cpp程序的方法

推荐阅读