#include<algorithm>
#include<cstdlib>
#include<fstream>
#include<iostream>
#include<set>
#include<string>
#include<utility>
using namespace std;
// Input file stream and output file stream used by the lexer.
fstream file,token_stream;
// A lexical token: its spelling, a human-readable category and a numeric
// category code.
//
// Ordering and equality look at `first` only, so two tokens with the same
// spelling compare equal even when their category or code differ.
struct token
{
    // Builds a token from its spelling `f`, category name `s` and category code `c`.
    // The strings are taken by value and moved into place, so no copy can throw here.
    token(string f, string s, int c) noexcept : first(std::move(f)), second(std::move(s)), code(c) {}

    // Copies all three fields.
    token(const token& rhs) noexcept : first(rhs.first), second(rhs.second), code(rhs.code) {}

    ~token() noexcept {}

    // Empty spelling, empty category, code 0.
    token() noexcept : first(), second(), code(0) {}

    // Copy assignment; self-assignment is a no-op.
    const token& operator=(const token& rhs) noexcept
    {
        if (this != &rhs)
        {
            first = rhs.first;
            second = rhs.second;
            code = rhs.code;
        }
        return *this;
    }

    // Strict weak ordering by spelling.
    friend bool operator <(const token& lhs, const token& rhs) noexcept
    {
        return lhs.first < rhs.first;
    }

    // Strictly greater by spelling. Expressed through operator< with the operands
    // swapped; negating operator< would yield "greater or equal" instead.
    friend bool operator >(const token& lhs, const token& rhs) noexcept
    {
        return rhs < lhs;
    }

    // Two tokens are equal when their spellings are equal.
    friend bool operator ==(const token& lhs, const token& rhs) noexcept
    {
        return lhs.first == rhs.first;
    }

    // True when the spelling is exactly the single character `c`.
    friend bool operator ==(const token& lhs, const char& c) noexcept
    {
        return lhs.first.size() == 1 && lhs.first[0] == c;
    }

    // True when the spelling equals `str`.
    friend bool operator ==(const token& lhs, const string& str) noexcept
    {
        return lhs.first == str;
    }

    // Writes the token as "(first,second,code)".
    friend ostream& operator <<(ostream& os, const token& rhs) noexcept
    {
        return os << '(' << rhs.first << "," << rhs.second << "," << rhs.code << ')';
    }

    string first;  // token spelling
    string second; // category name
    int code;      // category code
};
// Token table. Being a std::set, it keeps its entries ordered and unique
// according to token's operator<.
set<token> _mmap;
// Fills the token table and opens the input and output files.
//
// The table receives every known keyword, operator and punctuator together with
// four category placeholder rows (codes 47-50). Afterwards "test" is opened for
// reading and "token_stream" for writing. If either file cannot be opened, a
// message naming the file is written to cerr and the process exits with a
// failure status.
void init() noexcept
{
    struct entry
    {
        const char* text; // token spelling
        const char* kind; // category name
        int code;         // category code
    };
    static const entry table[] = {
        {"endl", "key word", 0},
        {"void", "key word", 1},
        {"main", "key word", 2},
        {"int", "key word", 3},
        {"long", "key word", 4},
        {"float", "key word", 5},
        {"double", "key word", 6},
        {"char", "key word", 7},
        {"for", "key word", 8},
        {"while", "key word", 9},
        {"switch", "key word", 10},
        {"case", "key word", 11},
        {"break", "key word", 12},
        {"if", "key word", 13},
        {"else", "key word", 14},
        {"return", "key word", 15},
        {"+", "operator", 16},
        {"-", "operator", 17},
        {"*", "operator", 18},
        {"/", "operator", 19},
        {"=", "operator", 20},
        {">", "operator", 21},
        {"<", "operator", 22},
        {"&", "operator", 23},
        {"|", "operator", 24},
        {"~", "operator", 25},
        {"==", "operator", 26},
        {">=", "operator", 27},
        {"<=", "operator", 28},
        {"*=", "operator", 29},
        {"+=", "operator", 30},
        {"/=", "operator", 31},
        {"-=", "operator", 32},
        {"!=", "operator", 33},
        {"||", "operator", 34},
        {"++", "operator", 35},
        {"--", "operator", 36},
        {"<<", "operator", 37},
        {">>", "operator", 38},
        {"(", "punctuator", 39},
        {")", "punctuator", 40},
        {";", "punctuator", 41},
        {"[", "punctuator", 42},
        {"]", "punctuator", 43},
        {"{", "punctuator", 44},
        {"}", "punctuator", 45},
        {",", "punctuator", 46},
        {"Integer", "integer", 47},
        {"Floating point", "floating point", 48},
        {"Identify", "identify", 49},
        {"String", "string", 50},
        {".", "punctuator", 51},
        {"!", "operator", 52},
        {"&&", "operator", 53},
        {":", "operator", 54},
        {"::", "operator", 55},
        {"using", "key word", 56},
        {"namespace", "key word", 57},
        {"std", "key word", 58},
        {"struct", "key word", 59},
        {"noexcept", "key word", 60},
        {"const", "key word", 61},
        {"->", "operator", 62},
        {"operator", "key word", 63},
        {"this", "key word", 64},
    };
    for (const entry& e : table)
    {
        _mmap.insert(token(e.text, e.kind, e.code));
    }

    file.open("test", ios::in);
    token_stream.open("token_stream", ios::out);

    // Report every file that failed, not just the first one, before giving up.
    bool ok = true;
    if (!file.is_open())
    {
        cerr << "init: cannot open input file \"test\"" << '\n';
        ok = false;
    }
    if (!token_stream.is_open())
    {
        cerr << "init: cannot open output file \"token_stream\"" << '\n';
        ok = false;
    }
    if (!ok)
    {
        exit(EXIT_FAILURE);
    }
}
// Returns true when c is one of the ASCII decimal digits '0'..'9'.
bool is_digit(char c) noexcept
{
    return c >= '0' && c <= '9';
}
// Returns true when c is an ASCII letter, upper case 'A'..'Z' or lower case 'a'..'z'.
bool is_alpha(char c) noexcept
{
    const bool upper = (c >= 'A' && c <= 'Z');
    const bool lower = (c >= 'a' && c <= 'z');
    return upper || lower;
}
// Returns true when c is a letter (per is_alpha) or a decimal digit (per is_digit).
bool is_alnum(char c) noexcept
{
    if (is_alpha(c))
    {
        return true;
    }
    return is_digit(c);
}
// Returns true when c is one of the punctuation characters , ( ) { } [ ] ; .
bool is_punctuation(char c) noexcept
{
    switch (c)
    {
    case ',':
    case '(':
    case ')':
    case '{':
    case '}':
    case '[':
    case ']':
    case ';':
    case '.':
        return true;
    default:
        return false;
    }
}
// Returns true when c is one of the operator characters : ! = + - * / ~ | & < >
bool is_operator(char c) noexcept
{
    switch (c)
    {
    case ':':
    case '!':
    case '=':
    case '+':
    case '-':
    case '*':
    case '/':
    case '~':
    case '|':
    case '&':
    case '<':
    case '>':
        return true;
    default:
        return false;
    }
}
//解析
void parser(const string& buffer)
{
string tmp;
size_t n = buffer.size(),i = 0;
for(;i < n;++i)
{
if(buffer[i] == '\r' || buffer[i] == '\n' || buffer[i] == 32)
{
continue;
}
if(buffer[i] == '#')
{
return;
}
if(buffer[i] == '/')
{
size_t j = i + 1;
if(buffer[j] == '/')
{
return;
}
}
if(is_alpha(buffer[i]) || buffer[i] == '_')
{
size_t j = i;
for(;j < n && !is_punctuation(buffer[j]) && (is_alnum(buffer[j]) || buffer[j] == '_') && buffer[j] != 32;++j)
{
tmp += buffer[j];
}
auto iter = find_if(_mmap.begin(),_mmap.end(),[&](const token& c){return c.first == tmp;});
if(iter != _mmap.end())
{
token_stream << *iter << endl;
}
else
{
token_stream << '(' << tmp << ",identify,49)" << endl;
}
i = j - 1;
tmp.clear();
}
else if(is_punctuation(buffer[i]))
{
auto iter = find_if(_mmap.begin(),_mmap.end(),[&](const token& c){return c == buffer[i];});
if(iter != _mmap.end())
{
token_stream << *iter << endl;
}
}
else if(is_operator(buffer[i]))
{
size_t j = i;
for(;j < n && is_operator(buffer[j]) && buffer[j] != 32;++j)
{
tmp += buffer[j];
}
auto iter = find_if(_mmap.begin(),_mmap.end(),[&](const token& c){return c.first == tmp;});
if(iter != _mmap.end())
{
token_stream << *iter << endl;
}
i = j - 1;
tmp.clear();
}
else if(is_digit(buffer[i]))
{
size_t j = i;
for(;j < n && buffer[j] != 32 && (is_digit(buffer[j]) || buffer[j] == '.');++j)
{
tmp += buffer[j];
}
auto iter = tmp.find('.');
if(iter != 18446744073709551615)
{
token_stream << '(' << tmp <<",floating point,48)" << endl;
}
else
{
token_stream << '(' << tmp <<",integer,47)" << endl;
}
i = j - 1;
tmp.clear();
}
else if(buffer[i] == '\"')
{
size_t j = i + 1;
tmp += buffer[i];
for(;j < n && buffer[j] != '\"';++j)
{
tmp += buffer[j];
}
tmp += buffer[j];
token_stream << '(' << tmp << ",string,50)" << endl;
i = j;
tmp.clear();
}
}
}
// Entry point: initializes the lexer, feeds the input file to parser() one line
// at a time, then closes both streams.
int main()
{
    init();
    for (string line; getline(file, line);)
    {
        parser(line);
    }
    file.close();
    token_stream.close();
    return 0;
}
// Lexical analyzer (lexer) source code.
// Published 2023-10-27, 171 views.
// Comments: none.