词法分析器代码

Aki 发布于 2023-10-27 172 次阅读


#include<algorithm>
#include<cstdlib>
#include<fstream>
#include<iostream>
#include<set>
#include<string>
#include<utility>
using namespace std;

//Input stream (source text to scan) and output stream (emitted tokens)
fstream file;
fstream token_stream;

//One row of the token table: lexeme, category name and numeric category code.
//Ordering and equality look only at the lexeme (`first`), so a set<token> is
//keyed by lexeme and two tokens with the same lexeme count as the same element.
struct token
{
        //Builds a token from its lexeme `f`, category name `s` and category code `c`.
        //The strings arrive by value and are moved into place, so the body itself
        //cannot throw and the noexcept promise is honest.
        token(string f,string s,int c) noexcept : first(std::move(f)),second(std::move(s)),code(c) {}

        //Empty lexeme, empty category, code 0.
        token() noexcept : first(),second(),code(0) {}

        //Copy/move construction, copy/move assignment and destruction are left to
        //the compiler: the members manage themselves, and the implicit versions
        //also provide cheap moves.

        //Strict ordering by lexeme.
        friend bool operator <(const token& lhs,const token& rhs) noexcept
        {
                return lhs.first < rhs.first;
        }

        //Strictly greater by lexeme; equal lexemes are NOT greater.
        friend bool operator >(const token& lhs,const token& rhs) noexcept
        {
                return rhs < lhs;
        }

        //Two tokens are equal when their lexemes are equal.
        friend bool operator ==(const token& lhs,const token& rhs) noexcept
        {
                return lhs.first == rhs.first;
        }

        //True when the lexeme is exactly the single character `c`.
        friend bool operator ==(const token& lhs,const char& c) noexcept
        {
                return lhs.first.size() == 1 && lhs.first[0] == c;
        }

        //True when the lexeme equals `str`.
        friend bool operator ==(const token& lhs,const string& str) noexcept
        {
                return lhs.first == str;
        }

        //Writes the token as "(lexeme,category,code)".
        //Not noexcept: a stream may be configured to throw on failure.
        friend ostream& operator <<(ostream& os,const token& rhs)
        {
                return os << '(' << rhs.first << "," << rhs.second << "," << rhs.code << ')';
        }

        string first;   //lexeme
        string second;  //category name
        int code;       //category code
};

//token表
set<token> _mmap;
void init() noexcept
{
        _mmap.insert(token("endl", "key word", 0));
        _mmap.insert(token("void", "key word", 1));
        _mmap.insert(token("main", "key word", 2));
        _mmap.insert(token("int", "key word", 3));
        _mmap.insert(token("long", "key word", 4));
        _mmap.insert(token("float", "key word", 5));
        _mmap.insert(token("double", "key word", 6));
        _mmap.insert(token("char", "key word", 7));
        _mmap.insert(token("for", "key word", 8));
        _mmap.insert(token("while", "key word", 9));
        _mmap.insert(token("switch", "key word", 10));
        _mmap.insert(token("case", "key word", 11));
        _mmap.insert(token("break", "key word", 12));
        _mmap.insert(token("if", "key word", 13));
        _mmap.insert(token("else", "key word", 14));
        _mmap.insert(token("return", "key word", 15));
        _mmap.insert(token("+", "operator", 16));
        _mmap.insert(token("-", "operator", 17));
        _mmap.insert(token("*", "operator", 18));
        _mmap.insert(token("/", "operator", 19));
        _mmap.insert(token("=", "operator", 20));
        _mmap.insert(token(">", "operator", 21));
        _mmap.insert(token("<", "operator", 22));
        _mmap.insert(token("&", "operator", 23));
        _mmap.insert(token("|", "operator", 24));
        _mmap.insert(token("~", "operator", 25));
        _mmap.insert(token("==", "operator", 26));
        _mmap.insert(token(">=", "operator", 27));
        _mmap.insert(token("<=", "operator", 28));
        _mmap.insert(token("*=", "operator", 29));
        _mmap.insert(token("+=", "operator", 30));
        _mmap.insert(token("/=", "operator", 31));
        _mmap.insert(token("-=", "operator", 32));
        _mmap.insert(token("!=", "operator", 33));
        _mmap.insert(token("||", "operator", 34));
        _mmap.insert(token("++", "operator", 35));
        _mmap.insert(token("--", "operator", 36));
        _mmap.insert(token("<<", "operator", 37));
        _mmap.insert(token(">>", "operator", 38));
        _mmap.insert(token("(", "punctuator", 39));
        _mmap.insert(token(")", "punctuator", 40));
        _mmap.insert(token(";", "punctuator", 41));
        _mmap.insert(token("[", "punctuator", 42));
        _mmap.insert(token("]", "punctuator", 43));
        _mmap.insert(token("{", "punctuator", 44));
        _mmap.insert(token("}", "punctuator", 45));
        _mmap.insert(token(",", "punctuator", 46));
        _mmap.insert(token("Integer", "integer", 47));
        _mmap.insert(token("Floating point", "floating point", 48));
        _mmap.insert(token("Identify", "identify", 49));
        _mmap.insert(token("String", "string", 50));
        _mmap.insert(token(".", "punctuator", 51));
        _mmap.insert(token("!", "operator", 52));
        _mmap.insert(token("&&", "operator", 53));
        _mmap.insert(token(":", "operator", 54));
        _mmap.insert(token("::", "operator", 55));
        _mmap.insert(token("using","key word",56));
        _mmap.insert(token("namespace","key word",57));
        _mmap.insert(token("std","key word",58));
        _mmap.insert(token("struct","key word",59));
        _mmap.insert(token("noexcept","key word",60));
        _mmap.insert(token("const","key word",61));
        _mmap.insert(token("->","operator",62));
        _mmap.insert(token("operator","key word",63));
        _mmap.insert(token("this","key word",64));

        file.open("test",ios::in);
        token_stream.open("token_stream",ios::out);
        if(!file.is_open() || !token_stream.is_open())
        {
                exit(1);
        }
}

//True when c is one of the ASCII decimal digits '0'..'9'
bool is_digit(char c) noexcept
{
        return c >= '0' && c <= '9';
}

//True when c is an ASCII letter, upper or lower case
bool is_alpha(char c) noexcept
{
        const bool upper = (c >= 'A' && c <= 'Z');
        const bool lower = (c >= 'a' && c <= 'z');
        return upper || lower;
}

//True when c is an ASCII letter or an ASCII decimal digit
bool is_alnum(char c) noexcept
{
        if(is_digit(c))
        {
                return true;
        }
        return is_alpha(c);
}

//True when c is one of the punctuator characters  , ; . ( ) [ ] { }
bool is_punctuation(char c) noexcept
{
        switch(c)
        {
                case ',':
                case ';':
                case '.':
                case '(':
                case ')':
                case '[':
                case ']':
                case '{':
                case '}':
                        return true;
                default:
                        return false;
        }
}

//True when c is a character that can appear in an operator:  : ! = + - * / ~ | & < >
bool is_operator(char c) noexcept
{
        switch(c)
        {
                case ':':
                case '!':
                case '=':
                case '+':
                case '-':
                case '*':
                case '/':
                case '~':
                case '|':
                case '&':
                case '<':
                case '>':
                        return true;
                default:
                        return false;
        }
}

//解析
void parser(const string& buffer)
{
        string tmp;
        size_t n = buffer.size(),i = 0;
        for(;i < n;++i)
        {
                if(buffer[i] == '\r' || buffer[i] == '\n' || buffer[i] == 32)
                {
                        continue;
                }
                if(buffer[i] == '#')
                {
                        return;
                }
                if(buffer[i] == '/')
                {
                        size_t j = i + 1;
                        if(buffer[j] == '/')
                        {
                                return;
                        }
                }

                if(is_alpha(buffer[i]) || buffer[i] == '_')
                {
                        size_t j = i;
                        for(;j < n && !is_punctuation(buffer[j]) && (is_alnum(buffer[j]) || buffer[j] == '_') && buffer[j] != 32;++j)
                        {
                                tmp += buffer[j];
                        }
                        auto iter = find_if(_mmap.begin(),_mmap.end(),[&](const token& c){return c.first == tmp;});
                        if(iter != _mmap.end())
                        {
                                token_stream << *iter << endl;
                        }
                        else
                        {
                                token_stream << '(' << tmp << ",identify,49)" << endl; 
                        }
                        i = j - 1;
                        tmp.clear();
                }
                else if(is_punctuation(buffer[i]))
                {
                        auto iter = find_if(_mmap.begin(),_mmap.end(),[&](const token& c){return c == buffer[i];});
                        if(iter != _mmap.end())
                        {
                                token_stream << *iter << endl;
                        }
                }
                else if(is_operator(buffer[i]))
                {
                        size_t j = i;
                        for(;j < n && is_operator(buffer[j]) && buffer[j] != 32;++j)
                        {
                                tmp += buffer[j];
                        }
                        auto iter = find_if(_mmap.begin(),_mmap.end(),[&](const token& c){return c.first == tmp;});
                        if(iter != _mmap.end())
                        {
                                token_stream << *iter << endl;
                        }
                        i = j - 1;
                        tmp.clear();
                }
                else if(is_digit(buffer[i]))
                {
                        size_t j = i;
                        for(;j < n && buffer[j] != 32 && (is_digit(buffer[j]) || buffer[j] == '.');++j)
                        {
                                tmp += buffer[j];
                        }
                        auto iter = tmp.find('.');
                        if(iter != 18446744073709551615)
                        {
                                token_stream << '(' << tmp <<",floating point,48)" << endl;
                        }
                        else
                        {
                                token_stream << '(' << tmp <<",integer,47)" << endl;
                        }
                        i = j - 1;
                        tmp.clear();
                }
                else if(buffer[i] == '\"')
                {
                        size_t j = i + 1;
                        tmp += buffer[i];
                        for(;j < n && buffer[j] != '\"';++j)
                        {
                                tmp += buffer[j];
                        }
                        tmp += buffer[j];
                        token_stream << '(' << tmp << ",string,50)" << endl;
                        i = j;
                        tmp.clear();
                }
        }
}

//Entry point: prepare the table and the two files, tokenize the input one
//line at a time, then close both files.
int main()
{
        init();

        for(string line;getline(file,line);)
        {
                parser(line);
        }

        token_stream.close();
        file.close();
        return 0;
}