diff --git a/c_lexer.rb b/c_lexer.rb
new file mode 100644
index 0000000..42ef6c7
--- /dev/null
+++ b/c_lexer.rb
@@ -0,0 +1,365 @@
+def error(msg)
+  puts "ERROR: #{msg}"
+  exit 1
+end
+
+class Program
+  def initialize()
+
+  end
+end
+
+#FunctionHeader + Semicolon = FunctionPrototype
+#FunctionHeader + Bracket = FunctionDeclaration
+#
+#Type + Identifier + [Identifier] + Semicolon = VarDecl
+#Type + Identifier + Equal + Expr = VarDeclWithValue
+
+class Construct
+  def initialize(stream)
+    @stream, @back = stream, []
+  end
+
+  def spit #put the last consumed token back on the front of the stream
+    @stream.prepend @back.pop
+  end
+
+  def vomit #put every consumed token back onto the stream
+    @back.reverse.each { spit }
+  end
+
+  def bite(flavor) #consume one token and report whether it matches flavor
+    @back << @stream.shift
+    #bite flavor if @back.last.type == :white
+    taste_like? flavor
+  end
+
+  def bite_until(flavor)
+    until bite flavor
+      return if @stream.empty?
+    end
+  end
+
+  def discard_until(flavor) #drop tokens up to and including the first one of type flavor
+    null = nil
+    until null == flavor
+      h = @stream.shift
+      break if h.nil?
+      null = h.type
+    end
+  end
+
+  #lick to get flavor
+  def lick
+    return @stream.first.type if @stream.first.class == Token
+    return @stream.first.class
+  end
+
+  #lick until stopping point
+  def deep_lick(stop)
+    tongue = []
+    @stream.each {|s|
+      break if s.type == stop
+      tongue << s.type if s.class == Token
+      tongue << s.class if s.class != Token
+    }
+    return tongue
+  end
+
+  def deep_bite(stop) #peek ahead, collecting tokens until one of type stop; nothing is consumed
+    tongue = []
+    @stream.each {|s|
+      break if s.type == stop
+      tongue << s
+    }
+    return tongue
+  end
+
+  def taste_like?(flavor)
+    return true if flavor == :any
+    return flavor.include?(@back.last.class == Token ? @back.last.type : @back.last.class) if flavor.class == Array
+    return @back.last.type == flavor if @back.last.class == Token
+    @back.last.class == flavor
+  end
+
+  #i could determine the type here
+end
+
+# int main(void) [;|{]
+class FunctionHeader < Construct
+  def initialize(stream)
+    super
+  end
+
+  def undo
+    @stream.prepend(*@back)
+  end
+
+  def consume
+    @back << @stream.shift
+    if [:int, :void].include? @back.last.type
+      @back << @stream.shift
+    end
+
+    if @back.last.type == :ident
+      @back << @stream.shift
+    end
+    if @back.last.type == :oparn
+      @back << @stream.shift
+    end
+    if [:int, :void].include? @back.last.type
+      @back << @stream.shift
+    end
+    if @back.last.type == :cparn
+      @back << @stream.shift
+    end
+    if @back.last.type == :obrac
+      @back << @stream.shift
+      return self
+    end
+
+    undo
+    return
+  end
+end
+
+class Token
+  attr_accessor :text
+  attr_accessor :type
+
+  def initialize(text, type)
+    @text, @type = text, type
+  end
+
+  def to_s
+    "#{@type}: #{@text}"
+  end
+end
+
+class Matcher
+  def initialize(hash)
+    @hash = hash
+  end
+
+  def self.match(hash, token)
+    hash.each {|k, v|
+      return Token.new(token, k) if token =~ v
+    }
+
+    return nil
+  end
+
+  #modify the token if its a keyword
+  def self.match_token(hash, token)
+    hash.each {|k,v|
+      token.type = k if token.text =~ v
+    }
+
+    return token
+  end
+end
+
+class StringLiteral
+  def initialize(str)
+    @text = str
+    @type = :string
+  end
+
+  def to_s
+    "#{@type}: #{@text}"
+  end
+end
+
+#this could just be a function
+class StringSweeper < Construct
+  def initialize(stream)
+    super stream
+    @string_mode = false
+  end
+
+  def create_string(stream)
+    StringLiteral.new stream.map { |s| s.text }.join
+  end
+
+  def sweep
+    until @stream.empty?
+      bite_until [:quote, nil] #everything before string
+      @back << create_string(deep_bite(:quote))
+      back_up = @back.pop
+      @back.pop #remove extra quote
+      @back << back_up
+      discard_until :quote #replace all tokens with string
+    end
+    puts @back
+  end
+end
+
+class XMLBody < Construct
+  def initialize(stream)
+
+  end
+end
+
+class XMLClose < Construct
+  def initialize(stream)
+
+  end
+end
+
+class XMLOpen < Construct
+  def initialize(stream)
+
+  end
+end
+
+class XML < Construct
+  def initialize(stream)
+    super stream
+  end
+
+  def consume
+    bite_until :close
+    XMLOpen.new(@back)
+  end
+end
+
+class Expr < Construct
+  @@tokens = {
+    :addop => /\+/,
+    :subop => /\-/,
+    :mulop => /\*/,
+    :divop => /\//,
+  }
+
+  @@type = :exprn
+
+  def self.tokens
+    @@tokens
+  end
+
+  def initialize(stream)
+    super(stream)
+  end
+
+  #parse the expression first
+  #determine what kind of expression
+  #it is after
+
+  def consume
+    #first token could be
+    #ident, const, minus, oparn, func_call
+    #
+
+    if Lexer.keywords.include? lick
+      puts "we have a variable (or function) declaration"
+    elsif lick == :ident
+      puts "we may have an assignment expression"
+    elsif deep_lick(:semic).include? :equal
+      puts "we definitely have an assignment expression"
+      bite_until :semic
+      puts @back
+    else
+      puts "we have an invalid expression"
+    end
+  end
+
+end
+
+class Lexer
+  @@tokens = {
+    :ident => /[a-zA-Z_]\w*\b/,
+    :const => /[0-9]+\b/,
+    :equal => /\=/,
+    :oparn => /\(/,
+    :cparn => /\)/,
+    :obrac => /{/,
+    :cbrac => /}/,
+    :quote => /\"/,
+    :semic => /;/,
+    :white => /\s/,
+  }.merge Expr.tokens
+
+  @@xml_tokens = {
+    :ident => /[a-zA-Z_]\w*/,
+    :const => /[0-9]+/,
+    :open_ => /\</,
+    :close => /\>/,
+    :equal => /\=/,
+    :slash => /\//,
+    :quote => /\"/,
+    :white => /\s/,
+  }
+
+  @@keywords = {
+    :int => /int\b/,
+    :short => /short\b/,
+    :long => /long\b/,
+    :longlong => /long\ long\b/,
+    :void => /void\b/,
+    :return => /return\b/,
+  }
+
+  def self.keywords
+    @@keywords.keys
+  end
+
+  def initialize(file)
+    error "expected input file" if file.nil?
+    @source = File.read file
+    @xml = false
+    @xml = true if File.extname(file) == ".xml"
+  end
+
+  def combine_id(tokens) #merge runs of single-character :ident tokens into one identifier
+    skips = 0
+    tokens.filter_map.with_index {|token, idx|
+      if skips != 0
+        skips -= 1
+        next
+      end
+
+      next token unless token.type == :ident
+
+      while tokens[idx += 1]&.type == :ident
+        token.text << tokens[idx].text
+        skips += 1
+      end
+
+      next token
+    }
+  end
+
+  def no_whitespace(tokens)
+    tokens.filter { |tok| tok.type != :white }
+  end
+
+  def match_keywords(tokens)
+    tokens.map {|token|
+      Matcher.match_token(@@keywords, token)
+    }
+  end
+
+  def lex_xml
+    tokens = @source.chars.filter_map { |char|
+      Matcher.match(@@xml_tokens, char)
+    }
+    combine_id tokens
+  end
+
+  def lex
+    return lex_xml if @xml
+
+    tokens = @source.chars.filter_map { |char|
+      Matcher.match(@@tokens, char)
+    }
+
+    #might be bad for strings with spaces in them
+    no_whitespace match_keywords combine_id tokens
+  end
+end
+
+lexer = Lexer.new(ARGV[0])
+stream = lexer.lex
+swept = StringSweeper.new(stream)
+swept.sweep
+
diff --git a/main.xml b/main.xml
new file mode 100644
index 0000000..12ed982
--- /dev/null
+++ b/main.xml
@@ -0,0 +1,3 @@
+
+ wow
+
diff --git a/return_2.c b/return_2.c
new file mode 100644
index 0000000..0d77fd6
--- /dev/null
+++ b/return_2.c
@@ -0,0 +1,2 @@
+int x = 2 + 4 * 3 - 8;
+y = 23;