xml
parent
609dbf39d2
commit
6c8e3c8c20
@ -0,0 +1,365 @@
|
||||
# Reports a fatal error and terminates the program.
#
# The message is written to standard error rather than standard output so
# that it cannot be mixed into the program's regular output.
#
# @param msg [#to_s] description of what went wrong
# @return [void] never returns normally; exits with status 1
def error(msg)
  $stderr.puts "ERROR: #{msg}"
  exit 1
end
|
||||
|
||||
# Placeholder class; it currently holds no state and defines no behaviour.
class Program
  # Nothing to set up yet.
  def initialize; end
end
|
||||
|
||||
#FunctionHeader + Semicolon = FunctionPrototype
|
||||
#FunctionHeader + Bracket = FunctionDeclaration
|
||||
#
|
||||
#Type + Identifier + [Identifier] + Semicolon = VarDecl
|
||||
#Type + Identifier + Equal + Expr = VarDeclWithValue
|
||||
|
||||
# Base class for parser constructs that work on a token stream.
#
# Items are taken ("bitten") from the front of @stream and remembered in
# @back so they can be handed back ("spat") later. The "flavor" of an item
# is its +type+ when the item is a Token, and its class otherwise.
class Construct
  # @param stream [Array] items to parse; the array is mutated in place
  def initialize(stream)
    @stream = stream
    @back = []
  end

  # Puts the most recently bitten item back at the front of the stream.
  # Does nothing when no bitten items remain.
  def spit
    return if @back.empty?

    @stream.prepend(@back.pop)
  end

  # Puts every bitten item back, restoring the stream's original order.
  def vomit
    spit until @back.empty?
  end

  # Moves the next stream item onto the bitten pile. Whitespace is not
  # skipped here.
  #
  # @param flavor [Symbol, Class, Array] flavor(s) to test for, see #taste_like?
  # @return [Boolean] whether the bitten item has the requested flavor
  def bite(flavor)
    @back << @stream.shift
    taste_like?(flavor)
  end

  # Keeps biting until an item with the requested flavor has been bitten or
  # the stream runs out.
  def bite_until(flavor)
    until bite(flavor)
      return if @stream.empty?
    end
  end

  # Drops items from the front of the stream up to and including the first
  # one whose flavor equals +flavor+ (or until the stream is exhausted).
  # Dropped items are not remembered in @back.
  def discard_until(flavor)
    seen = nil
    until seen == flavor
      item = @stream.shift
      break if item.nil?

      seen = flavor_of(item)
    end
  end

  # Lick to get the flavor of the next stream item without consuming it.
  #
  # @return [Symbol, Class] the token type, or the item's class for
  #   non-tokens (NilClass when the stream is empty)
  def lick
    flavor_of(@stream.first)
  end

  # Lick until a stopping point: flavors of the leading stream items that
  # come before the first item of flavor +stop+. Nothing is consumed.
  #
  # @return [Array<Symbol, Class>]
  def deep_lick(stop)
    deep_bite(stop).map { |item| flavor_of(item) }
  end

  # Leading stream items that come before the first item of flavor +stop+.
  # Despite the name, nothing is removed from the stream.
  #
  # @return [Array]
  def deep_bite(stop)
    @stream.take_while { |item| flavor_of(item) != stop }
  end

  # Tests the flavor of the most recently bitten item.
  #
  # @param flavor [Symbol, Class, Array] +:any+ always matches; an Array
  #   matches when it contains the item's flavor; anything else must equal it
  # @return [Boolean]
  def taste_like?(flavor)
    return true if flavor == :any

    tasted = flavor_of(@back.last)
    flavor.is_a?(Array) ? flavor.include?(tasted) : tasted == flavor
  end

  # NOTE (original author): the construct's type could be determined here.

  private

  # Flavor of a single item: a Token's type, any other object's class.
  def flavor_of(item)
    item.is_a?(Token) ? item.type : item.class
  end
end
|
||||
|
||||
# int main(void) [;|{]
|
||||
# Tries to recognise a function header followed by an opening brace at the
# front of the token stream.
class FunctionHeader < Construct
  # Token types that let the parse advance by one token, in the order they
  # are checked. A step whose types do not match the current token is
  # simply skipped, so every part of the header is optional.
  SEQUENCE = [%i[int void], %i[ident], %i[oparn], %i[int void], %i[cparn]].freeze

  # @param stream [Array] token stream, mutated in place
  def initialize(stream)
    super
  end

  # Puts every token taken so far back at the front of the stream, in the
  # original order, and forgets them.
  #
  # @return [Array] the restored stream
  def undo
    # Splat so the tokens are re-inserted individually rather than as one
    # nested array element.
    @stream.unshift(*@back)
    @back.clear
    @stream
  end

  # Walks the header token by token.
  #
  # @return [FunctionHeader, nil] self when the walk ends on an :obrac
  #   token; nil after restoring the stream otherwise
  def consume
    current = advance
    SEQUENCE.each do |accepted|
      current = advance if accepted.include?(current&.type)
    end

    if current&.type == :obrac
      # NOTE(review): one more token (the one after the brace) is taken
      # here, exactly as before - confirm this is intended.
      advance
      return self
    end

    undo
    nil
  end

  private

  # Takes the next token off the stream and remembers it in @back.
  # Returns nil, remembering nothing, once the stream is exhausted.
  def advance
    token = @stream.shift
    @back << token unless token.nil?
    token
  end
end
|
||||
|
||||
# A lexical token: a piece of source text together with a symbol naming
# its kind. Both attributes are readable and writable.
class Token
  attr_accessor :text, :type

  # @param text [String] the matched source text
  # @param type [Symbol] the token kind
  def initialize(text, type)
    @text = text
    @type = type
  end

  # @return [String] "<type>: <text>"
  def to_s
    "#{type}: #{text}"
  end
end
|
||||
|
||||
# Helpers that classify text against a table of { kind => Regexp } pairs.
class Matcher
  # @param hash [Hash] pattern table; stored but not used by the class methods
  def initialize(hash)
    @hash = hash
  end

  # Builds a Token for +token+ using the first table entry whose pattern
  # matches it.
  #
  # @param hash [Hash{Symbol => Regexp}] pattern table, checked in order
  # @param token [String] text to classify
  # @return [Token, nil] the new token, or nil when no pattern matches
  def self.match(hash, token)
    hit = hash.find { |_kind, pattern| token =~ pattern }
    hit ? Token.new(token, hit.first) : nil
  end

  # Modify the token if it is a keyword: its type is set to the kind of
  # every matching entry in turn, so the last matching entry wins.
  #
  # @param hash [Hash{Symbol => Regexp}] keyword table
  # @param token [Token] token to retag in place
  # @return [Token] the same token object
  def self.match_token(hash, token)
    hash.each_pair do |kind, pattern|
      next unless token.text =~ pattern

      token.type = kind
    end

    token
  end
end
|
||||
|
||||
# A string literal assembled from several tokens. It exposes the same
# +text+ / +type+ readers as Token so that code inspecting stream items via
# +type+ can handle both kinds of object.
class StringLiteral
  attr_reader :text, :type

  # @param str [String] the contents of the literal
  def initialize(str)
    @text = str
    @type = :string
  end

  # @return [String] "string: <text>"
  def to_s
    "#{@type}: #{@text}"
  end
end
|
||||
|
||||
#this could just be a function
|
||||
# Replaces every run of tokens enclosed by a pair of :quote tokens with a
# single StringLiteral; all other tokens are kept as they are.
class StringSweeper < Construct
  # @param stream [Array] token stream, consumed in place
  def initialize(stream)
    super(stream)
    @string_mode = false
  end

  # Joins the text of the given tokens into one StringLiteral.
  #
  # @param stream [Array] items responding to +text+
  # @return [StringLiteral]
  def create_string(stream)
    StringLiteral.new(stream.map(&:text).join)
  end

  # Processes the whole stream, prints the resulting items and returns them.
  #
  # @return [Array] tokens and string literals in source order
  def sweep
    until @stream.empty?
      bite_until [:quote, nil] # everything before the string
      # The stream ran out without reaching a quote: there is no string to
      # build, and the last bitten item is a regular token that must stay.
      break unless taste_like?(:quote)

      @back.pop # drop the opening quote
      @back << create_string(deep_bite(:quote))
      discard_until :quote # drop the string's tokens and the closing quote
    end
    puts @back
    @back
  end
end
|
||||
|
||||
# Placeholder construct without parsing logic of its own yet.
# NOTE(review): presumably meant for the content between an opening and a
# closing tag - confirm.
class XMLBody < Construct
  # Delegates to Construct#initialize so that @stream and @back are set up;
  # without it the inherited helpers would operate on nil.
  #
  # @param stream [Array] token stream
  def initialize(stream)
    super
  end
end
|
||||
|
||||
# Placeholder construct without parsing logic of its own yet.
# NOTE(review): presumably meant for a closing tag - confirm.
class XMLClose < Construct
  # Delegates to Construct#initialize so that @stream and @back are set up;
  # without it the inherited helpers would operate on nil.
  #
  # @param stream [Array] token stream
  def initialize(stream)
    super
  end
end
|
||||
|
||||
# Placeholder construct without parsing logic of its own yet.
# NOTE(review): presumably meant for an opening tag - confirm.
class XMLOpen < Construct
  # Delegates to Construct#initialize so that @stream and @back are set up;
  # without it the inherited helpers would operate on nil.
  #
  # @param stream [Array] token stream
  def initialize(stream)
    super
  end
end
|
||||
|
||||
# Entry construct for XML input.
class XML < Construct
  # @param stream [Array] token stream, consumed in place
  def initialize(stream)
    super
  end

  # Bites tokens up to and including the first :close token (or until the
  # stream runs out) and wraps everything bitten so far in an XMLOpen.
  #
  # @return [XMLOpen]
  def consume
    bite_until(:close)
    XMLOpen.new(@back)
  end
end
|
||||
|
||||
# Expression construct. It also owns the operator token patterns, which the
# Lexer merges into its own token table.
class Expr < Construct
  @@tokens = {
    addop: /\+/,
    subop: /\-/,
    mulop: /\*/,
    divop: /\//
  }

  @@type = :exprn

  # @return [Hash{Symbol => Regexp}] operator kinds and their patterns
  def self.tokens
    @@tokens
  end

  # @param stream [Array] token stream, consumed in place
  def initialize(stream)
    super
  end

  # Parse the expression first, then determine what kind of expression it is.
  #
  # The first token could be an ident, const, minus, oparn or func_call.
  # For now this only reports what the upcoming tokens look like; in the
  # "definitely an assignment" case it also bites through the next :semic
  # and prints what was bitten.
  #
  # @return [nil]
  def consume
    return puts("we have a variable (or function) declaration") if Lexer.keywords.include?(lick)
    return puts("we may have an assignment expression") if lick == :ident
    return puts("we have an invalid expression") unless deep_lick(:semic).include?(:equal)

    puts "we definitely have an assignment expression"
    bite_until(:semic)
    puts @back
  end
end
|
||||
|
||||
# Turns the contents of a source file into a flat list of Token objects.
#
# The source is classified one character at a time against a pattern
# table; characters matching no pattern are dropped. Adjacent characters
# that belong to the same identifier or numeric constant are then merged.
# Files with the ".xml" extension use a separate pattern table.
class Lexer
  # Patterns for regular source files, checked in order; the operator
  # patterns come from Expr.
  TOKENS = {
    ident: /[a-zA-Z_]\w*\b/,
    const: /[0-9]+\b/,
    equal: /\=/,
    oparn: /\(/,
    cparn: /\)/,
    obrac: /{/,
    cbrac: /}/,
    quote: /\"/,
    semic: /;/,
    white: /\s/
  }.merge(Expr.tokens).freeze

  # Patterns for XML files, checked in order.
  XML_TOKENS = {
    ident: /[a-zA-Z_]\w*/,
    const: /[0-9]+/,
    open_: /\</,
    close: /\>/,
    equal: /\=/,
    slash: /\//,
    quote: /\"/,
    white: /\s/
  }.freeze

  # Keyword patterns. They are anchored to the whole token text so that
  # identifiers merely ending in a keyword ("print", "avoid") are not
  # retagged.
  # NOTE(review): :longlong cannot match a single token because the space
  # in "long long" always separates two tokens - confirm how it should be
  # recognised.
  KEYWORDS = {
    int: /\Aint\z/,
    short: /\Ashort\z/,
    long: /\Along\z/,
    longlong: /\Along\ long\z/,
    void: /\Avoid\z/,
    return: /\Areturn\z/
  }.freeze

  # @return [Array<Symbol>] the keyword token types
  def self.keywords
    KEYWORDS.keys
  end

  # Reads the whole file into memory. Calls +error+ (defined elsewhere in
  # this file) when no file name is given.
  #
  # @param file [String, nil] path of the file to lex
  def initialize(file)
    error "expected input file" if file.nil?
    @source = File.read(file)
    @xml = File.extname(file) == ".xml"
  end

  # Merges runs of single-character tokens into whole words: identifier
  # characters followed by identifier or digit characters become one :ident
  # token, and consecutive digits become one :const token. The first token
  # of each run is kept and its text extended in place.
  #
  # @param tokens [Array<Token>] single-character tokens
  # @return [Array<Token>] tokens with runs merged
  def combine_id(tokens)
    tokens.each_with_object([]) do |token, merged|
      if extends?(merged.last, token)
        merged.last.text << token.text
      else
        merged << token
      end
    end
  end

  # @param tokens [Array<Token>]
  # @return [Array<Token>] the tokens that are not of type :white
  def no_whitespace(tokens)
    tokens.reject { |token| token.type == :white }
  end

  # Retags, in place, every token whose text is a keyword.
  #
  # @param tokens [Array<Token>]
  # @return [Array<Token>] the same token objects
  def match_keywords(tokens)
    tokens.map { |token| Matcher.match_token(KEYWORDS, token) }
  end

  # Lexes the source with the XML pattern table. Whitespace tokens are
  # kept and keywords are not retagged.
  #
  # @return [Array<Token>]
  def lex_xml
    combine_id(scan(XML_TOKENS))
  end

  # Lexes the source, dispatching to #lex_xml for XML files.
  #
  # Whitespace tokens are removed from the result, which might be bad for
  # strings with spaces in them.
  #
  # @return [Array<Token>]
  def lex
    return lex_xml if @xml

    no_whitespace(match_keywords(combine_id(scan(TOKENS))))
  end

  private

  # Classifies each source character against +table+, dropping characters
  # that match no pattern.
  def scan(table)
    @source.chars.filter_map { |char| Matcher.match(table, char) }
  end

  # Whether +token+ continues the word started by +previous+.
  def extends?(previous, token)
    return false if previous.nil?

    case previous.type
    when :ident then %i[ident const].include?(token.type)
    when :const then token.type == :const
    else false
    end
  end
end
|
||||
|
||||
# Script entry point: lex the file named by the first command-line argument
# and run the string sweeper over the resulting tokens.
StringSweeper.new(Lexer.new(ARGV.first).lex).sweep
|
||||
|
||||
@ -0,0 +1,3 @@
|
||||
<root attr="hello world">
|
||||
<subroot attr="yes">wow</subroot>
|
||||
</root>
|
||||
@ -0,0 +1,2 @@
|
||||
int x = 2 + 4 * 3 - 8;
|
||||
y = 23;
|
||||
Loading…
Reference in New Issue