require 'date'

# Converts a tree of flat-file blog entries (content/**/entry*.txt plus their
# comment*.txt files) into SQL INSERT statements for the WordPress posts and
# comments tables. The statements are printed to standard output.

# Variables
wp_prefix = 'wp_' # WP table prefix
pid = 10          # Post ID to start at
cid = 10          # Comment ID to start at

# One parsed entry or comment. Comments reuse this class; only their author,
# body and date are used when SQL is generated.
class Post
  attr_accessor :title, :body, :date, :author, :comments

  def initialize
    @comments = []
  end
end

# Recursively collects every post found below root/dirname into +posts+.
#
# Directories whose name contains "entry" are not descended into; they are
# only consulted for the "comments" sub-directory that belongs to the entry
# file of the same name. Directory listings are sorted so that the IDs handed
# out later do not depend on the order the filesystem returns entries in.
#
# @param posts [Array<Post>] accumulator, appended to in place
# @param dirname [String] directory to scan, relative to +root+
# @param root [String] directory that contains +dirname+
def process_dir(posts, dirname, root = '.')
  dir = File.join(root, dirname)
  Dir.entries(dir).sort.each do |fn|
    next if fn == '.' || fn == '..'

    fullname = File.join(dir, fn)
    if File.directory?(fullname)
      process_dir(posts, fn, dir) unless fn.include?('entry')
      next
    end

    post = parse_post(fullname, fn)

    # Comments live next to the entry, in a directory named like the entry
    # file minus its last four characters (the ".txt" extension).
    commentdir = File.join(fullname[0..-5], 'comments')
    if File.directory?(commentdir)
      Dir.entries(commentdir).sort.each do |cn|
        next unless cn.include?('comment')

        post.comments.push(parse_post(File.join(commentdir, cn), cn, true))
      end
    end
    posts.push(post)
  end
end

# Builds the timestamp encoded in an entry/comment file name.
#
# The name is expected to look like "entryYYMMDD-HHMMSS..." or
# "commentYYMMDD-HHMMSS..."; the character between date and time is ignored.
# A DateTime (a Date subclass) is returned so the time of day is kept.
#
# @param fn [String] bare file name
# @return [DateTime]
# @raise [ArgumentError] when the name does not contain a valid timestamp
def parse_filename_date(fn)
  offset = fn.include?('entry') ? 5 : 7
  # year, month, day, hour, minute, second
  fields = [0, 2, 4, 7, 9, 11].map { |pos| fn[offset + pos, 2] }
  DateTime.strptime(fields.join(' '), '%y %m %d %H %M %S')
end

# Parses an entry*.txt file or a comment*.txt file.
#
# Files whose first "|"-separated token is "VERSION" are read as key/value
# pairs (SUBJECT, CONTENT, NAME). Anything else is treated as the old layout,
# where token 0 is the title (or the author, for comments) and token 2 is the
# body.
#
# @param fullname [String] path of the file to read
# @param fn [String] bare file name, used to derive the date
# @param iscomment [Boolean] true when the file is a comment
# @return [Post]
def parse_post(fullname, fn, iscomment = false)
  # Each line keeps its own terminator and gets one more "\n" appended, as the
  # script has always done. File.foreach closes the file when it is done.
  buffer = File.foreach(fullname).map { |line| line + "\n" }.join
  tokens = buffer.split('|') # TODO: cope with escaped |

  post = Post.new
  if tokens[0] == 'VERSION'
    # New version post
    tokens.each_slice(2) do |key, value|
      case key
      when 'SUBJECT' then post.title = value
      when 'CONTENT' then post.body = value
      when 'NAME' then post.author = value
      end
    end
  else
    # Old post
    if iscomment
      post.author = tokens[0]
    else
      post.title = tokens[0]
    end
    post.body = tokens[2]
  end

  post.date = parse_filename_date(fn)
  post
end

# Converts BBCode to HTML.
#
# Simple paired tags such as [b]...[/b] become <b>...</b>, [url=X]text[/url]
# becomes a link and [img=X] becomes an image. nil is treated as an empty
# string.
# NOTE(review): whatever follows "=" inside [url=...] / [img=...] is used
# verbatim as the href/src value - confirm the tags carry nothing but the URL.
#
# @param str [String, nil]
# @return [String]
def bb2html(str)
  html = str.to_s
  # Two passes, so a tag nested one level inside another is converted as well.
  2.times { html = html.gsub(/\[([a-zA-Z]+)\](.*?)\[\/\1\]/m, '<\1>\2</\1>') }
  html = html.gsub(/\[url=([^\]]*)\](.*?)\[\/url\]/m, '<a href="\1">\2</a>')
  html.gsub(/\[img=([^\]]*)\]/m, '<img src="\1" />')
end

# Creates a URL-friendly title: everything except ASCII letters and whitespace
# is dropped, the rest is lower-cased and each whitespace character becomes
# "-". nil is treated as an empty string.
#
# @param str [String, nil]
# @return [String]
def titleize(str)
  str.to_s.gsub(/[^A-Za-z\s]/, '').downcase.gsub(/\s/, '-')
end

# Escapes a value for use inside a single-quoted SQL string literal: single
# quotes are doubled and backslashes are doubled.
# The block form of gsub is used on purpose; in a replacement *string* the
# sequence \' would expand to the text after the match.
# NOTE(review): doubling backslashes assumes the target server treats
# backslash as an escape character (MySQL's default) - confirm.
#
# @param str [String, nil]
# @return [String]
def sql_escape(str)
  str.to_s.gsub(/[\\']/) { |ch| ch == "'" ? "''" : '\\\\' }
end

# Formats a date as "YYYY-MM-DD HH:MM:SS".
#
# @param date [Date, DateTime]
# @return [String]
def sql_datetime(date)
  date.strftime('%Y-%m-%d %H:%M:%S')
end

# Builds the INSERT statement for one post.
#
# @param prefix [String] table prefix
# @param id [Integer] ID to give the post
# @param post [Post]
# @return [String] statement terminated by ";\n"
def post_insert_sql(prefix, id, post)
  date = sql_datetime(post.date)
  values = [
    id, 1, "'#{date}'", "'#{date}'",
    "'#{sql_escape(bb2html(post.body))}'",
    "'#{sql_escape(bb2html(post.title))}'",
    "'#{sql_escape(titleize(post.title))}'",
    post.comments.size
  ]
  "INSERT INTO #{prefix}posts (id, post_author, post_date, post_date_gmt, " \
    "post_content, post_title, post_name, comment_count) " \
    " VALUES (#{values.join(', ')});\n"
end

# Builds the INSERT statement for one comment. Both date columns are filled
# from the comment's own timestamp.
#
# @param prefix [String] table prefix
# @param id [Integer] ID to give the comment
# @param post_id [Integer] ID of the post the comment belongs to
# @param comment [Post]
# @return [String] statement terminated by ";\n"
def comment_insert_sql(prefix, id, post_id, comment)
  date = sql_datetime(comment.date)
  values = [
    id, post_id,
    "'#{sql_escape(bb2html(comment.body))}'",
    "'#{sql_escape(comment.author)}'",
    "'#{date}'", "'#{date}'"
  ]
  "INSERT INTO #{prefix}comments (comment_id, comment_post_id, comment_content, " \
    "comment_author, comment_date, comment_date_gmt) VALUES (#{values.join(', ')});\n"
end

# Create object tree
posts = []
if File.directory?('content')
  process_dir posts, 'content', '.'
else
  warn "No 'content' directory found in #{Dir.pwd}; no SQL generated."
end

# Output SQL
posts.each do |post|
  puts post_insert_sql(wp_prefix, pid, post)
  post.comments.each do |comment|
    puts comment_insert_sql(wp_prefix, cid, pid, comment)
    cid += 1
  end
  pid += 1
end