require 'date'
# Variables
wp_prefix = 'wp_' # WP table prefix: the SQL below targets "#{wp_prefix}posts" and "#{wp_prefix}comments"
pid = 10 # ID given to the first post; raised by one after every post that is written out
cid = 10 # ID given to the first comment; raised by one after every comment that is written out
# Plain record for one parsed file: either a blog entry or a comment on one.
#
# title, body, date and author start out as nil and are filled in by the
# parser; comments starts out as an empty Array owned by this instance.
class Post
  attr_accessor :title, :body, :date, :author, :comments

  def initialize
    @comments = []
  end
end
# Recursive function to grab all entries
#
# Walks root/dirname and appends one Post to +posts+ for every entry file
# found, with that entry's comments already attached.
#
# posts   - Array that receives the parsed posts (mutated in place)
# dirname - name of the directory to scan, relative to root
# root    - directory that contains dirname (default ".")
#
# Rules applied to each directory member:
# * a directory whose name contains "entry" is not descended into (it is the
#   per-entry folder, reached through its entry file instead); any other
#   directory is scanned recursively
# * a file whose name contains "entry" is parsed as a post; other files are
#   skipped, mirroring the "comment" name filter used for comment files
# * comments are read from "<entry file name without extension>/comments"
#   next to the entry file, taking only files whose name contains "comment"
#
# Names are visited in sorted order because Dir.entries gives no ordering
# guarantee, and the order of +posts+ decides which IDs the posts receive.
def process_dir(posts, dirname, root = ".")
  dir = File.join(root, dirname)
  Dir.entries(dir).sort.each do |fn|
    next if fn == "." || fn == ".."
    fullname = File.join(dir, fn)
    if File.directory?(fullname)
      process_dir(posts, fn, dir) unless fn.match("entry")
    elsif fn.match("entry")
      post = parse_post(fullname, fn)
      # Get comments
      commentdir = File.join(dir, File.basename(fn, ".*"), "comments")
      if File.directory?(commentdir)
        Dir.entries(commentdir).sort.each do |cn|
          next unless cn.match("comment")
          post.comments.push(parse_post(File.join(commentdir, cn), cn, true))
        end
      end
      posts.push(post)
    end
  end
end
# Parses an entry*.txt file or a comment*.txt file
#
# The file is one "|"-separated record in either of two layouts:
# * new: the first field is "VERSION" and the fields form KEY|value pairs;
#   SUBJECT, CONTENT and NAME are read, every other key is ignored
# * old: positional; field 0 is the title (or, for a comment, the author)
#   and field 2 is the body
#
# fullname  - path of the file to read
# fn        - bare file name; the timestamp is taken from the characters that
#             follow the 5-character "entry" or 7-character "comment" prefix,
#             laid out as YYMMDD-HHMMSS
# iscomment - true when the file is a comment (only matters for the old
#             layout, where it decides whether field 0 is author or title)
#
# Returns a Post. Raises if the file cannot be read or the file name does not
# carry a parsable timestamp.
def parse_post(fullname, fn, iscomment = false)
  # File.read closes the handle and returns the text exactly as stored; the
  # previous line-by-line copy appended "\n" to lines that already ended in
  # one, doubling every line break in the post body.
  tokens = File.read(fullname).split("|") # TODO: cope with escaped |
  post = Post.new
  if tokens[0] == "VERSION"
    # New version post: walk the fields two at a time as key/value
    tokens.each_slice(2) do |key, value|
      case key
      when 'SUBJECT' then post.title = value
      when 'CONTENT' then post.body = value
      when 'NAME' then post.author = value
      end
    end
  else
    # Old post
    if iscomment
      post.author = tokens[0]
    else
      post.title = tokens[0]
    end
    post.body = tokens[2]
  end
  # The date comes from the file name, not the file content. Positions are
  # relative to the end of the prefix; index 6 is the "-" separator.
  offset = fn.match("entry") ? 5 : 7
  stamp = [0, 2, 4, 7, 9, 11].map { |at| fn[offset + at, 2] }.join(" ")
  # NOTE: the time-of-day fields are parsed, but a Date carries no time, so
  # only the calendar day ends up in post.date.
  post.date = Date.strptime(stamp, "%y %m %d %H %M %S")
  post
end
# Create object tree: fill +posts+ with every entry found below ./content,
# each one carrying its own comments.
posts = []
process_dir(posts, "content", ".")
# Process BBCode
#
# Returns a new string in which the BBCode markup of +str+ has been turned
# into HTML; +str+ itself is left untouched (nil is treated as "").
#
#   [tag]text[/tag]       -> <tag>text</tag>   (any alphabetic tag name)
#   [url=ADDRESS]text[/url] -> <a href="ADDRESS">text</a>
#   [img=ADDRESS ...]     -> <img src="ADDRESS" /> followed by a newline;
#                            ADDRESS ends at the first whitespace and anything
#                            else inside the brackets is dropped
#
# Markup without a matching closing tag is passed through unchanged.
def bb2html(str)
  html = str.to_s.dup
  # A single pass only converts the outermost pair of a nested run, because
  # the inner tags are swallowed into the captured text. gsub! returns nil
  # once nothing matched, so repeat until then. Every substitution removes
  # two bracketed tags, so the loop always ends.
  loop do
    break unless html.gsub!(/\[([a-zA-Z]+)\](.*?)\[\/\1\]/m, '<\1>\2</\1>')
  end
  html = html.gsub(/\[url=([^\]]*)\](.*?)\[\/url\]/m, '<a href="\1">\2</a>')
  html.gsub(/\[img=([^\]\s]*)[^\]]*\]/m, "<img src=\"\\1\" />\n")
end
# Create a url friendly title
#
# Lower-cases +str+, drops every character that is not an ASCII letter, a
# digit or whitespace, trims the ends and joins the remaining words with a
# single "-". Returns "" for an empty or nil title.
#
#   titleize("Hello - World!")  # => "hello-world"
def titleize(str)
  cleaned = str.to_s.downcase.gsub(/[^a-z0-9\s]/, '')
  # Collapse each whitespace run into one hyphen so that removed punctuation
  # ("a - b") or padding does not leave doubled or dangling hyphens.
  cleaned.strip.gsub(/\s+/, '-')
end
# Output SQL
#
# Prints one INSERT per post to stdout, each followed by one INSERT per
# comment on that post. Post and comment IDs are handed out sequentially,
# starting at pid and cid.

# Escapes text for use inside a single-quoted SQL string literal by putting a
# backslash in front of every backslash and single quote. The block form of
# gsub is required: in a replacement *string*, backslash-quote means "the text
# after the match", and the earlier '\'' literal was simply a lone quote, so
# nothing was being escaped at all.
# NOTE(review): backslash escaping assumes the target MySQL server does not
# run with NO_BACKSLASH_ESCAPES - confirm.
sql_quote = lambda { |text| text.to_s.gsub(/[\\']/) { |ch| "\\" + ch } }

posts.each do |post|
  # to_s keeps an entry without title or body from aborting the whole run.
  content = sql_quote.call(bb2html(post.body.to_s))
  title = sql_quote.call(bb2html(post.title.to_s))
  slug = sql_quote.call(titleize(post.title.to_s))
  puts "INSERT INTO #{wp_prefix}posts (id, post_author, post_date, post_date_gmt, " \
       "post_content, post_title, post_name, comment_count) " \
       " VALUES (#{pid}, 1, '#{post.date}', '#{post.date}', " \
       "'#{content}', " \
       "'#{title}', " \
       "'#{slug}', " \
       "#{post.comments.size});\n"
  post.comments.each do |comment|
    comment_body = sql_quote.call(bb2html(comment.body.to_s))
    comment_author = sql_quote.call(comment.author)
    # Both date columns come from the comment itself; comment_date_gmt used
    # to be filled with the parent post's date.
    puts "INSERT INTO #{wp_prefix}comments (comment_id, comment_post_id, comment_content, " \
         "comment_author, comment_date, comment_date_gmt) VALUES (" \
         "#{cid}, #{pid}, '#{comment_body}', " \
         "'#{comment_author}', '#{comment.date}', " \
         "'#{comment.date}');\n"
    cid += 1
  end
  pid += 1
end