#! /usr/bin/env python

#    cvs-importer - import all existing versions of a program into cvs
#    Copyright (C) 2001  Matthew Mueller <donut@azstarnet.com>
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program; if not, write to the Free Software
#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

#required for python 2.1 support:
from __future__ import nested_scopes

import os,sys,re,time
import tempfile,shutil,fnmatch

__version__ = "0.2.1"

# Module-wide switches consulted by the helper functions below.
# verbose: when non-zero, commands and file operations are announced on stdout.
verbose = 1
# dry_run: when non-zero, do_system()/do_safesystem() print their command but
# do not run it; main() sets this for -n/--dry-run.
dry_run = 0

def try_int(s):
	"""Return int(s) if s parses as an integer, otherwise s itself."""
	try:
		value = int(s)
	except ValueError:
		value = s
	return value

# Splits "X.Y-preZ" style version strings into the release part and the pre-release part.
re_prever = re.compile(r'^(.+)-pre(.*)$')
class Ver:
	"""One version of the module: its version string and the archive holding it.

	Attributes:
	  verstr   -- version string as given to the constructor
	  filename -- path of the archive
	  tagver   -- verstr with every character outside [-_0-9a-zA-Z] replaced by '_'
	  sortver  -- tuple of ints usable as a sort key; only set by a successful sortable()
	"""
	def __init__(self, verstr, filename):
		self.verstr = verstr
		self.filename = filename
		# every character that is not allowed is replaced with '_'
		self.tagver = re.sub('[^-_0-9a-zA-Z]', '_', verstr)

	def sortable(self):
		"""Compute self.sortver; return 1 on success, 0 if a component is not an integer.

		A plain "X.Y" becomes (X, Y).  "X.Y-preZ" becomes (X, Y-1, 99999, Z),
		which compares lower than (X, Y).
		"""
		prematch = re_prever.match(self.verstr)
		try:
			if prematch is None:
				key = [int(part) for part in self.verstr.split('.')]
			else:
				release = [int(part) for part in prematch.group(1).split('.')]
				prerelease = [int(part) for part in prematch.group(2).split('.')]
				key = release[:-1] + [release[-1]-1, 99999] + prerelease
		except ValueError:
			return 0
		self.sortver = tuple(key)
		return 1

	def __repr__(self):
		details = (id(self), self.tagver, self.filename)
		return '<Ver 0x%x %s %s>'%details


def unpack(ver):
	if verbose:
		print 'extracting %s'%ver.filename
	l1 = os.listdir(os.curdir)
	os.system('aunpack'+mkshellarg(ver.filename))
	l2 = os.listdir(os.curdir)
	for d in l2:
		if d not in l1:
			return d
	raise Exception, 'nothing unpacked?'

def uniq(seq):
	"""Return the items of seq as a list without duplicates, keeping first occurrences in order."""
	seen = []
	for item in seq:
		if item in seen:
			continue
		seen.append(item)
	return seen

def get_recursive_file_list(path, filefilter=os.path.isfile):
	"""Return the paths below path for which filefilter is true.

	Every entry is joined onto path and every subdirectory is descended into.
	An entry accepted by filefilter is listed before anything found inside it.
	"""
	found = []
	for name in os.listdir(path):
		entry = os.path.join(path, name)
		if filefilter(entry):
			found.append(entry)
		if os.path.isdir(entry):
			found += get_recursive_file_list(entry, filefilter)
	return found
def get_recursive_dir_list(path, filefilter=os.path.isdir):
	"""Like get_recursive_file_list(), but selecting directories by default."""
	dirs = get_recursive_file_list(path, filefilter)
	return dirs

def exitstatus_str(st):
	"""Describe the wait()-style status st as text (stopped / killed / exited)."""
	# (predicate, message format, value extractor), tried in this order
	decoders = (
		(os.WIFSTOPPED, 'stopped: sig %i', os.WSTOPSIG),
		(os.WIFSIGNALED, 'killed: sig %i', os.WTERMSIG),
		(os.WIFEXITED, 'exit status: %i', os.WEXITSTATUS),
	)
	for applies, fmt, extract in decoders:
		if applies(st):
			return fmt%extract(st)
	return '(??: %i)'%st

try:
	import commands
	def getoutput_checkstatus(cmd):
		if verbose:
			print 'running %r...'%cmd,
		st,p=commands.getstatusoutput(cmd)
		if st:
			print '%s (%r)'%(exitstatus_str(st),p)
			sys.exit(1)
		if verbose:
			print `p`
		return p
	mkshellarg = commands.mkarg
except ImportError:
	getoutput_checkstatus = None
	def mkshellarg(a): return ' "%s"'%a

def do_system(cmd):
	if verbose:
		print 'system(%r)'%cmd
	if dry_run: return
	s = os.system(cmd)
	if s: raise Exception, "system(%r): returned %s"%(cmd,s)

def do_safesystem(cmd, *args):
	if verbose:
		print 'spawnlp(%s, %s, %s, %s)'%(os.P_WAIT, cmd, cmd, ', '.join(args))
	if dry_run: return
	s = os.spawnlp(os.P_WAIT, cmd, cmd, *args)
	if s: raise Exception, "spawnlp(%s, %s, %s, %s) returned %s"%(os.P_WAIT, cmd, cmd, ', '.join(args), s)

def do_chdir(path):
	if verbose:
		print 'chdir(%r)'%path,
	os.chdir(path)
	if verbose:
		print '->',os.getcwd()

def do_rmtree(path):
	"""Remove the directory tree at path with shutil.rmtree(), announcing it first when verbose."""
	if verbose:
		print 'rmtree(%r)'%path
	shutil.rmtree(path)


def get_module_versions(module_name, arg, no_sort=0):
	module_versions = []
	
	def try_add_tarball(fdir, fn, force=0):
		verstr = None

		def get_verstr(fntail):
			for ext in ('.tar.gz', '.tar.bz2'):
				if fntail.endswith(ext):
					return fntail[:-len(ext)]
			if force:
				return fntail[:fntail.rfind('.')]

		if fn.startswith(module_name+'-'):
			verstr = get_verstr(fn[len(module_name)+1:])
		elif force:
			r = re.match('.+-(\d.*)$', fn)
			if r:
				verstr = get_verstr(r.group(1))
					
		if verstr:
			mver = Ver(verstr, os.path.join(fdir, fn))
			if no_sort or mver.sortable():
				module_versions.append(mver)
			else:
				print "ignoring %s since I can't parse its version"%fn

	if os.path.isdir(arg):	
		for fn in os.listdir(arg):
			try_add_tarball(arg, fn)
		return sort_module_versions(module_versions)
	else:
		fdir, fn = os.path.split(arg)
		try_add_tarball(fdir, fn, force=1)
		return module_versions
		
	
def sort_module_versions(module_versions):
	sorter = [(v.sortver, v) for v in module_versions]
	sorter.sort()
	if verbose:
		for v in sorter:
			print v
	return [v[1] for v in sorter]

class Importer:
	"""Imports a series of versions of a module into CVS.

	The option attributes are initialised in __init__(); do_import() performs
	the actual import.
	"""
	# vendor tag passed to "cvs import" when the module is created
	vendor_str = 'vendor'
	def __init__(self):
		self.exclude_pats = []
		self.changelog_name = None
		self.keep_generated_files = 0
		self.oldchangelog = []
		self.auto_cvsignore = 1
		self.global_ignore = []
		self.local_ignore = []
		self.filetype_pats = []
		self.no_sort = 0
	
	def do_remove_generated_files(self, path):
		if not self.keep_generated_files:
			allfiles = get_recursive_file_list(path)
			alldirs = get_recursive_dir_list(path)
			generated = []
			generated_dirs = []
			#reset the cvsignore lists so that we use the data from the latest version of the source only.
			self.global_ignore = []
			self.local_ignore = []
			def add_generated(g, cvsignore=1):
				if g in allfiles and g not in generated:
					generated.append(g)
				elif g in alldirs and g not in generated_dirs:
					generated_dirs.append(g)
				if cvsignore and g not in self.local_ignore:
					self.local_ignore.append(g)
			def add_global_ignore(pat):
				if pat not in self.global_ignore:
					self.global_ignore.append(pat)
				
			for s in [f for f in allfiles if f.endswith('.in')]:
				add_generated(s[:-3])
				if os.path.split(s)[1]=='configure.in':
					for g in ('config.status','config.log','config.cache'):
						add_generated(os.path.join(os.path.split(s)[0], g))
			
			for s in [f for f in allfiles if f.endswith('.am')]:
				add_generated(s[:-3]+'.in')
			
			for s in [f for f in allfiles if f.endswith('.py')]:
				add_generated(s[:-3]+'.pyc', 0)
				add_generated(s[:-3]+'.pyo', 0)
				add_global_ignore('*.py[co]')
			
			#distutils setup.py sdist include a generated "PKG-INFO" in the tarball.
			for s in [f for f in allfiles if os.path.split(f)[1]=='setup.py']:
				add_generated(os.path.join(os.path.split(s)[0],'PKG-INFO')) #included in tarballs made by setup.py sdist
				add_generated(os.path.join(os.path.split(s)[0],'build')) #created by setup.py build
				add_generated(os.path.join(os.path.split(s)[0],'dist')) #created by setup.py sdist

			if generated:
				if verbose:
					print 'removing "generated" files:',generated
				map(os.unlink, generated)
			if generated_dirs:
				if verbose:
					print 'removing "generated" dirs:',generated_dirs
				map(shutil.rmtree, generated_dirs)

		
		if self.exclude_pats:
			excluded = [f for f in allfiles for p in self.exclude_pats if fnmatch.fnmatch(os.path.split(f)[1], p)]
			if excluded:
				if verbose:
					print 'removing excluded files:',excluded
				map(os.unlink, excluded)
			
			excluded = [f for f in alldirs for p in self.exclude_pats if fnmatch.fnmatch(os.path.split(f)[1], p)]

			if excluded:
				if verbose:
					print 'removing excluded dirs:',excluded
				map(shutil.rmtree, excluded)

	def add_filetype_pat(self, ftype, pat):
		assert ftype in ('text','binary')
		self.filetype_pats.append((ftype, pat))

	def determine_filetypes(self, files):
		"""list of filenames -> list of text files, list of binary files

		A file whose base name matches a pattern registered with
		add_filetype_pat() gets that pattern's type; when several patterns
		match, the one registered last wins.  Any other file counts as text if
		it is empty or if getoutput_checkstatus is unavailable; otherwise the
		"file" command is run on it and the file is text exactly when the
		reported description contains "text".  Both result lists keep the order
		of files.
		"""
		filetypes = {}
		for f in files:
			filetypes[f] = None

		for ftype, pat in self.filetype_pats:
			for f in files:
				if fnmatch.fnmatch(os.path.split(f)[1], pat):
					filetypes[f] = ftype

		for f, ftype in filetypes.items():
			if ftype:
				continue
			if os.path.getsize(f)==0 or not getoutput_checkstatus:
				o = 'text'
			else:
				o = getoutput_checkstatus('file'+mkshellarg(f))
				# "file" answers "<name>: <description>".  Drop the name so
				# that a path containing "text" cannot make a binary file
				# look like a text file.
				if o.startswith(f+':'):
					o = o[len(f)+1:]
			if o.find('text')>=0:
				filetypes[f] = 'text'
			else:
				filetypes[f] = 'binary'

		return [f for f in files if filetypes[f]=='text'], [f for f in files if filetypes[f]=='binary']

	def get_change_message(self, ver, change_type):
		change_message = '%s of %s %s'%(change_type, self.module_name, ver.verstr)
		if self.changelog_name==-1:
			return change_message
		if self.changelog_name==None:
			changelogs = [f for f in os.listdir(os.curdir) if f.lower()=='changelog']
			if not changelogs:
				return change_message
			if len(changelogs)>1:
				pass #XXX
			self.changelog_name = changelogs[0]

		newchangelog = open(self.changelog_name).read().strip().splitlines(1)
		newlines = [l for l in newchangelog if l not in self.oldchangelog]
		dellines = [l for l in self.oldchangelog if l not in newchangelog and l.strip!='']
		if dellines:
			pass #XXX
		self.oldchangelog = newchangelog
		return ''.join(newlines)
		
	def do_import(self, module_name, args):
		if not args:
			args = [os.getcwd()]
		args = [os.path.abspath(f) for f in args]

		self.module_name = module_name
		olddir = os.getcwd()
		try:
			tmpdir = tempfile.mktemp()
			os.mkdir(tmpdir, 0700)
			cvsdir = tempfile.mktemp()
			os.mkdir(cvsdir, 0700)
			cvsrootdir = tempfile.mktemp()
			os.mkdir(cvsrootdir, 0700)
			do_chdir(tmpdir)
		
			module_versions = []
			for a in args:
				module_versions.extend(get_module_versions(module_name, a, self.no_sort))
			if not self.no_sort:
				module_versions = sort_module_versions(module_versions)
		
			# import the initial version
			vdir = 'tmp-import'
			os.mkdir(vdir)
			do_chdir(vdir)
			
			do_safesystem('cvs','import','-m','create %s cvs'%module_name, module_name, self.vendor_str, 'start')
			#do_system('cvs import -m %r %(module)s %(vendor)s %(module)s-%(tagver)s'%{'module':module_name, 'ver':ver.verstr, 'vendor':vendor_str, 'tagver':ver.tagver})
			
			oldfiles = []
			olddirs = []

			do_chdir(tmpdir)
			do_rmtree(vdir)

			# checkout the cvs so we can update it
			do_system('cvs checkout -d %s %s'%(cvsdir, module_name))

			# "import" all the rest of the versions
			for ver in module_versions:
				if not dry_run: time.sleep(1) #sleep for a second to ensure that each version has a different timestamp
				do_chdir(tmpdir)
				vdir = unpack(ver)
				do_chdir(vdir)
			
				self.do_remove_generated_files(os.curdir)
				
				newfiles = get_recursive_file_list(os.curdir)
				newdirs = get_recursive_dir_list(os.curdir)

				rmfiles = [f for f in oldfiles if f not in newfiles]
				# with cvs you don't manually remove dirs, just when they become empty cvs up -P gets rid of em.
				#rmdirs = [f for f in olddirs if f not in newdirs]

				addfiles = [f for f in newfiles if f not in oldfiles]
				adddirs = [f for f in newdirs if f not in olddirs]

				addtext, addbinary = self.determine_filetypes(addfiles)
				
				do_chdir(cvsdir)

				map(os.makedirs, adddirs)
				for f in newfiles:
					if verbose:
						print 'moving %s -> %s'%(os.path.join(tmpdir, vdir, f), os.path.join(cvsdir, f))
					os.rename(os.path.join(tmpdir, vdir, f), os.path.join(cvsdir, f))

				if rmfiles:
					if verbose:
						print 'removing outdated files:',rmfiles
					map(os.unlink, rmfiles)
					do_safesystem('cvs','rm',*rmfiles)

				if adddirs:
					do_safesystem('cvs','add',*adddirs)
				if addtext:
					do_safesystem('cvs','add',*addtext)
				if addbinary:
					print 'importing as BINARY:',addbinary
					do_safesystem('cvs','add','-kb',*addbinary)

				change_message = self.get_change_message(ver, 'Commit')
				do_safesystem('cvs','commit','-m',change_message)
				do_system('cvs tag %s-%s'%(module_name,ver.tagver))
				do_system('cvs update -PAd')

				oldfiles = newfiles
				olddirs = newdirs
				
				do_chdir(tmpdir)
				do_rmtree(vdir)


			if self.auto_cvsignore:
				if self.global_ignore:
					do_system('cvs checkout -d %s %s'%(cvsrootdir, 'CVSROOT'))
					do_chdir(cvsrootdir)
					if verbose:
						print 'adding patterns to global cvsignore:',self.global_ignore
					open('cvsignore','a').write(' '.join(self.global_ignore)+'\n')
					do_system('cvs add cvsignore')
					do_safesystem('cvs','commit','-m','global cvsignore autogenerated by cvs-importer')
					
				if self.local_ignore:
					do_chdir(cvsdir)
					alldirs = get_recursive_dir_list(os.curdir)
					for d in [os.curdir]+alldirs:
						ignored = [os.path.normpath(i) for i in self.local_ignore if os.path.normpath(os.path.split(i)[0])==os.path.normpath(d)]
						if ignored:
							if verbose:
								print 'adding patterns to %s:'%os.path.join(d,'.cvsignore'),ignored
							open(os.path.join(d,'.cvsignore'),'a').write(' '.join(ignored)+'\n')
							do_safesystem('cvs','add',os.path.join(d,'.cvsignore'))
					do_safesystem('cvs','commit','-m','.cvsignore autogenerated by cvs-importer')
			
			print 'cvs-importer completed successfully'
		finally:
			if verbose:
				print "(main loop cleanup)"
			do_chdir(olddir)
			do_rmtree(tmpdir)
			do_rmtree(cvsdir)
			do_rmtree(cvsrootdir)
			if verbose:
				print "(main loop cleanup done)"

def printusage():
	print 'Usage: cvs-importer [opts] <module-name> [tarball directories and/or tarballs]'
	print 'the cvs repository must already be initialized (cvs init)'

def printhelp():
	print 'cvs-importer v%s - Copyright (C) 2001 Matthew Mueller - GPL license'%__version__
	printusage()
	print '-x/--exclude <pat>      file pattern to exclude from importing into cvs'
	print '-t/--text <pat>         file pattern to force import as text'
	print '-b/--binary <pat>       file pattern to force import as binary'
	print '--keep-generated-files  disable automatic removal of "generated" files'
	print '--no-cvsignore          disable automatic cvsignore of "generated" files'
	print '--no-sort               disable version sorting of command line specified files'
	print '--ignore-changelog      disable usage of changelog file for commit messages'
	print '--changelog <filename>  force the use of a certain file for the changelog'
	print '-n/--dry-run            don\'t actually run any cvs commands, just print them'
	print '-h/--help               show help'
	print ' --version              show version'
	

def main(argv):
	import getopt

	try:
		optlist, args = getopt.getopt(argv[1:], 'x:hnt:b:', ['help','version','exclude=','keep-generated-files','ignore-changelog','changelog=','no-cvsignore','dry-run','text=','binary=','no-sort'])
	except getopt.error, a:
		print "cvs-importer: %s"%a
		printusage()
		sys.exit(1)
	
	importer = Importer()

	for o,a in optlist:
		if o=='-x' or o=='--exclude':
			importer.exclude_pats.append(a)
		elif o=='-t' or o=='--text':
			importer.add_filetype_pat('text', a)
		elif o=='-b' or o=='--binary':
			importer.add_filetype_pat('binary', a)
		elif o=='--keep-generated-files':
			importer.keep_generated_files = 1
		elif o=='--changelog':
			importer.changelog_name = a
		elif o=='--ignore-changelog':
			importer.changelog_name = -1
		elif o=='--no-cvsignore':
			importer.auto_cvsignore = 0
		elif o=='--no-sort':
			importer.no_sort = 1
		elif o=='-n' or o=='--dry-run':
			global dry_run
			dry_run = 1
		elif o=='-h' or o=='-?' or o=='--help':
			printhelp()
			sys.exit(0)
		elif o=='--version':
			print __version__
			sys.exit(0)

	if len(args)<1:
		printusage()
		sys.exit(1)

	module_name = args[0]

	importer.do_import(module_name, args[1:])

# Run the importer only when this file is executed as a script.
if __name__=="__main__":
	main(sys.argv)
