complearn 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. data/AUTHORS +13 -0
  2. data/COPYING +340 -0
  3. data/ChangeLog +0 -0
  4. data/INSTALL +231 -0
  5. data/Makefile +352 -0
  6. data/Makefile.am +76 -0
  7. data/Makefile.in +352 -0
  8. data/NEWS +7 -0
  9. data/README +0 -0
  10. data/aclocal.m4 +104 -0
  11. data/bin/Makefile +209 -0
  12. data/bin/Makefile.am +8 -0
  13. data/bin/Makefile.in +209 -0
  14. data/bin/labeltree +68 -0
  15. data/bin/labeltree.in +68 -0
  16. data/bin/makesvm +70 -0
  17. data/bin/makesvm.in +70 -0
  18. data/bin/maketree +98 -0
  19. data/bin/maketree.in +98 -0
  20. data/bin/ncd +43 -0
  21. data/bin/ncd.in +43 -0
  22. data/bin/ncdmatrix +54 -0
  23. data/bin/ncdmatrix.in +54 -0
  24. data/bin/ncdvector +50 -0
  25. data/bin/ncdvector.in +50 -0
  26. data/complearn-0.6.2.gem +0 -0
  27. data/complearn.gemspec +57 -0
  28. data/config.log +597 -0
  29. data/config.status +1082 -0
  30. data/configure +4922 -0
  31. data/configure.ac +91 -0
  32. data/confstat5FpLBf/config.h +65 -0
  33. data/confstat5FpLBf/subs-1.sed +50 -0
  34. data/confstat5FpLBf/subs-2.sed +13 -0
  35. data/confstat5FpLBf/subs.frag +0 -0
  36. data/confstat5FpLBf/subs.sed +59 -0
  37. data/confstat5FpLBf/undefs.sed +24 -0
  38. data/doc/FAQ.txt +67 -0
  39. data/doc/Makefile +286 -0
  40. data/doc/Makefile.am +11 -0
  41. data/doc/Makefile.in +286 -0
  42. data/doc/devguide.txt +15 -0
  43. data/doc/example.complearnrc +14 -0
  44. data/doc/examples.txt +35 -0
  45. data/doc/man/Makefile +255 -0
  46. data/doc/man/Makefile.am +11 -0
  47. data/doc/man/Makefile.in +255 -0
  48. data/doc/man/complearn.5 +91 -0
  49. data/doc/man/labeltree.1 +35 -0
  50. data/doc/man/makesvm.1 +60 -0
  51. data/doc/man/maketree.1 +58 -0
  52. data/doc/man/ncd.1 +51 -0
  53. data/doc/man/ncdmatrix.1 +40 -0
  54. data/doc/man/ncdvector.1 +42 -0
  55. data/doc/readme.txt +101 -0
  56. data/doc/userguide.txt +46 -0
  57. data/examples/genes/blueWhale.txt +1 -0
  58. data/examples/genes/cat.txt +1 -0
  59. data/examples/genes/chimpanzee.txt +1 -0
  60. data/examples/genes/finWhale.txt +1 -0
  61. data/examples/genes/graySeal.txt +1 -0
  62. data/examples/genes/harborSeal.txt +1 -0
  63. data/examples/genes/horse.txt +1 -0
  64. data/examples/genes/human.txt +1 -0
  65. data/examples/genes/mouse.txt +1 -0
  66. data/examples/genes/rat.txt +1 -0
  67. data/ext/Makefile +167 -0
  68. data/ext/Quartet.c +399 -0
  69. data/ext/Quartet.h +62 -0
  70. data/ext/TreeScore.c +244 -0
  71. data/ext/TreeScore.h +3 -0
  72. data/ext/config.h +65 -0
  73. data/ext/config.h.in +64 -0
  74. data/ext/extconf.rb +3 -0
  75. data/ext/lib/CompLearnLib/CLConfig.rb +241 -0
  76. data/ext/lib/CompLearnLib/CompressionObject.rb +59 -0
  77. data/ext/lib/CompLearnLib/CompressionTask.rb +99 -0
  78. data/ext/lib/CompLearnLib/DistMatrix.rb +18 -0
  79. data/ext/lib/CompLearnLib/FoundComp.rb +10 -0
  80. data/ext/lib/CompLearnLib/FoundComp.rb.in +10 -0
  81. data/ext/lib/CompLearnLib/Ncd.rb +248 -0
  82. data/ext/lib/CompLearnLib/RunEnv.rb +150 -0
  83. data/ext/lib/CompLearnLib/Task.rb +39 -0
  84. data/ext/lib/CompLearnLib/TaskMaster.rb +13 -0
  85. data/ext/lib/CompLearnLib/TaskMasterMPI.rb +112 -0
  86. data/ext/lib/CompLearnLib/TaskMasterSingle.rb +39 -0
  87. data/ext/lib/CompLearnLib/Tree.rb +300 -0
  88. data/install-sh +294 -0
  89. data/missing +336 -0
  90. data/mkinstalldirs +111 -0
  91. data/o +24 -0
  92. data/scripts/CompLearn.iss +89 -0
  93. data/scripts/CompLearn.iss.in +89 -0
  94. data/scripts/debian/changelog +6 -0
  95. data/scripts/debian/control +14 -0
  96. data/scripts/makeSetup.sh +23 -0
  97. data/scripts/makeSetup.sh.in +23 -0
  98. data/scripts/makedeb.zsh +46 -0
  99. data/scripts/makedeb.zsh.in +46 -0
  100. data/tests/alltests.rb +2 -0
  101. data/tests/bz2test.rb +516 -0
  102. data/tests/sshagent-test.rb +48 -0
  103. data/tests/tests.rb +275 -0
  104. metadata +164 -0
@@ -0,0 +1,68 @@
#!/usr/bin/ruby1.8
#
# labeltree -- relabel the leaf vertices of a tree .dot file.
#
# Usage: labeltree tree.dot {dirname|filelist.txt}
#
# The tree file refers to its leaves by decimal index.  Each index i is
# replaced by a label derived from the i-th filename returned by
# CLConfig#getFilelistFromDirOrFile.  The untouched input is kept as
# "<tree.dot>.orig" and the relabelled text is written to the original path.
#
# Exit status: 0 on success, 1 on usage error or when relabelling fails.

require 'optparse'
require 'CompLearnLib/CLConfig'

setup = Hash.new('')

ARGV.options { |opt|

  opt.banner = "Usage: labeltree tree.dot {dirname|filelist.txt}"
  opt.on("")
  opt.on("Relabels a tree's leaf vertices according to filenames")
  opt.on("If a directory is specified, all files in that directory are used")
  opt.on("If a file is specified, it must contain filenames one per line")
  opt.on("The original unlabelled tree.dot will be renamed tree.dot.orig")

  opt.on("Options:\n")
  # An explicit assignment is used instead of the Ruby 1.8-only
  # `{ |setup['cfgfile']| }` form, which is a syntax error on Ruby 1.9+.
  opt.on("--configfile filename", "-f", String,
         "Read alternate configuration file") { |path| setup['cfgfile'] = path }
  opt.on("--help", "-h", "Print this help text and exit") { puts opt; exit 0 }
  opt.on("--version", "-v", "Print version and exit") { CLConfig.printVersionAndExit() }

  opt.parse!

  if ARGV.size < 2
    puts opt
    exit(1)
  end
}

CLConfig.setDefaultConfig(CLConfig.new(setup['cfgfile'])) if setup.has_key?('cfgfile')
cfg = CLConfig.getDefaultConfig

status = 0

begin
  treefile, objname = ARGV

  files = cfg.getFilelistFromDirOrFile(objname)

  # Map "index" => label.  A label is the basename truncated at its first
  # dot and stripped of every non-alphanumeric character.
  newnames = { }
  files.each_with_index { |path, i|
    fname = File.basename(path)
    fname = fname.gsub(/[.].*/, '')
    fname = fname.gsub(/[^a-zA-Z0-9]+/, '')
    # Prefix labels that start with a digit but also contain letters;
    # purely numeric labels are left as they are.
    fname = "zz#{fname}" if fname =~ /^[0-9]/ && fname =~ /[a-zA-Z]/
    newnames[i.to_s] = fname
    puts "Relabelling #{i} to #{fname}"
  }

  # File.read closes the handle before the rename below.
  orig = File.read(treefile)

  # Each side of an edge is rewritten in a single pass, so a label that is
  # itself numeric can never be mistaken for another index and replaced a
  # second time.  Regexp literals are used so that whitespace classes are
  # real classes: inside a double-quoted string "\s" is just a space.
  relabelled = orig.gsub(/^[ \t]*(\d+)[ \t]*--/) {
    newnames.has_key?($1) ? "#{newnames[$1]} --" : $&
  }
  relabelled = relabelled.gsub(/--[ \t]*(\d+)[ \t]*$/) {
    newnames.has_key?($1) ? "-- #{newnames[$1]}" : $&
  }

  File.rename(treefile, "#{treefile}.orig")

  File.open(treefile, 'w') { |out| out.write(relabelled) }

rescue
  puts "Exception: #{$!}"
  # Report the failure to the caller instead of exiting successfully.
  status = 1
end
exit(status)
@@ -0,0 +1,68 @@
#!@RUBYBIN@
#
# labeltree -- relabel the leaf vertices of a tree .dot file.
#
# Usage: labeltree tree.dot {dirname|filelist.txt}
#
# The tree file refers to its leaves by decimal index.  Each index i is
# replaced by a label derived from the i-th filename returned by
# CLConfig#getFilelistFromDirOrFile.  The untouched input is kept as
# "<tree.dot>.orig" and the relabelled text is written to the original path.
#
# Exit status: 0 on success, 1 on usage error or when relabelling fails.

require 'optparse'
require 'CompLearnLib/CLConfig'

setup = Hash.new('')

ARGV.options { |opt|

  opt.banner = "Usage: labeltree tree.dot {dirname|filelist.txt}"
  opt.on("")
  opt.on("Relabels a tree's leaf vertices according to filenames")
  opt.on("If a directory is specified, all files in that directory are used")
  opt.on("If a file is specified, it must contain filenames one per line")
  opt.on("The original unlabelled tree.dot will be renamed tree.dot.orig")

  opt.on("Options:\n")
  # An explicit assignment is used instead of the Ruby 1.8-only
  # `{ |setup['cfgfile']| }` form, which is a syntax error on Ruby 1.9+.
  opt.on("--configfile filename", "-f", String,
         "Read alternate configuration file") { |path| setup['cfgfile'] = path }
  opt.on("--help", "-h", "Print this help text and exit") { puts opt; exit 0 }
  opt.on("--version", "-v", "Print version and exit") { CLConfig.printVersionAndExit() }

  opt.parse!

  if ARGV.size < 2
    puts opt
    exit(1)
  end
}

CLConfig.setDefaultConfig(CLConfig.new(setup['cfgfile'])) if setup.has_key?('cfgfile')
cfg = CLConfig.getDefaultConfig

status = 0

begin
  treefile, objname = ARGV

  files = cfg.getFilelistFromDirOrFile(objname)

  # Map "index" => label.  A label is the basename truncated at its first
  # dot and stripped of every non-alphanumeric character.
  newnames = { }
  files.each_with_index { |path, i|
    fname = File.basename(path)
    fname = fname.gsub(/[.].*/, '')
    fname = fname.gsub(/[^a-zA-Z0-9]+/, '')
    # Prefix labels that start with a digit but also contain letters;
    # purely numeric labels are left as they are.
    fname = "zz#{fname}" if fname =~ /^[0-9]/ && fname =~ /[a-zA-Z]/
    newnames[i.to_s] = fname
    puts "Relabelling #{i} to #{fname}"
  }

  # File.read closes the handle before the rename below.
  orig = File.read(treefile)

  # Each side of an edge is rewritten in a single pass, so a label that is
  # itself numeric can never be mistaken for another index and replaced a
  # second time.  Regexp literals are used so that whitespace classes are
  # real classes: inside a double-quoted string "\s" is just a space.
  relabelled = orig.gsub(/^[ \t]*(\d+)[ \t]*--/) {
    newnames.has_key?($1) ? "#{newnames[$1]} --" : $&
  }
  relabelled = relabelled.gsub(/--[ \t]*(\d+)[ \t]*$/) {
    newnames.has_key?($1) ? "-- #{newnames[$1]}" : $&
  }

  File.rename(treefile, "#{treefile}.orig")

  File.open(treefile, 'w') { |out| out.write(relabelled) }

rescue
  puts "Exception: #{$!}"
  # Report the failure to the caller instead of exiting successfully.
  status = 1
end
exit(status)
@@ -0,0 +1,70 @@
#!/usr/bin/ruby1.8
#
# makesvm -- write SVM training and testing files built from NCD vectors.
#
# Usage: makesvm [options] filelist.txt
#
# filelist.txt is read through CLConfig#readTaggedFileList, which yields the
# training, feature and testing entries.  For every training entry a line is
# written to trainsvm.txt, and for every testing entry a line is written to
# testsvm.txt, in the current directory.  Each line has the form
#   <tag> 1:<v1> 2:<v2> ...
# where the values come from Ncd#ncdVectorFile(fname, features).
#
# Exit status: 0 on success, 1 on usage error or when no feature is listed.

require 'optparse'

require 'CompLearnLib/Ncd.rb'

setup = Hash.new('')

TaskMaster.init()

ARGV.options { |opt|

  opt.banner = "Usage: makesvm [options] filelist.txt"
  opt.on("")
  opt.on("Creates SVM classification training files from NCD's of listed files")
  opt.on("filelist.txt must be a list of files, one per line")
  opt.on("Each line must consist of:")
  opt.on("<category> <type> <filename>")
  opt.on("where <category> is an integer specifying the category")
  opt.on("and <type> is either F, G, or P")
  opt.on("F means this file is to be used as a feature extractor")
  opt.on("G means this file is to be used for training (given)")
  opt.on("P means this file is to be used for testing (prediction)")
  opt.on("Options:\n")
  # An explicit assignment is used instead of the Ruby 1.8-only
  # `{ |setup['cfgfile']| }` form, which is a syntax error on Ruby 1.9+.
  opt.on("--configfile filename", "-f", String,
         "Read alternate configuration file") { |path| setup['cfgfile'] = path }
  opt.on("--help", "-h", "Print this help text and exit") { puts opt; exit 0 }
  opt.on("--version", "-v", "Print version and exit") { CLConfig.printVersionAndExit() }

  opt.parse!

  if ARGV.size < 1
    puts opt
    exit(1)
  end
}

CLConfig.setDefaultConfig(CLConfig.new(setup['cfgfile'])) if setup.has_key?('cfgfile')
cfg = CLConfig.getDefaultConfig

filelist = ARGV[0]

training, features, testing = cfg.readTaggedFileList(filelist)

unless features.size > 0
  puts "You must have at least one feature specified (with an F type code)"
  exit(1)
end

n = Ncd.new()

[['trainsvm.txt', training], ['testsvm.txt', testing]].each { |outname, vecs|

  # The block form closes the output file even if computing a vector raises.
  File.open(outname, "w") { |f|

    vecs.each { |tag, fname|
      f.write "#{tag} "
      fv = n.ncdVectorFile(fname, features)
      # Feature indices in the output are 1-based.
      fv.each_with_index { |value, i|
        f.write "#{i+1}:#{value} "
      }
      f.puts
    }

  }

}

exit(0)
@@ -0,0 +1,70 @@
#!@RUBYBIN@
#
# makesvm -- write SVM training and testing files built from NCD vectors.
#
# Usage: makesvm [options] filelist.txt
#
# filelist.txt is read through CLConfig#readTaggedFileList, which yields the
# training, feature and testing entries.  For every training entry a line is
# written to trainsvm.txt, and for every testing entry a line is written to
# testsvm.txt, in the current directory.  Each line has the form
#   <tag> 1:<v1> 2:<v2> ...
# where the values come from Ncd#ncdVectorFile(fname, features).
#
# Exit status: 0 on success, 1 on usage error or when no feature is listed.

require 'optparse'

require 'CompLearnLib/Ncd.rb'

setup = Hash.new('')

TaskMaster.init()

ARGV.options { |opt|

  opt.banner = "Usage: makesvm [options] filelist.txt"
  opt.on("")
  opt.on("Creates SVM classification training files from NCD's of listed files")
  opt.on("filelist.txt must be a list of files, one per line")
  opt.on("Each line must consist of:")
  opt.on("<category> <type> <filename>")
  opt.on("where <category> is an integer specifying the category")
  opt.on("and <type> is either F, G, or P")
  opt.on("F means this file is to be used as a feature extractor")
  opt.on("G means this file is to be used for training (given)")
  opt.on("P means this file is to be used for testing (prediction)")
  opt.on("Options:\n")
  # An explicit assignment is used instead of the Ruby 1.8-only
  # `{ |setup['cfgfile']| }` form, which is a syntax error on Ruby 1.9+.
  opt.on("--configfile filename", "-f", String,
         "Read alternate configuration file") { |path| setup['cfgfile'] = path }
  opt.on("--help", "-h", "Print this help text and exit") { puts opt; exit 0 }
  opt.on("--version", "-v", "Print version and exit") { CLConfig.printVersionAndExit() }

  opt.parse!

  if ARGV.size < 1
    puts opt
    exit(1)
  end
}

CLConfig.setDefaultConfig(CLConfig.new(setup['cfgfile'])) if setup.has_key?('cfgfile')
cfg = CLConfig.getDefaultConfig

filelist = ARGV[0]

training, features, testing = cfg.readTaggedFileList(filelist)

unless features.size > 0
  puts "You must have at least one feature specified (with an F type code)"
  exit(1)
end

n = Ncd.new()

[['trainsvm.txt', training], ['testsvm.txt', testing]].each { |outname, vecs|

  # The block form closes the output file even if computing a vector raises.
  File.open(outname, "w") { |f|

    vecs.each { |tag, fname|
      f.write "#{tag} "
      fv = n.ncdVectorFile(fname, features)
      # Feature indices in the output are 1-based.
      fv.each_with_index { |value, i|
        f.write "#{i+1}:#{value} "
      }
      f.puts
    }

  }

}

exit(0)
@@ -0,0 +1,98 @@
#!/usr/bin/ruby1.8
#
# maketree -- search for a well-scoring tree for a distance matrix.
#
# Usage: maketree [options] distmat.txt treeout.dot
#
# Reads the distance matrix with DistMatrix.readFromFile, starts from a
# random tree, and hands TreeTask jobs to TaskMaster.  Whenever a job reports
# a tree whose score is higher than the best seen so far, that tree becomes
# the new best.  The best tree is finally written to treeout.dot with its
# leaves named by decimal index.
#
# Exit status: 0 on success, 1 on usage error or when the search fails.

require 'optparse'
require 'CompLearn'
require 'CompLearnLib/TaskMaster'
require 'CompLearnLib/Tree'
require 'CompLearnLib/CLConfig'
require 'CompLearnLib/DistMatrix'

include MTree

setup = Hash.new('')

TaskMaster.init()

ARGV.options { |opt|

  opt.banner = "Usage: maketree [options] distmat.txt treeout.dot"
  opt.on("")
  opt.on("Computes a good tree to fit a distance matrix distmat.txt.")
  opt.on("distmat.txt must be a distance matrix file listing n*n")
  opt.on("positive real numbers. Each line must contain n numbers")
  opt.on("separated by spaces. The resultant tree will be written")
  opt.on("to treeout.dot. This file is suitable for further processing")
  opt.on("by labeltree, dot, or neato, for example.")
  opt.on("If relative filenames are used, inputDir and cwd will be searched\n")
  opt.on("Options:\n")
  # An explicit assignment is used instead of the Ruby 1.8-only
  # `{ |setup['cfgfile']| }` form, which is a syntax error on Ruby 1.9+.
  opt.on("--configfile filename", "-f", String,
         "Read alternate configuration file") { |path| setup['cfgfile'] = path }
  opt.on("--help", "-h", "Print this help text and exit") { puts opt; exit 0 }
  opt.on("--version", "-v", "Print version and exit") { CLConfig.printVersionAndExit() }

  opt.parse!

  if ARGV.size < 2
    puts opt
    exit(1)
  end

}

status = 0

begin

  CLConfig.setDefaultConfig(CLConfig.new(setup['cfgfile'])) if setup.has_key?('cfgfile')
  cfg = CLConfig.getDefaultConfig
  distmat, treename = ARGV
  dm = DistMatrix.readFromFile(distmat)
  speciescount = dm.size
  best = Tree.randomTree(speciescount)
  # NOTE(review): TaskMaster.init was already called before option parsing;
  # the second call is kept as in the original -- confirm whether it is needed.
  TaskMaster.init
  TaskMaster.storeEverywhere('dm', dm)
  if cfg.isUseBestThirdOnly?()
    ts = TreeScore.makeBestList(dm)
  else
    ts = TreeScore.makeFullList(dm)
  end
  ts.penalty = cfg.unpairedPenalty()
  maxfailedtries = cfg.maxFailedTries()
  treespertry = cfg.treesPerTry()
  bestscore = ts.score(best)

  puts "The penalty term is #{ts.penalty}"
  puts "Score is first #{bestscore}"

  enqc = 0                 # jobs enqueued whose callback has not yet run
  todo = treespertry
  failcount = 0            # results since the last improvement
  while todo > 0 && failcount < maxfailedtries
    task = TreeTask.new(best, 100, cfg.unpairedPenalty())
    enqc += 1
    # The unused callback parameters get their own names so they cannot
    # clobber an outer local (Ruby 1.8 block parameters assign to same-named
    # locals in the enclosing scope).
    TaskMaster.enqueue(task) { |res, _task, _src|
      newbest, newbestscore = res
      if newbestscore > bestscore
        bestscore = newbestscore
        best = newbest
        failcount = 0
        puts "New best score found: #{bestscore}"
      else
        failcount += 1
      end
      enqc -= 1
    }
    todo -= 1
  end

  # enqc is only decremented inside the callback above, so this loop ends
  # once every enqueued job has reported back.
  while enqc > 0
    TaskMaster.waitForSlave()
  end

  bestscore = ts.score(best)
  puts "Best tree score is #{bestscore}"
  names = (0...speciescount).map { |i| i.to_s }
  # The block form flushes and closes the output file explicitly.
  File.open(treename, 'w') { |f|
    f.write(best.toDotString(names, 'tree', "Score: #{bestscore}"))
  }

rescue
  puts "Exception: #{$!}"
  # Report the failure to the caller instead of exiting successfully.
  status = 1
end
exit(status)
@@ -0,0 +1,98 @@
#!@RUBYBIN@
#
# maketree -- search for a well-scoring tree for a distance matrix.
#
# Usage: maketree [options] distmat.txt treeout.dot
#
# Reads the distance matrix with DistMatrix.readFromFile, starts from a
# random tree, and hands TreeTask jobs to TaskMaster.  Whenever a job reports
# a tree whose score is higher than the best seen so far, that tree becomes
# the new best.  The best tree is finally written to treeout.dot with its
# leaves named by decimal index.
#
# Exit status: 0 on success, 1 on usage error or when the search fails.

require 'optparse'
require 'CompLearn'
require 'CompLearnLib/TaskMaster'
require 'CompLearnLib/Tree'
require 'CompLearnLib/CLConfig'
require 'CompLearnLib/DistMatrix'

include MTree

setup = Hash.new('')

TaskMaster.init()

ARGV.options { |opt|

  opt.banner = "Usage: maketree [options] distmat.txt treeout.dot"
  opt.on("")
  opt.on("Computes a good tree to fit a distance matrix distmat.txt.")
  opt.on("distmat.txt must be a distance matrix file listing n*n")
  opt.on("positive real numbers. Each line must contain n numbers")
  opt.on("separated by spaces. The resultant tree will be written")
  opt.on("to treeout.dot. This file is suitable for further processing")
  opt.on("by labeltree, dot, or neato, for example.")
  opt.on("If relative filenames are used, inputDir and cwd will be searched\n")
  opt.on("Options:\n")
  # An explicit assignment is used instead of the Ruby 1.8-only
  # `{ |setup['cfgfile']| }` form, which is a syntax error on Ruby 1.9+.
  opt.on("--configfile filename", "-f", String,
         "Read alternate configuration file") { |path| setup['cfgfile'] = path }
  opt.on("--help", "-h", "Print this help text and exit") { puts opt; exit 0 }
  opt.on("--version", "-v", "Print version and exit") { CLConfig.printVersionAndExit() }

  opt.parse!

  if ARGV.size < 2
    puts opt
    exit(1)
  end

}

status = 0

begin

  CLConfig.setDefaultConfig(CLConfig.new(setup['cfgfile'])) if setup.has_key?('cfgfile')
  cfg = CLConfig.getDefaultConfig
  distmat, treename = ARGV
  dm = DistMatrix.readFromFile(distmat)
  speciescount = dm.size
  best = Tree.randomTree(speciescount)
  # NOTE(review): TaskMaster.init was already called before option parsing;
  # the second call is kept as in the original -- confirm whether it is needed.
  TaskMaster.init
  TaskMaster.storeEverywhere('dm', dm)
  if cfg.isUseBestThirdOnly?()
    ts = TreeScore.makeBestList(dm)
  else
    ts = TreeScore.makeFullList(dm)
  end
  ts.penalty = cfg.unpairedPenalty()
  maxfailedtries = cfg.maxFailedTries()
  treespertry = cfg.treesPerTry()
  bestscore = ts.score(best)

  puts "The penalty term is #{ts.penalty}"
  puts "Score is first #{bestscore}"

  enqc = 0                 # jobs enqueued whose callback has not yet run
  todo = treespertry
  failcount = 0            # results since the last improvement
  while todo > 0 && failcount < maxfailedtries
    task = TreeTask.new(best, 100, cfg.unpairedPenalty())
    enqc += 1
    # The unused callback parameters get their own names so they cannot
    # clobber an outer local (Ruby 1.8 block parameters assign to same-named
    # locals in the enclosing scope).
    TaskMaster.enqueue(task) { |res, _task, _src|
      newbest, newbestscore = res
      if newbestscore > bestscore
        bestscore = newbestscore
        best = newbest
        failcount = 0
        puts "New best score found: #{bestscore}"
      else
        failcount += 1
      end
      enqc -= 1
    }
    todo -= 1
  end

  # enqc is only decremented inside the callback above, so this loop ends
  # once every enqueued job has reported back.
  while enqc > 0
    TaskMaster.waitForSlave()
  end

  bestscore = ts.score(best)
  puts "Best tree score is #{bestscore}"
  names = (0...speciescount).map { |i| i.to_s }
  # The block form flushes and closes the output file explicitly.
  File.open(treename, 'w') { |f|
    f.write(best.toDotString(names, 'tree', "Score: #{bestscore}"))
  }

rescue
  puts "Exception: #{$!}"
  # Report the failure to the caller instead of exiting successfully.
  status = 1
end
exit(status)
OSZAR »