Skip to content

Commit c3f1980

Browse files
committed
releasing source code
1 parent 11cb416 commit c3f1980

13 files changed

+3112
-0
lines changed

Algorithm.pm

Lines changed: 576 additions & 0 deletions
Large diffs are not rendered by default.

Grammar.pm

Lines changed: 430 additions & 0 deletions
Large diffs are not rendered by default.

Individual.pm

Lines changed: 878 additions & 0 deletions
Large diffs are not rendered by default.

PDL/Kohonen.pm

Lines changed: 262 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,262 @@
1+
# -*- mode: cperl -*-
2+
package PDL::Kohonen;
3+
4+
use PDL;
5+
use PDL::IO::FastRaw;
6+
7+
@ISA = qw(PDL);
8+
9+
### TO DO: load+save methods (simply with fraw)
10+
11+
12+
# data is always D x N
13+
# where D is the dimensionality of the data
14+
# and there are N data points
15+
# maps are always D x W x H
16+
17+
sub initialize {
18+
my $class = shift;
19+
my $self = {
20+
PDL => null, # used to store PDL object
21+
};
22+
bless $self, $class;
23+
}
24+
25+
##
26+
# init method
27+
##
28+
# usage: $map->init($data, 10, 6, 4);
29+
# initialises a 10x6x4 map randomly based on the min/max
30+
# of the data in $data piddle
31+
# or: $map->init($perlarrayref, 8, 7)
32+
# initialises a 8x7 map as above but uses a reference
33+
# to a perl array of individual data point piddles
34+
# [note: arrays of many piddles seem to take up memory]
35+
##
36+
sub init {
37+
my ($self, $data, @mdims) = @_;
38+
die "init() arguments: data, map_dimensions\n"
39+
unless (defined $data && @mdims > 0);
40+
41+
if (ref($data) eq 'ARRAY') {
42+
$data = cat(@$data);
43+
}
44+
45+
my $max = $data->mv(-1,0)->maximum;
46+
my $min = $data->mv(-1,0)->minimum;
47+
48+
my @ddims = $data->dims;
49+
my $n = pop @ddims;
50+
$self->{mapdims} = \@mdims; # map dimensions, e.g. 6 x 4
51+
$self->{nmapdims} = scalar @mdims;
52+
$self->{mapvolume} = ones(@mdims)->nelem;
53+
$self->{datadims} = \@ddims; # data dimensions, e.g. 20 x 15
54+
$self->{ndatadims} = scalar @ddims;
55+
($self->{sdim}) = sort { $a<=>$b } @mdims; # smallest map dimension size
56+
$self->{PDL} = random(@ddims, @mdims);
57+
$self->{PDL} *= ($max-$min)+$min;
58+
return $self;
59+
}
60+
61+
sub train {
62+
my ($self, $data, $p) = @_;
63+
64+
if (ref($data) eq 'ARRAY') {
65+
$data = cat(@$data);
66+
}
67+
68+
my $n = $data->dim(-1);
69+
my $dta = $data->mv(-1, 0);
70+
71+
die "you must init() the map first\n" unless ($self->{mapdims});
72+
73+
my $alpha = defined $p->{alpha} ? $p->{alpha} : 0.1;
74+
my $radius = defined $p->{radius} ? $p->{radius} : $self->{sdim}/2;
75+
76+
$p->{epochs} = 1 unless (defined $p->{epochs});
77+
$p->{order} = 'random' unless (defined $p->{order}); # also: linear
78+
79+
$p->{ramp} = 'linear' unless (defined $p->{ramp}); # also: off
80+
81+
my $winnerfunc = $p->{winnerfunc} || \&euclidean_winner;
82+
$p->{progress} = 'on' unless (defined $p->{progress});
83+
84+
my ($dalpha, $dradius) = (0, 0);
85+
if ($p->{ramp} =~ /linear/i) {
86+
$dalpha = $alpha/$p->{epochs};
87+
$dradius = $radius/$p->{epochs};
88+
}
89+
90+
my $progformat = $p->{progress} =~ /on/i ?
91+
"\rradius %2d alpha %6.4f data %5d of %5d epoch %3d of %3d" : '';
92+
93+
my $ordertype = 0;
94+
$ordertype = 1 if ($p->{order} =~ /linear/i);
95+
96+
if ($p->{progress} =~ /on/) {
97+
printf "training map (%s) with %d data points (%s) for %d epochs...\n",
98+
join("x", @{$self->{mapdims}}), $n,
99+
join("x", @{$self->{datadims}}), $p->{epochs};
100+
}
101+
local $| = 1;
102+
103+
for (my $e=0; $e<$p->{epochs}; $e++) {
104+
for (my $i=0; $i<$n; $i++) {
105+
printf $progformat, $radius, $alpha, $i+1, $n, $e+1, $p->{epochs};
106+
my $d = $ordertype ? $i : int(rand($n));
107+
108+
my $vec = $dta->slice("($d)");
109+
110+
my @w = $self->$winnerfunc($vec);
111+
112+
for (my $r=$radius; $r>=0; $r--) {
113+
my $hood = $self->hood($r, @w);
114+
$hood -= ($hood-$vec)*$alpha;
115+
}
116+
}
117+
$alpha -= $dalpha;
118+
$radius -= $dradius;
119+
}
120+
print "\n" if ($progformat);
121+
}
122+
123+
124+
# runs your data through a trained map
125+
# returns two piddles
126+
# - winning node coordinates (ushort M x N)
127+
# - quantisation error (double N)
128+
sub apply {
129+
my ($self, $data, $p) = @_;
130+
my $winnerfunc = $p->{winnerfunc} || \&euclidean_winner;
131+
$p->{progress} = 'on' unless (defined $p->{progress});
132+
133+
if (ref($data) eq 'ARRAY') {
134+
$data = cat(@$data);
135+
}
136+
137+
my $n = $data->dim(-1);
138+
my $dta = $data->mv(-1, 0);
139+
140+
my $mds = join("x", @{$self->{mapdims}});
141+
my $progformat = $p->{progress} =~ /on/i ?
142+
"\rapplying map ($mds) to data point %5d of %5d" : '';
143+
144+
local $| = 1;
145+
146+
my $winvecs = zeroes ushort, $n, $self->{nmapdims};
147+
my $errors = zeroes $n;
148+
149+
my $error = 0;
150+
for (my $i=0; $i<$n; $i++) {
151+
printf $progformat, $i+1, $n;
152+
my $vec = $dta->slice("($i)");
153+
$winvecs->slice("($i)") .= ushort($self->$winnerfunc($vec, \$error));
154+
set($errors, $i, $error);
155+
}
156+
print "\n" if ($progformat);
157+
158+
return ($winvecs->mv(0, -1), $errors);
159+
}
160+
161+
sub euclidean_winner {
162+
my ($self, $vec, $qref) = @_;
163+
164+
my $d = $vec - $self;
165+
$d *= $d;
166+
while ($d->ndims > $self->{nmapdims}) {
167+
$d = $d->sumover();
168+
}
169+
my @d = $d->dims();
170+
my ($i) = $d->flat->qsorti->list;
171+
172+
# pass the error back through a reference, if given
173+
if ($qref && ref($qref)) {
174+
$$qref = sqrt($d->flat->at($i));
175+
}
176+
177+
return $self->unflattenindex($i);
178+
}
179+
180+
sub unflattenindex {
181+
my ($self, $i) = @_;
182+
my @result;
183+
my $volume = $self->{mapvolume};
184+
foreach my $dim (reverse @{$self->{mapdims}}) {
185+
$volume = $volume/$dim;
186+
my $index = int($i/$volume);
187+
unshift @result, $index;
188+
$i -= $index*$volume;
189+
}
190+
return @result;
191+
}
192+
193+
sub hood {
194+
my ($self, $radius, @coords) = @_;
195+
$radius = int($radius);
196+
197+
my $slice = ',' x ($self->{ndatadims}-1);
198+
199+
if ($radius == 0) {
200+
return $self->slice(join ',', $slice, @coords);
201+
}
202+
203+
for (my $i=0; $i<@coords; $i++) {
204+
my ($left, $right) = ($coords[$i]-$radius, $coords[$i]+$radius);
205+
$left = 0 if ($left < 0);
206+
$right = $self->{mapdims}->[$i] - 1 if ($right >= $self->{mapdims}->[$i]);
207+
$slice .= ",$left:$right";
208+
}
209+
return $self->slice($slice);
210+
}
211+
212+
sub save {
213+
my ($self, $filename) = @_;
214+
$self->writefraw($filename);
215+
open(HDR, ">>$filename.hdr") || die "can't append to $filename.hdr";
216+
foreach $key (qw(nmapdims ndatadims sdim mapvolume)) {
217+
print HDR "PDL::Kohonen $key $self->{$key}\n";
218+
}
219+
close(HDR);
220+
}
221+
222+
sub load {
223+
my ($self, $filename) = @_;
224+
$self->{PDL} = readfraw($filename);
225+
226+
open(HDR, "$filename.hdr") || die "can't open $filename.hdr";
227+
while (<HDR>) {
228+
if (/^PDL::Kohonen/) {
229+
chomp;
230+
my ($dum, $key, $val) = split ' ', $_, 3;
231+
$self->{$key} = $val;
232+
}
233+
}
234+
close(HDR);
235+
236+
die "couldn't find header information while loading map\n"
237+
unless ($self->{nmapdims} && $self->{ndatadims} && $self->{sdim});
238+
239+
my @dims = $self->dims;
240+
my @mdims = splice @dims, -$self->{nmapdims};
241+
$self->{mapdims} = \@mdims;
242+
$self->{datadims} = \@dims;
243+
}
244+
245+
246+
247+
sub quantiseOLDCODE {
248+
my ($map, $data, $distfunc) = @_;
249+
$map = $map->clump(1,2);
250+
my $mapsize = $map->dim(1);
251+
my $dupdata = $data->dummy(1, $mapsize);
252+
my $d = $dupdata - $map;
253+
$d *= $d;
254+
$d = $d->sumover();
255+
my $i = $d->qsorti->slice("(0),:"); # get the map index of smallest dists
256+
for (my $dim = 0; $dim<$map->dim(0); $dim++) {
257+
$data->slice("($dim)") .= $map->slice("($dim)")->index($i);
258+
}
259+
}
260+
261+
262+
1;

PDL/ReadAudioSoundFile.pm

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
package PDL::ReadAudioSoundFile;
2+
3+
use PDL;
4+
use PDL::Audio;
5+
6+
#### these are not needed any more - see _OLD subs below
7+
#use Audio::SoundFile;
8+
#use Audio::SoundFile::Header;
9+
10+
#no longer exported due to strange AUTOLOAD problem in perl 5.8.8
11+
#require Exporter;
12+
#@EXPORT = qw(readaudiosoundfile writeaudiosoundfile);
13+
14+
$BUFSIZE = 8**7;
15+
16+
sub readaudiosoundfile {
17+
my ($file) = @_;
18+
19+
$^W = 0;
20+
my $pdl = raudio($file);
21+
$^W = 1;
22+
23+
# raudio reads stereo files the other way round
24+
if ($pdl->ndims == 2) {
25+
my $hdr = $pdl->gethdr;
26+
$pdl = $pdl->mv(-1,0)->sever;
27+
$pdl->sethdr($hdr);
28+
}
29+
return $pdl;
30+
}
31+
32+
## default WAV file output only
33+
sub writeaudiosoundfile {
34+
my ($pdl, $file) = @_;
35+
$pdl = $pdl->scale2short unless ($pdl->type == short);
36+
$^W = 0;
37+
$pdl->mv(-1,0)->waudio( path => $file, filetype => FILE_RIFF,
38+
format => FORMAT_16_LINEAR_LITTLE_ENDIAN );
39+
$^W = 1;
40+
}
41+
42+
43+
sub readaudiosoundfile_OLD {
44+
my ($file, $debug) = @_;
45+
46+
my $header;
47+
my $reader = new Audio::SoundFile::Reader($file, \$header);
48+
my $channels = $header->{channels};
49+
my $samples = $header->{samples};
50+
my $samplerate = $header->{samplerate};
51+
if ($debug) {
52+
foreach $key (keys %$header) {
53+
warn "$key => $header->{$key}\n";
54+
}
55+
}
56+
my %hdr = (path=>$file,
57+
rate=>$samplerate);
58+
59+
my $pdl;
60+
my $remaining = $samples*$channels;
61+
my ($buf, $got);
62+
while (($got = $reader->bread_pdl(\$buf, $remaining > $BUFSIZE ? $BUFSIZE : $remaining)) > 0) {
63+
$remaining -= $got;
64+
if (defined $pdl) {
65+
$pdl = $pdl->append($buf);
66+
$pdl->sever;
67+
} else {
68+
$pdl = $buf->copy();
69+
}
70+
}
71+
$reader->close();
72+
# if stereo, the piddle from bread_pdl has the two channels
73+
# 'interleaved' and these need to be separated.
74+
if ($channels == 2) {
75+
my $left = $pdl->slice("0:-1:2");
76+
my $right = $pdl->slice("1:-1:2");
77+
my $both = cat $left, $right;
78+
$both->sethdr(\%hdr);
79+
return $both;
80+
} elsif ($channels == 1) {
81+
$pdl->sethdr(\%hdr);
82+
return $pdl;
83+
} else {
84+
die "can't handle $channels channels";
85+
}
86+
}
87+
88+
89+
sub writeaudiosoundfile_OLD {
90+
my ($pdl, $file) = @_;
91+
92+
my $header = new Audio::SoundFile::Header(
93+
samplerate => $pdl->rate() || 44100,
94+
channels => $pdl->dim(1),
95+
pcmbitwidth => 16,
96+
format => SF_FORMAT_WAV | SF_FORMAT_PCM,
97+
);
98+
99+
my $writer = new Audio::SoundFile::Writer($file, $header);
100+
101+
$pdl = $pdl->scale2short unless ($pdl->type == short);
102+
103+
# fold stereo sample into a single piddle
104+
if ($pdl->dim(1) == 2) {
105+
$pdl = $pdl->xchg(0,1)->flat;
106+
}
107+
108+
for (my $i=0; $i<$pdl->dim(0); $i+=$BUFSIZE) {
109+
my $end = $i+$BUFSIZE-1;
110+
$end = -1 if ($end >= $pdl->dim(0));
111+
112+
my $buf = $pdl->slice("$i:$end")->sever;
113+
my $wrote = $writer->bwrite_pdl($buf);
114+
}
115+
$writer->close;
116+
}
117+
1;

0 commit comments

Comments
 (0)