What I've done is write a network server that expects "$datasize\n" followed by a data stream, which must be a 16 kHz, 16-bit PCM .WAV file. That format is a requirement of sphinx2, and not knowing this cost me 80% of my time on this project. =(
The server then decodes the audio with sphinx2 via the Speech::Recognizer::SPX Perl module and returns the deciphered text to the network client.
I chose to implement it this way so that I could easily throw more resources at the SR engine by adding more machines behind a load balancer. The server can of course run on localhost as well. My goal is to build a broker daemon that will do the load balancing by distributing the processing among multiple registered agent daemons. The broker will also manage multiple dictionaries and allow one to build dictionaries on the fly.
This code is VERY ALPHA, but it works. Feel free to email me if you have any questions, and especially if you'd like to contribute. Please believe me when I say my code is not usually this ugly, but I've been banging my head against the wall for almost a week to get something working.
Also, note that this is NOT Asterisk specific by any means. It could surely be used in countless other areas.
To run everything at 8 kHz instead of 16 kHz, make the following changes. On the server:
Change:
-samp => 16000,
To:
-samp => 8000,
Change all lines that refer to "$SPHINXDIR/model/hmm/6k" to "$SPHINXDIR/model/hmm/communicator"
And on the client:
Change:
"sox -t $type - -s -r 16000 -w -t wav - 2>/dev/null"
To:
"sox -t $type - -s -r 8000 -w -t wav - 2>/dev/null"
The sample dictionary provided is just Yes/No/Accept/Cancel.
You can create your own here
If you choose to use your own dictionary, update the server code appropriately. Specifically:
-cepdir => "$SPHINXDIR/model/lm/confirm",
-datadir => "$SPHINXDIR/model/lm/confirm",
..
-kbdumpdir => "$SPHINXDIR/model/lm/confirm",
-lmfn => "$SPHINXDIR/model/lm/confirm/confirm.lm",
-dictfn => "$SPHINXDIR/model/lm/confirm/confirm.dic",
#!/usr/bin/perl
# sphinx-netserver.pl
# Copyright (c) 2005 Josh McAllister
#
# This program is free software; you can redistribute it and/or modify
# it under the same terms as Perl itself.
#
# Written by Josh McAllister
#
# Preforking speech-recognition server built on sphinx2.
#
# Protocol (one request per TCP connection, port 1069):
#   client -> server : "$datasize\n" followed by exactly $datasize bytes
#                      of audio
#   server -> client : the recognised text; the server then closes the
#                      connection
#
# The parent keeps $PREFORK children alive.  Each child initialises its own
# sphinx2 decoder, serves up to $MAX_CLIENTS_PER_CHILD connections and exits,
# at which point the parent forks a replacement.

use strict;
use warnings;

use IO::Socket;
use Symbol;
use POSIX;
use Speech::Recognizer::SPX qw(:uttproc :fbs $SPHINXDIR);
#use Speech::Recognizer::SPX::Server;

# This assignment runs after the compile-time import above, so it is the
# value every model path below is built from.
$SPHINXDIR = '/usr/share/sphinx2';

# global variables
my $PREFORK               = 5;     # number of children to maintain
my $MAX_CLIENTS_PER_CHILD = 50;    # number of clients each child should process
my %children              = ();    # keys are current child process IDs
my $children              = 0;     # current number of children

# Upper bound on the client-supplied $datasize.  The size comes straight off
# the network and is used as a read length, so it must not be trusted blindly.
my $MAX_UTTERANCE_BYTES = 16 * 1024 * 1024;

# establish SERVER socket, bind and listen.
my $server = IO::Socket::INET->new(
    LocalPort => 1069,
    Type      => SOCK_STREAM,
    Proto     => 'tcp',
    Reuse     => 1,
    Listen    => 10,
) or die "making socket: $@\n";

# SIGCHLD handler: takes care of dead children.
# Several children can exit while a single SIGCHLD is pending, so reap in a
# non-blocking loop until nothing is left instead of calling wait() once.
sub REAPER {
    local ($!, $?);    # don't clobber the interrupted code's error state
    while ((my $pid = waitpid(-1, WNOHANG)) > 0) {
        # Only count children we actually recorded.
        next unless delete $children{$pid};
        $children--;
    }
    $SIG{CHLD} = \&REAPER;
    return;
}

# SIGINT handler: take the whole pool down with us.
sub HUNTSMAN {
    local ($SIG{CHLD}) = 'IGNORE';    # we're going to kill our children
    kill 'INT' => keys %children;
    exit;                             # clean up with dignity
}

# Initialise the sphinx2 decoder for the current process.
sub _init_sphinx {
    fbs_init({
        -live => 'FALSE',
        -samp => 16000,
        -adcin => 'TRUE',
        -ctloffset => 0,
        -ctlcount => 100000000,
        -cepdir => "$SPHINXDIR/model/lm/confirm",
        -datadir => "$SPHINXDIR/model/lm/confirm",
        -agcmax => 'FALSE',
        -langwt => 6.5,
        -fwdflatlw => 8.5,
        -rescorelw => 9.5,
        -ugwt => 0.5,
        -fillpen => 1e-10,
        -silpen => 1e-10,    #0.005,
        -inspen => 0.65,
        -top => 1,
        -topsenfrm => 3,
        -topsenthresh => -70000,
        -beam => 2e-06,
        -npbeam => 2e-06,
        -lpbeam => 2e-05,
        -lponlybeam => 0.0005,
        -nwbeam => 0.0005,
        -fwdflat => 'FALSE',
        -fwdflatbeam => 1e-08,
        -fwdflatnwbeam => 0.0003,
        -bestpath => 'TRUE',
        -kbdumpdir => "$SPHINXDIR/model/lm/confirm",
        -lmfn => "$SPHINXDIR/model/lm/confirm/confirm.lm",
        -dictfn => "$SPHINXDIR/model/lm/confirm/confirm.dic",
        -phnfn => "$SPHINXDIR/model/hmm/6k/phone",
        -mapfn => "$SPHINXDIR/model/hmm/6k/map",
        -hmmdir => "$SPHINXDIR/model/hmm/6k",
        -hmmdirlist => "$SPHINXDIR/model/hmm/6k",
        -ndictfn => "$SPHINXDIR/model/hmm/6k/noisedict",
        '-8bsen' => 'TRUE',
        -sendumpfn => "$SPHINXDIR/model/hmm/6k/sendump",
        -cbdir => "$SPHINXDIR/model/hmm/6k",
    });
    return;
}

# Serve one connected client: read the size header and the audio, decode it
# and write the hypothesis back.  Malformed or truncated requests are logged
# and dropped without touching the decoder.  The caller closes the socket.
sub _handle_client {
    my ($client) = @_;

    binmode $client;

    my $datasize = readline $client;
    unless (defined $datasize) {
        warn "SERVER: client disconnected before sending a size header\n";
        return;
    }
    $datasize =~ s/\r?\n\z//;

    unless ($datasize =~ /\A\d+\z/
        && $datasize > 0
        && $datasize <= $MAX_UTTERANCE_BYTES)
    {
        warn "SERVER: rejecting request with invalid size header\n";
        return;
    }

    my $got = read($client, my $audio, $datasize);
    #print "SERVER DEBUG: Expecting $datasize bytes, got $got bytes.\n";
    unless (defined $got && $got == $datasize) {
        warn "SERVER: expected $datasize bytes, got "
            . (defined $got ? $got : 'a read error') . "\n";
        return;
    }

    uttproc_begin_utt();
    # A decoder failure is fatal for this child; the parent will replace it.
    uttproc_rawdata($audio, 1) or die "uttproc_rawdata failed";
    uttproc_end_utt();

    my ($fr, $hyp) = uttproc_result(1);
    #print "frames $fr\n";
    $hyp = '' unless defined $hyp;    # no hypothesis -> empty reply

    print STDERR "SERVER RESULT: $hyp\n";
    print {$client} $hyp;
    return;
}

# Fork one worker.  The parent records the child and returns; the child
# serves its quota of clients and exits without ever returning.
sub make_new_child {
    # Block SIGINT and SIGCHLD across fork and bookkeeping, so that neither
    # HUNTSMAN nor REAPER can run while %children is out of date.
    my $sigset = POSIX::SigSet->new(SIGINT, SIGCHLD);
    sigprocmask(SIG_BLOCK, $sigset)
        or die "Can't block SIGINT/SIGCHLD for fork: $!\n";

    my $pid = fork;
    die "fork: $!" unless defined $pid;

    if ($pid) {
        # Parent records the child's birth and returns.
        $children{$pid} = 1;
        $children++;
        sigprocmask(SIG_UNBLOCK, $sigset)
            or die "Can't unblock SIGINT/SIGCHLD for fork: $!\n";
        return;
    }

    # Child can *not* return from this subroutine.
    $SIG{INT}  = 'DEFAULT';    # make SIGINT kill us as it did before
    $SIG{CHLD} = 'DEFAULT';    # the pool bookkeeping belongs to the parent

    # unblock signals
    sigprocmask(SIG_UNBLOCK, $sigset)
        or die "Can't unblock SIGINT/SIGCHLD for fork: $!\n";

    # Initialize sphinx
    _init_sphinx();

    # handle connections until we've reached $MAX_CLIENTS_PER_CHILD
    for my $served (1 .. $MAX_CLIENTS_PER_CHILD) {
        my $client = $server->accept() or last;
        _handle_client($client);
        close $client;
    }

    # tidy up gracefully and finish
    # this exit is VERY important, otherwise the child will become
    # a producer of more and more children, forking yourself into
    # process death.
    fbs_end();
    exit;
}

# Install signal handlers before the first fork so that a child which dies
# immediately is still reaped and replaced.
$SIG{CHLD} = \&REAPER;
$SIG{INT}  = \&HUNTSMAN;

# Fork off our children.
make_new_child() for 1 .. $PREFORK;

# And maintain the population.
while (1) {
    sleep;    # wait for a signal (i.e., child's death)
    make_new_child() while $children < $PREFORK;    # top up the child pool
}
#!/usr/bin/perl
# sphinx-netclient.pl
# Copyright (c) 2005 Josh McAllister
#
# This program is free software; you can redistribute it and/or modify
# it under the same terms as Perl itself.
#
# Written by Josh McAllister
#
# Remember... The sample dictionary only understands yes,no,accept,cancel.
# Record a .wav or .gsm file with any of those words (test.wav), then start
# sphinx-netserver.pl, then run sphinx-netclient.pl test.wav to test.

use strict;
use warnings;

die("usage: $0 <file.wav|file.gsm>\n") unless @ARGV && -e $ARGV[0];

my $answer = asr($ARGV[0]);
print "Result: " . (defined $answer ? $answer : '') . "\n";
exit;

# asr($file [, $host [, $port [, $type]]])
#
# Convert an audio file with sox, send it to a sphinx-netserver and return
# the recognised text.
#
#   $file  path of a .gsm/.wav file, or a reference to an open filehandle
#   $host  server host (default 'localhost')
#   $port  server port (default '1069')
#   $type  'gsm' or 'wav'; optional for a path (taken from the extension),
#          required when $file is a filehandle
#
# Returns the server's reply ('' if it sent nothing), or undef if the file
# cannot be opened, its type is unknown, sox produces no output, or the
# server cannot be reached.
sub asr {
    use IO::Socket;
    use FileHandle;
    use IPC::Open2;
    use POSIX ();

    my $file = shift or return;
    my $host = shift || 'localhost';
    my $port = shift || '1069';
    my $type = shift;

    # Use the capture from this match only; a failed match would otherwise
    # leave a stale $1 behind.
    ($type) = $file =~ /(gsm|wav)$/ unless defined $type;
    unless (defined $type && $type =~ /\A(?:gsm|wav)\z/) {
        warn "Unknown file type ($file)";
        return;
    }
    #print "FTYPE: $type\n";

    # Accept either an already-open handle or a filename.
    my $fh;
    my $opened_here = 0;
    if (ref $file) {
        $fh = $file;
    }
    else {
        open($fh, '<', $file) or return;
        $opened_here = 1;
    }
    binmode $fh;

    # Let sox read the input handle directly and only read its output here.
    # Feeding sox through one pipe while its output waits in another can
    # deadlock once both pipe buffers are full.
    my $sox_pid = open(my $sox_out, '-|');
    unless (defined $sox_pid) {
        warn "Could not fork sox: $!\n";
        close $fh if $opened_here;
        return;
    }
    if ($sox_pid == 0) {
        # Child: stdin becomes the audio file, stdout is the pipe to the
        # parent.  _exit so a failure never runs the caller's code twice.
        open(STDIN, '<&', $fh) or POSIX::_exit(127);
        exec("sox -t $type - -s -r 16000 -w -t wav - 2>/dev/null")
            or POSIX::_exit(127);
    }

    close $fh if $opened_here;    # the sox child holds its own copy

    binmode $sox_out;
    my $sox = do { local $/; <$sox_out> };
    my $sox_ok = close $sox_out;    # also reaps the sox process

    unless (defined $sox && length $sox) {
        warn "sox produced no audio for $file\n";
        return;
    }
    warn "sox exited with status " . ($? >> 8) . "; sending its output anyway\n"
        unless $sox_ok;

    # Connect only once there is something to send, so a bad input file
    # never ties up a server child.
    my $remote = IO::Socket::INET->new(
        Proto    => 'tcp',
        PeerAddr => $host,
        PeerPort => $port,
    ) or return;
    binmode $remote;

    unless (print {$remote} length($sox) . "\n", $sox) {
        warn "Could not send audio to $host:$port: $!\n";
        close $remote;
        return;
    }

    #print "DEBUG: Waiting for result.\n";
    # The server closes the connection after replying, so read to EOF.
    # $result is lexical: each call starts empty instead of appending to
    # the previous call's answer.
    my $result = do { local $/; <$remote> };
    close $remote;

    return defined $result ? $result : '';
}
Lastly, here is a snippet of how I use it in an AGI:
# confirm()
#
# Ask the caller a yes/no question and recognise the spoken answer.
# Plays "say_yes_no", records up to 3 seconds of audio, runs it through
# asr() and repeats the prompt (at most 3 times in total) until the answer
# contains YES, NO, ACCEPT or CANCEL.
#
# Returns 1 for YES/ACCEPT.  Returns undef for NO/CANCEL (after playing
# "cancelled") and when no usable answer was heard (after playing
# "invalid_selection").
#
# Relies on the global $AGI object and on asr() being defined by the
# surrounding script.
sub confirm {
    my $max_tries = 3;
    my $recording = "/tmp/$$";    # record_file is given the name without ".gsm"

    # The counter lives in the loop header, so the loop is always bounded.
    for my $try (1 .. $max_tries) {
        $AGI->stream_file("say_yes_no",'""');
        $AGI->stream_file("beep",'""');
        $AGI->record_file($recording, 'gsm', '0',3000);
        $AGI->stream_file("beep",'""');

        my $vresponse = asr("$recording.gsm");
        unlink "$recording.gsm";    # don't leave recordings behind in /tmp
        $vresponse = '' unless defined $vresponse;    # asr() failed
        $AGI->verbose("CONFIRM: $vresponse");

        # Same case-insensitive matching for both checks.
        next unless $vresponse =~ /\b(?:YES|NO|ACCEPT|CANCEL)\b/i;

        if ($vresponse =~ /\b(?:NO|CANCEL)\b/i) {
            sleep 1;
            $AGI->stream_file("cancelled",'""');
            return;
        }
        return 1;
    }

    # Every attempt failed.  No flag is needed to know that: a recognised
    # answer returns from inside the loop.
    sleep 1;
    $AGI->stream_file("invalid_selection",'""');
    return;
}