CodeGize-Unity利用Sapi进行语音开发

21

2016
04

Unity利用Sapi进行语音开发

软件中的语音技术主要包含两种：语音识别speech recognition和语音合成speech synthesis。一般地，开发者会因为技术实力和资金实力等各方面的问题无力完成专业的语音引擎，因此通常选择现有的较为专业的语音引擎来完成相关的开发，比如国内非常出名的科大讯飞，百度语音等等。当然国外的还有Google语音，微软有SAPI等等。

在VR开发过程中，由于程序运行在Windows环境下，我们自然而然地首选SAPI来进行语音开发。一是Windows原生支持，二是可以离线使用、不需要网络，三是不需要任何插件。另外，SAPI的发音，尤其是英文发音，质量相对来说还是不错的。（Win7及以上系统自带）

使用SAPI，需要用到System.Speech.dll文件。Unity要求将Dll文件放在Assets目录下，但这样做之后运行会报错：sapi failed to initialize。怀疑原因是该dll的api需要特定的上下文环境才能运行，拷贝到Assets目录后上下文环境缺失，因而无法运行。

但是做过这方面开发的人都知道，在C#的其他应用里面引用System.Speech.dll是完全没有问题的。那么我们是不是可以开发一个专门的第三方程序，然后由unity进行调用呢？按照这个思路，我们开发了一个控制台程序Speech.exe，主要功能是根据输入文本进行语音合成。

代码较为简单

/*简单的SAPI语音合成控制台程序*/

using System.Speech.Synthesis;
using SpeechTest.Properties;
namespace SpeechTest
{
    /// <summary>Minimal console program that speaks a fixed word through SAPI.</summary>
    class Program
    {
        /// <summary>Speaks the word "test" and then exits.</summary>
        /// <param name="args">Command-line arguments; not used by this version.</param>
        static void Main(string[] args)
        {
            // SpeechSynthesizer is IDisposable, so release it as soon as we are done with it.
            using (var speaker = new SpeechSynthesizer())
            {
                // String literals must use plain ASCII double quotes; typographic quotes do not compile.
                speaker.Speak("test");
            }
        }
    }
}

OK，运行就可以听到机器发音Test了。

我们修改一下，改为从参数中读取，这样的话，我们可以在unity中利用Process运行Speech.exe，并传给Speech参数。

/*从参数读取需要发音的文本*/

using System.Speech.Synthesis;
using SpeechTest.Properties;
namespace SpeechTest
{
    /// <summary>Console program that speaks the text passed on the command line.</summary>
    class Program
    {
        /// <summary>
        /// Speaks the first command-line argument, or the default prompt when no argument is given.
        /// </summary>
        /// <param name="args">Command-line arguments; only the first one is spoken.</param>
        static void Main(string[] args)
        {
            var res = args.Length == 0 ? "请说" : args[0];

            // SpeechSynthesizer is IDisposable, so release it as soon as we are done with it.
            using (var speaker = new SpeechSynthesizer())
            {
                speaker.Speak(res);
            }
        }
    }
}

我们先使用CMD命令行，cd到Speech.exe所在的目录，然后输入Speech.exe test，如我们预想的那般，机器发音test。测试通过。

为了能够更改发音的配置，增加一些代码，从Setting中读取相关的配置数据，代码更改如下：

/*能够配置的控制台程序*/

using System.Speech.Synthesis;
using SpeechTest.Properties;
namespace SpeechTest
{
    /// <summary>Console program that speaks its first argument using configurable voice settings.</summary>
    class Program
    {
        /// <summary>
        /// Applies volume, rate and voice from the application settings, then speaks the first
        /// command-line argument (or the default prompt when no argument is given).
        /// </summary>
        /// <param name="args">Command-line arguments; only the first one is spoken.</param>
        static void Main(string[] args)
        {
            var res = args.Length == 0 ? "请说" : args[0];

            // SpeechSynthesizer is IDisposable, so release it as soon as we are done with it.
            using (var speaker = new SpeechSynthesizer())
            {
                speaker.Volume = Settings.Default.SpeakVolume;
                speaker.Rate = Settings.Default.SpeakRate;

                // SelectVoice throws for a voice that is not installed or not enabled, so a stale
                // setting would crash the program; fall back to the default voice instead.
                var voice = Settings.Default.SpeakVoice;
                if (!string.IsNullOrEmpty(voice) && IsVoiceAvailable(speaker, voice))
                {
                    speaker.SelectVoice(voice);
                }

                speaker.Speak(res);
            }
        }

        /// <summary>Returns true when a voice with the given name is installed and enabled.</summary>
        private static bool IsVoiceAvailable(SpeechSynthesizer speaker, string voice)
        {
            foreach (var installed in speaker.GetInstalledVoices())
            {
                if (installed.Enabled && installed.VoiceInfo.Name == voice)
                {
                    return true;
                }
            }
            return false;
        }
    }
}

接下来我们在Unity中使用Process来开启这个Speech.exe，代码如下：

/*Unity中开启Speech.exe进程*/

using System.Diagnostics;
/// <summary>Unity component that speaks text by launching the external speech.exe helper.</summary>
// The base type is fully qualified because this snippet does not import the UnityEngine namespace.
public class Speecher : UnityEngine.MonoBehaviour
{
    /// <summary>Launches speech.exe with <paramref name="str"/> as its single command-line argument.</summary>
    /// <param name="str">Text to speak; null is passed as an empty argument.</param>
    public static void Speak(string str)
    {
        var info = new ProcessStartInfo
        {
            FileName = "speech.exe",
            Arguments = QuoteArgument(str),
        };

        // Disposing the Process object only releases our handle to it; it does not stop speech.exe.
        using (var proc = new Process { StartInfo = info })
        {
            proc.Start();
        }
    }

    /// <summary>
    /// Wraps a value in double quotes so that it reaches the child process as exactly one argument,
    /// escaping embedded quotes and the backslashes that precede them (Windows argv parsing rules).
    /// </summary>
    private static string QuoteArgument(string value)
    {
        var quoted = new System.Text.StringBuilder("\"");
        var pendingBackslashes = 0;
        foreach (var c in value ?? string.Empty)
        {
            if (c == '\\')
            {
                pendingBackslashes++;
                continue;
            }

            if (c == '"')
            {
                // Backslashes before a quote must be doubled, plus one more to escape the quote itself.
                quoted.Append('\\', pendingBackslashes * 2 + 1);
            }
            else
            {
                // Backslashes not followed by a quote are literal.
                quoted.Append('\\', pendingBackslashes);
            }
            quoted.Append(c);
            pendingBackslashes = 0;
        }

        // Trailing backslashes sit directly before the closing quote, so they must be doubled too.
        quoted.Append('\\', pendingBackslashes * 2);
        quoted.Append('"');
        return quoted.ToString();
    }

    /*** Test code, safe to delete: Start ***/

    protected void Start()
    {
        Speak("test");
    }

    /*** Test code, safe to delete: End ***/
}

将脚本挂在任何一个GO（GameObject）上，运行，黑框出现，同时听到发音，测试完成。

接下来我们隐藏这个黑框。代码修改如下：

/*Unity开启无框的Speech.exe进程*/

using System.Diagnostics;
/// <summary>
/// Unity component that speaks text by launching the external speech.exe helper
/// without showing its console window.
/// </summary>
// The base type is fully qualified because this snippet does not import the UnityEngine namespace.
public class Speecher : UnityEngine.MonoBehaviour
{
    /// <summary>Launches a hidden speech.exe with <paramref name="str"/> as its single command-line argument.</summary>
    /// <param name="str">Text to speak; null is passed as an empty argument.</param>
    public static void Speak(string str)
    {
        var info = new ProcessStartInfo
        {
            FileName = "speech.exe",
            Arguments = QuoteArgument(str),
            CreateNoWindow = true,
            WindowStyle = ProcessWindowStyle.Hidden,
        };

        // Disposing the Process object only releases our handle to it; it does not stop speech.exe.
        using (var proc = new Process { StartInfo = info })
        {
            proc.Start();
        }
    }

    /// <summary>
    /// Wraps a value in double quotes so that it reaches the child process as exactly one argument,
    /// escaping embedded quotes and the backslashes that precede them (Windows argv parsing rules).
    /// </summary>
    private static string QuoteArgument(string value)
    {
        var quoted = new System.Text.StringBuilder("\"");
        var pendingBackslashes = 0;
        foreach (var c in value ?? string.Empty)
        {
            if (c == '\\')
            {
                pendingBackslashes++;
                continue;
            }

            if (c == '"')
            {
                // Backslashes before a quote must be doubled, plus one more to escape the quote itself.
                quoted.Append('\\', pendingBackslashes * 2 + 1);
            }
            else
            {
                // Backslashes not followed by a quote are literal.
                quoted.Append('\\', pendingBackslashes);
            }
            quoted.Append(c);
            pendingBackslashes = 0;
        }

        // Trailing backslashes sit directly before the closing quote, so they must be doubled too.
        quoted.Append('\\', pendingBackslashes * 2);
        quoted.Append('"');
        return quoted.ToString();
    }

    /*** Test code, safe to delete: Start ***/
    protected void Start()
    {
        Speak("test");
    }
    /*** Test code, safe to delete: End ***/
}

其实到了这一步，主要的功能都完成了。但是细心的读者会发现，这样不断创建进程然后关闭进程的方式是不是太笨了？可不可以让Speech这个进程一直开启着，收到unity的信息时就发音呢？这就涉及到进程间通信了。

Windows的进程是相互独立的，各自有各自的地址空间，但是这并不意味着它们不能相互通信。方法有很多，比如读写文件，发送消息（hook），Socket等等。其中Socket实现起来相对简单，尤其是我们已经拥有Socket封装库的情况下，只要少量代码就行了。

于是将Speech改成一个Socket服务器，代码如下：

/*Speech 服务端*/

using System;
using System.Linq;
using System.Speech.Synthesis;
using System.Text;
using Speech.Properties;
namespace Speech
{
    class Program {
        static void Main(string[] args) 
        {
            var server = new NetServer(); 
            server.StartServer();
            while (true) 
            {
                var res = Console.ReadLine();
                if (res == "exit")
                    break; 
             } 
         } 
     }

    public class NetServer : SocketExtra.INetComponent {
        private readonly Speecher m_speecher;
        private readonly SocketExtra m_socket;
        public NetServer() 
        { 
            m_speecher = new Speecher(); 
            m_socket = new SocketExtra(this); 
        }
        public void StartServer() 
        { 
            m_socket.Bind("127.0.0.1", Settings.Default.Port); 
        }
        public bool NetSendMsg(byte[] sendbuffer) 
        {
            return true; 
        }
        public bool NetReciveMsg(byte[] recivebuffer) 
        {
            var str = Encoding.Default.GetString(recivebuffer); 
            Console.WriteLine(str); 
            m_speecher.Speak(str);return true; 
        }
        public bool Connected { get { return m_socket.Connected; } } 
    }
    public class Speecher 
    {
        private readonly SpeechSynthesizer m_speaker;
        public Speecher() 
        { 
            m_speaker = new SpeechSynthesizer();
            var installs = m_speaker.GetInstalledVoices(); 
            m_speaker.Volume = Settings.Default.SpeakVolume; 
            m_speaker.Rate = Settings.Default.SpeakRate;
            var voice = Settings.Default.SpeakVoice;
            var selected = false;
            if (!string.IsNullOrEmpty(voice)) 
            {
                if (installs.Any(install => install.VoiceInfo.Name == voice)) 
                { 
                    m_speaker.SelectVoice(voice); 
                    selected = true; 
                } 
            }
            if (!selected) 
            {
                foreach (var install in installs.Where(install => install.VoiceInfo.Culture.Name == "en-US")) 
                { 
                    m_speaker.SelectVoice(install.VoiceInfo.Name);
                    break; 
                } 
            } 
        }
        public void Speak(string msg) 
        { 
            m_speaker.Speak(msg); 
        }
    }
}

同时修改Unity代码，增加Socket相关代码：

/*Unity客户端代码*/

using System.Collections;
using System.Diagnostics;
using System.Text;
using UnityEngine;
/// <summary>
/// Unity client that launches a hidden speech.exe, connects to it over a local socket
/// and sends it the text to speak.
/// </summary>
public class Speecher : MonoBehaviour, SocketExtra.INetComponent
{
    // NOTE(review): the speech.exe server presumably has to listen on this same port - confirm
    // against the server's configuration.
    private const int ServerPort = 9903;

    /// <summary>Instance used by the static <see cref="Speak"/> entry point; set in Awake, cleared in OnDestroy.</summary>
    public static Speecher Ins;

    private SocketExtra m_socket;
    private Process m_process;

    /// <summary>Registers this instance and starts the hidden speech.exe process.</summary>
    protected void Awake()
    {
        Ins = this;
        m_process = new Process
        {
            StartInfo = new ProcessStartInfo
            {
                FileName = "speech.exe",
                CreateNoWindow = true,
                WindowStyle = ProcessWindowStyle.Hidden
            },
        };
        m_process.Start();
    }

    /*** Test code, safe to delete: Start ***/
    protected IEnumerator Start()
    {
        yield return StartCoroutine(Connect());
        Speak("test");
    }
    /*** Test code, safe to delete: End ***/

    /// <summary>Coroutine that opens the socket and yields every frame until it reports connected.</summary>
    public IEnumerator Connect()
    {
        m_socket = new SocketExtra(this);
        m_socket.Connect("127.0.0.1", ServerPort);
        while (!m_socket.Connected)
        {
            // Wait one frame between checks; null avoids boxing an int on every iteration.
            yield return null;
        }
    }

    /// <summary>Stops the helper process, releases its handle and unregisters this instance.</summary>
    protected void OnDestroy()
    {
        if (m_process != null)
        {
            if (!m_process.HasExited)
            {
                m_process.Kill();
            }
            m_process.Dispose();
            m_process = null;
        }

        // Do not leave the static pointing at a destroyed component.
        if (ReferenceEquals(Ins, this))
        {
            Ins = null;
        }
    }

    /// <summary>
    /// Speaks the text through the registered instance. Does nothing on platforms other than the
    /// editor / Windows standalone, or when no instance is registered.
    /// </summary>
    /// <param name="str">Text to speak.</param>
    public static void Speak(string str)
    {
#if UNITY_EDITOR||UNITY_STANDALONE_WIN
        if (Ins != null)
        {
            Ins.Speech(str);
        }
#endif
    }

    /// <summary>Sends the text to the speech server; silently ignored while not connected.</summary>
    /// <param name="str">Text to speak.</param>
    public void Speech(string str)
    {
        // m_socket stays null until Connect() has been started, so guard against early calls.
        if (m_socket == null || !m_socket.Connected)
        {
            return;
        }

        // NOTE(review): Encoding.Default is environment dependent; the receiver presumably has to
        // decode with the same encoding for non-ASCII text to survive - confirm against the server.
        var bytes = Encoding.Default.GetBytes(str);
        m_socket.SendMsg(bytes);
    }

    /// <summary>Receive hook; the client performs no work here and always reports success.</summary>
    public bool NetReciveMsg(byte[] recivebuffer)
    {
        return true;
    }

    /// <summary>Send hook; the client performs no work here and always reports success.</summary>
    public bool NetSendMsg(byte[] sendbuffer)
    {
        return true;
    }
}

OK，大功告成。工程见Github