# -*- coding: UTF-8 -*-
#synthDrivers/sapi5.py
#A part of NonVisual Desktop Access (NVDA)
#Copyright (C) 2006-2014 NV Access Limited, Peter Vágner, Aleksey Sadovoy
#This file is covered by the GNU General Public License.
#See the file COPYING for more details.
import locale
from collections import OrderedDict
import threading
import time
import os
from ctypes import *
import comtypes.client
from comtypes import COMError
import _winreg
import audioDucking
import NVDAHelper
import globalVars
import speech
from synthDriverHandler import SynthDriver,VoiceInfo
import config
import nvwave
from logHandler import log
class FunctionHooker(object):
    """Installs a replacement for one imported function and removes it again on destruction.

    Hooking is delegated to NVDAHelper.localLib.dllImportTableHooks_hookSingle;
    the value it returns is kept so that __del__ can hand it back to
    dllImportTableHooks_unhookSingle.
    """

    def __init__(self,targetDll,importDll,funcName,newFunction):
        """
        @param targetDll: first argument forwarded to dllImportTableHooks_hookSingle (the DLL to be hooked)
        @param importDll: second argument forwarded to the helper (the DLL the function is imported from)
        @param funcName: name of the imported function to replace
        @param newFunction: the callable to install in place of funcName
        @raise RuntimeError: if the helper returns a false value, i.e. the hook could not be installed
        """
        # Set this before anything can fail so __del__ always finds the attribute.
        self._hook=None
        hook=NVDAHelper.localLib.dllImportTableHooks_hookSingle(targetDll,importDll,funcName,newFunction)
        if not hook:
            log.debugWarning("could not hook %s"%funcName)
            raise RuntimeError("could not hook %s"%funcName)
        # Remember the hook; without it the hook could never be removed.
        self._hook=hook
        log.debug("hooked %s"%funcName)

    def __del__(self):
        """Remove the hook if one was installed and has not been removed yet."""
        hook=getattr(self,"_hook",None)
        if hook:
            # Clear first so a repeated call never unhooks twice.
            self._hook=None
            NVDAHelper.localLib.dllImportTableHooks_unhookSingle(hook)
# Maps the value of each successfully opened wave output handle to the
# AudioDucker that was enabled for it by the waveOutOpen hook below.
_duckersByHandle={}

@WINFUNCTYPE(windll.winmm.waveOutOpen.restype,*windll.winmm.waveOutOpen.argtypes,use_errno=False,use_last_error=False)
def waveOutOpen(pWaveOutHandle,deviceID,wfx,callback,callbackInstance,flags):
    """Replacement for winmm's waveOutOpen which enables audio ducking for the opened device.

    All arguments are forwarded unchanged to the real windll.winmm.waveOutOpen.
    When that call reports success (0) and a handle pointer was supplied,
    an AudioDucker is created, enabled and stored in _duckersByHandle
    under the value of the new handle.
    @return: the result of the real call (0 if it returned a false value),
        or the winerror code if the call raised WindowsError
    """
    try:
        result=windll.winmm.waveOutOpen(pWaveOutHandle,deviceID,wfx,callback,callbackInstance,flags)
    except WindowsError as error:
        result=error.winerror
    else:
        # Normalise a false return value to 0.
        result=result or 0
    if result!=0 or not pWaveOutHandle:
        return result
    handle=pWaveOutHandle.contents.value
    ducker=audioDucking.AudioDucker()
    ducker.enable()
    # Holding the ducker in the dict keeps it referenced until waveOutClose removes it.
    _duckersByHandle[handle]=ducker
    return result
@WINFUNCTYPE(c_long,c_long)
def waveOutClose(waveOutHandle):
    """Replacement for winmm's waveOutClose which forgets the ducker stored for the handle.

    The handle is forwarded to the real windll.winmm.waveOutClose.
    When that call reports success (0) and the handle is non-zero,
    any entry for the handle is removed from _duckersByHandle.
    @return: the result of the real call (0 if it returned a false value),
        or the winerror code if the call raised WindowsError
    """
    try:
        status=windll.winmm.waveOutClose(waveOutHandle)
    except WindowsError as error:
        status=error.winerror
    else:
        # Normalise a false return value to 0.
        status=status or 0
    closedOk=(status==0)
    if closedOk and waveOutHandle:
        # A handle that was never recorded is silently ignored.
        _duckersByHandle.pop(waveOutHandle,None)
    return status
# FunctionHooker instances created by ensureWaveOutHooks; holding them here keeps them referenced.
_waveOutHooks=[]

def ensureWaveOutHooks():
    """Install the waveOutOpen and waveOutClose hooks into sapi.dll, at most once.

    Does nothing if hooks are already recorded in _waveOutHooks
    or if audioDucking reports that audio ducking is not supported.
    """
    if _waveOutHooks or not audioDucking.isAudioDuckingSupported():
        return
    sapiPath=os.path.join(os.path.expandvars("$SYSTEMROOT"),"system32","speech","common","sapi.dll")
    replacements=(
        ("waveOutOpen",waveOutOpen),
        ("waveOutClose",waveOutClose),
    )
    for funcName,replacement in replacements:
        _waveOutHooks.append(FunctionHooker(sapiPath,"WINMM.dll",funcName,replacement))
class constants:
    """Bit flags which this driver combines with | and passes as the flags argument of tts.Speak."""
    # Bit 0 (value 1).
    SVSFlagsAsync = 1 << 0
    # Bit 1 (value 2).
    SVSFPurgeBeforeSpeak = 1 << 1
    # Bit 3 (value 8).
    SVSFIsXML = 1 << 3
class SynthDriver(SynthDriver):
    """Synth driver which speaks through the COM object named by COM_CLASS (SAPI 5).

    Rate and volume are stored on the COM voice object itself.
    Pitch is kept in self._pitch and applied through XML markup generated in speak().
    """

    supportedSettings=(SynthDriver.VoiceSetting(),SynthDriver.RateSetting(),SynthDriver.PitchSetting(),SynthDriver.VolumeSetting())

    # ProgID used both to detect availability (check) and to create the voice object (_initTts).
    COM_CLASS = "SAPI.SPVoice"

    name="sapi5"
    description="Microsoft Speech API version 5"

    @classmethod
    def check(cls):
        """Report whether COM_CLASS is registered under HKEY_CLASSES_ROOT.
        @return: True if the registry key can be opened, False otherwise
        @rtype: bool
        """
        try:
            r=_winreg.OpenKey(_winreg.HKEY_CLASSES_ROOT,cls.COM_CLASS)
        except WindowsError:
            # The key is missing or cannot be opened, so the synth is treated as unavailable.
            return False
        r.Close()
        return True

    def __init__(self,_defaultVoiceToken=None):
        """
        @param _defaultVoiceToken: an optional sapi voice token which should be used as the default voice (only useful for subclasses)
        @type _defaultVoiceToken: ISpeechObjectToken
        """
        ensureWaveOutHooks()
        self._pitch=50
        self._initTts(_defaultVoiceToken)

    def terminate(self):
        """Drop the reference to the COM voice object."""
        del self.tts

    def _getAvailableVoices(self):
        """Build an ordered mapping of voice ID to VoiceInfo for every usable voice token.

        Tokens whose information cannot be read because of a COMError are logged and left out.
        @rtype: OrderedDict
        """
        voices=OrderedDict()
        v=self._getVoiceTokens()
        # #2629: Iterating uses IEnumVARIANT and GetBestInterface doesn't work on tokens returned by some token enumerators.
        # Therefore, fetch the items by index, as that method explicitly returns the correct interface.
        for i in xrange(len(v)):
            try:
                token=v[i]
                ID=token.Id
                name=token.GetDescription()
                try:
                    language=locale.windows_locale[int(token.getattribute('language').split(';')[0],16)]
                except (KeyError,ValueError):
                    # Unknown LCID or a language attribute that is not a hexadecimal number.
                    language=None
            except COMError:
                log.warning("Could not get the voice info. Skipping...")
                # Really skip: ID, name and language are stale or undefined at this point.
                continue
            voices[ID]=VoiceInfo(ID,name,language)
        return voices

    def _getVoiceTokens(self):
        """Provides a collection of sapi5 voice tokens. Can be overridden by subclasses if tokens should be looked for in some other registry location."""
        return self.tts.getVoices()

    def _get_rate(self):
        """Return the voice's rate converted to a percentage (the inverse of _percentToRate)."""
        return (self.tts.rate*5)+50

    def _get_pitch(self):
        return self._pitch

    def _get_volume(self):
        return self.tts.volume

    def _get_voice(self):
        return self.tts.voice.Id

    def _get_lastIndex(self):
        """Return the last bookmark reported by the voice as an int, or None if there is none."""
        bookmark=self.tts.status.LastBookmark
        if bookmark is None or bookmark=="":
            return None
        return int(bookmark)

    def _percentToRate(self, percent):
        """Convert a percentage to a rate value; 0, 50 and 100 map to -10, 0 and 10."""
        return (percent - 50) / 5

    def _set_rate(self,rate):
        self.tts.Rate = self._percentToRate(rate)

    def _set_pitch(self,value):
        # Pitch is really controlled with XML around speak commands.
        self._pitch=value

    def _set_volume(self,value):
        self.tts.Volume = value

    def _initTts(self, voice=None):
        """Create a fresh COM voice object, optionally selecting a voice, and set its audio output.
        @param voice: an optional voice token to select on the new object
        """
        self.tts=comtypes.client.CreateObject(self.COM_CLASS)
        if voice:
            # #749: It seems that SAPI 5 doesn't reset the audio parameters when the voice is changed,
            # but only when the audio output is changed.
            # Therefore, set the voice before setting the audio output.
            # Otherwise, we will get poor speech quality in some cases.
            self.tts.voice = voice
        outputDeviceID=nvwave.outputDeviceNameToID(config.conf["speech"]["outputDevice"], True)
        # A negative ID leaves the audio output of the new object untouched.
        if outputDeviceID>=0:
            self.tts.audioOutput=self.tts.getAudioOutputs()[outputDeviceID]

    def _set_voice(self,value):
        """Switch to the voice whose token Id equals value; do nothing if no such token exists."""
        tokens = self._getVoiceTokens()
        # #2629: Iterating uses IEnumVARIANT and GetBestInterface doesn't work on tokens returned by some token enumerators.
        # Therefore, fetch the items by index, as that method explicitly returns the correct interface.
        for i in xrange(len(tokens)):
            voice=tokens[i]
            if value==voice.Id:
                break
        else:
            # Voice not found.
            return
        self._initTts(voice=voice)

    def _percentToPitch(self, percent):
        """Convert a percentage to a pitch value; 0, 50 and 100 map to -25, 0 and 25."""
        return percent / 2 - 25

    # IPA characters this driver can translate, mapped to the symbols emitted by _convertPhoneme.
    IPA_TO_SAPI = {
        u"θ": u"th",
        u"s": u"s",
    }

    def _convertPhoneme(self, ipa):
        """Translate an IPA string into space separated symbols from IPA_TO_SAPI.

        The IPA primary stress mark is emitted as u"1" after the symbol that follows it.
        @raise LookupError: if the current voice's language attribute is not "409"
            or ipa contains a character missing from IPA_TO_SAPI (KeyError)
        """
        # We only know about US English phonemes.
        # Rather than just ignoring unknown phonemes, SAPI throws an exception.
        # Therefore, don't bother with any other language.
        if self.tts.voice.GetAttribute("language") != "409":
            raise LookupError("No data for this language")
        out = []
        outAfter = None
        for ipaChar in ipa:
            if ipaChar == u"ˈ":
                # Hold the stress marker back until the next symbol has been written.
                outAfter = u"1"
                continue
            out.append(self.IPA_TO_SAPI[ipaChar])
            if outAfter:
                out.append(outAfter)
                outAfter = None
        if outAfter:
            out.append(outAfter)
        return u" ".join(out)

    def speak(self, speechSequence):
        """Convert a speech sequence to SAPI XML and queue it asynchronously on the voice.
        @param speechSequence: an iterable of strings and speech commands
        """
        textList = []
        # NVDA SpeechCommands are linear, but XML is hierarchical.
        # Therefore, we track values for non-empty tags.
        # When a tag changes, we close all previously opened tags and open new ones.
        tags = {}
        # We have to use something mutable here because it needs to be changed by the inner function.
        tagsChanged = [True]
        openedTags = []
        def outputTags():
            if not tagsChanged[0]:
                return
            # Close in reverse order of opening so the markup stays well nested.
            for tag in reversed(openedTags):
                textList.append("</%s>" % tag)
            del openedTags[:]
            for tag, attrs in tags.iteritems():
                textList.append("<%s" % tag)
                for attr, val in attrs.iteritems():
                    textList.append(' %s="%s"' % (attr, val))
                textList.append(">")
                openedTags.append(tag)
            tagsChanged[0] = False

        pitch = self._pitch
        # Pitch must always be specified in the markup.
        tags["pitch"] = {"absmiddle": self._percentToPitch(pitch)}
        rate = self.rate
        volume = self.volume

        for item in speechSequence:
            if isinstance(item, basestring):
                outputTags()
                # The text is sent as XML, so a literal "<" must not start a tag.
                textList.append(item.replace("<", "&lt;"))
            elif isinstance(item, speech.IndexCommand):
                # _get_lastIndex reads this bookmark back as an int.
                textList.append('<Bookmark Mark="%d" />' % item.index)
            elif isinstance(item, speech.CharacterModeCommand):
                if item.state:
                    tags["spell"] = {}
                else:
                    try:
                        del tags["spell"]
                    except KeyError:
                        pass
                tagsChanged[0] = True
            elif isinstance(item, speech.BreakCommand):
                textList.append('<silence msec="%d" />' % item.time)
            elif isinstance(item, speech.PitchCommand):
                tags["pitch"] = {"absmiddle": self._percentToPitch(int(pitch * item.multiplier))}
                tagsChanged[0] = True
            elif isinstance(item, speech.VolumeCommand):
                if item.multiplier == 1:
                    try:
                        del tags["volume"]
                    except KeyError:
                        pass
                else:
                    tags["volume"] = {"level": int(volume * item.multiplier)}
                tagsChanged[0] = True
            elif isinstance(item, speech.RateCommand):
                if item.multiplier == 1:
                    try:
                        del tags["rate"]
                    except KeyError:
                        pass
                else:
                    tags["rate"] = {"absspeed": self._percentToRate(int(rate * item.multiplier))}
                tagsChanged[0] = True
            elif isinstance(item, speech.PhonemeCommand):
                try:
                    textList.append(u'<pron sym="%s">%s</pron>'
                        % (self._convertPhoneme(item.ipa), item.text or u""))
                except LookupError:
                    log.debugWarning("Couldn't convert character in IPA string: %s" % item.ipa)
                    # Fall back to the plain text alternative, if the command carries one.
                    if item.text:
                        textList.append(item.text)
            elif isinstance(item, speech.SpeechCommand):
                log.debugWarning("Unsupported speech command: %s" % item)
            else:
                log.error("Unknown speech: %s" % item)

        # Close any tags that are still open.
        tags.clear()
        tagsChanged[0] = True
        outputTags()

        text = "".join(textList)
        flags = constants.SVSFIsXML | constants.SVSFlagsAsync
        self.tts.Speak(text, flags)

    def cancel(self):
        """Purge everything queued on the voice by speaking nothing with the purge flag set."""
        self.tts.Speak(None, constants.SVSFlagsAsync|constants.SVSFPurgeBeforeSpeak)

    def pause(self,switch):
        """Cancel speech when switch is true; a false switch does nothing."""
        if switch:
            self.cancel()