2

I am trying to build an Android application with voice recognition, but unfortunately Google does not support my language (Macedonian), so I am trying to compare two voice recordings instead: voice recognition and sound comparison with musicg.

I am using http://code.google.com/p/musicg/ to record and compare speech, and I am initializing the settings it uses to detect a sound. Can somebody tell me how to rewrite this initialization so that it detects speech? This is very important to me. Or any other ideas on how to do it.

This is the whistle-detection initialization:

  // settings for detecting a whistle 

      minFrequency = 600.0f; 
      maxFrequency = Double.MAX_VALUE; 

      minIntensity = 100.0f; 
      maxIntensity = 100000.0f; 

      minStandardDeviation = 0.1f; 
      maxStandardDeviation = 1.0f; 

      highPass = 500; 
      lowPass = 10000; 

      minNumZeroCross = 50; 
      maxNumZeroCross = 200; 

      numRobust = 10; 
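
To be clear, what I have in mind is something along these lines. This is only a sketch: "VoiceApi" does not exist in musicg (as far as I can tell these are protected fields of DetectionApi, and the values above are the ones WhistleApi sets in its init()), and the thresholds below are guesses I would need help tuning.

    import com.musicg.api.DetectionApi; 
    import com.musicg.wave.WaveHeader; 

    // Hypothetical detector: same pattern as WhistleApi, guessed thresholds. 
    public class VoiceApi extends DetectionApi { 

        public VoiceApi(WaveHeader waveHeader) { 
            super(waveHeader); 
        } 

        @Override 
        protected void init() { 
            // speech sits much lower in frequency than a whistle, so widen the 
            // band and relax the zero-crossing limits (all values are guesses) 
            minFrequency = 80.0f; 
            maxFrequency = 4000.0f; 

            minIntensity = 100.0f; 
            maxIntensity = 100000.0f; 

            minStandardDeviation = 0.05f; 
            maxStandardDeviation = 1.0f; 

            highPass = 100; 
            lowPass = 4000; 

            minNumZeroCross = 5; 
            maxNumZeroCross = 100; 

            numRobust = 4; 
        } 

        // analogous to WhistleApi.isWhistle(byte[]) 
        public boolean isVoice(byte[] audioBytes) { 
            return isSpecificSound(audioBytes); 
        } 
    } 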
+0

Were you successful with this? I am trying to do something similar –

+1

I will post some code, but it does not really do what I expected. I could not get the expected result for every sound, but that is not down to the API's algorithm; it comes from differences in background noise and in the speed at which the recorded words are spoken. – user1668168

Answers

1

My understanding is that the musicg DetectionApi, as it stands, is only meant to analyze a single chunk of sound and tell you whether it contains a particular sound, such as the whistle or clap in the API examples. I.e. is it a clap / is it a whistle.

With musicg, the best you could probably do is recognize whether a sound is or is not speech... and even that may be beyond the DetectionApi.

Since you said the Google API does not support Macedonian, perhaps you could try Pocketsphinx, which is mentioned in this stackoverflow article.
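
For reference, the detection examples are used roughly like this: you hand the detector one short chunk of PCM bytes and get a yes/no back, and there is no isSpeech counterpart. This is a sketch from the 1.4-era API, so check the DetectionApi source for the exact buffer-size requirements.

    import com.musicg.api.WhistleApi; 
    import com.musicg.wave.WaveHeader; 

    public class DetectionExample { 
        // waveHeader describes the recording format (e.g. new Wave(file).getWaveHeader()); 
        // buffer is one short chunk of PCM bytes, such as a frame from the recording loop 
        public static boolean containsWhistle(WaveHeader waveHeader, byte[] buffer) { 
            WhistleApi whistleApi = new WhistleApi(waveHeader); 
            return whistleApi.isWhistle(buffer); 
        } 
    } 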

0

First of all, you need to save your recordings as WAV files, and then it is easy to use the fingerprint classes from their API: https://code.google.com/p/musicg/source/browse/#git%2Fsrc%2Fcom%2Fmusicg%2Ffingerprint
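
The core of the comparison comes down to a few calls. A minimal sketch, assuming the com.musicg.wave and com.musicg.fingerprint classes linked above (the file paths are placeholders):

    import com.musicg.fingerprint.FingerprintSimilarity; 
    import com.musicg.wave.Wave; 

    public class CompareExample { 
        // returns a similarity value for the two WAV files 
        public static float compare(String recordedWavPath, String referenceWavPath) { 
            Wave recorded = new Wave(recordedWavPath); 
            Wave reference = new Wave(referenceWavPath); 
            FingerprintSimilarity fs = recorded.getFingerprintSimilarity(reference); 
            return fs.getSimilarity(); // FingerprintSimilarity also carries a score value 
        } 
    } 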

Here is what I am doing for the comparison: one temporarily recorded WAV clip against all of the WAV sounds in my database.

public Cursor FP(String recordedClip, Context context) { 

    // Baza, Protocol and TransferClass below are my own DB helper/model classes 
    Baza baza = new Baza(context); 

    Cursor allSound = baza.getAllProtocolsForSoundCheck(); 

    List<Protocol> protocols = new ArrayList<Protocol>(); 
    int PID =-1; 

    Log.d("broj",allSound.getCount()+""); 

    for (int i = 0; i < allSound.getCount(); i++) { 


     Protocol protocol = new Protocol(); 
     allSound.moveToNext(); 
     protocol.setSoundPath(allSound.getString(4)); 
     protocol.setId(Integer.parseInt(allSound.getString(1))); 
     protocols.add(protocol); 

     Log.d("brojProtocol",allSound.getString(2)+" "); 
     baza.updateProtocolsSoundSimilarity(protocol.getId(), (float) -1); 
    } 

    Wave record = new Wave(recordedClip); 

    List<Wave> waves = new ArrayList<Wave>(); 

    if (protocols != null) { 
     for (int i = 0; i < protocols.size(); i++) { 
      waves.add(new Wave(protocols.get(i).getSoundPath())); 
     } 
    } 

    for (int i = 0; i < waves.size(); i++) { 

     // compute the fingerprint similarity once and reuse it for the log and the DB update 
     float similarity = record.getFingerprintSimilarity(waves.get(i)) 
         .getSimilarity(); 

     Log.d("similarity", similarity + ""); 

     baza.updateProtocolsSoundSimilarity(protocols.get(i).getId(), similarity); 
    } 

    Cursor similarCursor = baza.getSimilarProtocols(); 
    similarCursor.moveToFirst(); 
    TransferClass protocolForTransfer = new TransferClass(); 
    protocolForTransfer.setId(Integer.parseInt(similarCursor.getString(1))); 
    protocolForTransfer.setName(similarCursor.getString(2)); 

    Log.d("passobj",protocolForTransfer.getName()+" "+protocolForTransfer.getId()); 
// return protocolForTransfer; 

    return similarCursor; 
} 
0

And here is how I record the sound and save it as a WAV file, via a temporary raw file:

import java.io.File; 
import java.io.FileInputStream; 
import java.io.FileNotFoundException; 
import java.io.FileOutputStream; 
import java.io.IOException; 

import android.content.Context; 
import android.graphics.Bitmap; 
import android.graphics.Canvas; 
import android.graphics.Color; 
import android.graphics.Paint; 
import android.media.AudioFormat; 
import android.media.AudioRecord; 
import android.media.MediaRecorder; 
import android.os.AsyncTask; 
import android.os.Environment; 
import android.widget.ImageView; 

// RealDoubleFFT and EndPointDetection are not part of the Android SDK or musicg; 
// their imports depend on the FFT/endpointing code used in your own project. 

public class RecorderActivity { 

    private static final int RECORDER_BPP = 16; 
    private static final String AUDIO_RECORDER_FILE_EXT_WAV = ".wav"; 
    private static final String AUDIO_RECORDER_FOLDER = "HQProtocol/sound"; 
    private static final String AUDIO_RECORDER_TEMP_FILE = "record_temp.raw"; 
    private String AUDIO_RECORDER_FILE = ""; 
    private static final int RECORDER_SAMPLERATE = 8000; 
    private static final int RECORDER_CHANNELS = AudioFormat.CHANNEL_IN_MONO; 
    private static final int RECORDER_AUDIO_ENCODING = AudioFormat.ENCODING_PCM_16BIT; 

    private RealDoubleFFT transformer; 

    EndPointDetection endpoint; 

    int blockSize = 256; 

    private AudioRecord recorder = null; 
    private int bufferSize = 0; 
    private RecorderAsynctask recordingThread = null; 
    private boolean isRecording = false; 

    float tempFloatBuffer[] = new float[3]; 
    int tempIndex = 0; 
    int totalReadBytes = 0; 

    ImageView imageView; 
    Bitmap bitmap; 
    Canvas canvas; 
    Paint paint; 

    Context con; 

    RecorderActivity(String file, Context con, ImageView image) { 
     AUDIO_RECORDER_FILE = file; 
     this.con = con; 

     this.imageView = image; 
     bitmap = Bitmap.createBitmap((int) 256, (int) 100, 
       Bitmap.Config.ARGB_8888); 
     canvas = new Canvas(bitmap); 
     paint = new Paint(); 
     paint.setStrokeWidth(5); 
     paint.setColor(Color.BLUE); 
     imageView.setImageBitmap(bitmap); 

     transformer = new RealDoubleFFT(256); 

     bufferSize = AudioRecord.getMinBufferSize(RECORDER_SAMPLERATE, 
       RECORDER_CHANNELS, RECORDER_AUDIO_ENCODING); 

    } 

    public String getFilename() { 
     String filepath = Environment.getExternalStorageDirectory().getPath(); 
     File file = new File(filepath, AUDIO_RECORDER_FOLDER); 

     if (!file.exists()) { 
      file.mkdirs(); 
     } 

     return (file.getAbsolutePath() + "/" + AUDIO_RECORDER_FILE + AUDIO_RECORDER_FILE_EXT_WAV); 
    } 

    private String getTempFilename() { 
     String filepath = Environment.getExternalStorageDirectory().getPath(); 
     File file = new File(filepath, AUDIO_RECORDER_FOLDER); 

     if (!file.exists()) { 
      file.mkdirs(); 
     } 

     // keep the temp file inside the recorder folder so the stale copy that gets 
     // deleted is the same one whose path is returned 
     File tempFile = new File(file, AUDIO_RECORDER_TEMP_FILE); 

     if (tempFile.exists()) 
      tempFile.delete(); 

     return tempFile.getAbsolutePath(); 
    } 

    public void startRecording() { 
     recorder = new AudioRecord(MediaRecorder.AudioSource.MIC, 
       RECORDER_SAMPLERATE, RECORDER_CHANNELS, 
       RECORDER_AUDIO_ENCODING, bufferSize); 

     recorder.startRecording(); 

     isRecording = true; 

     recordingThread = new RecorderAsynctask(); 
     recordingThread.execute(this); 

    } 

    class RecorderAsynctask extends AsyncTask<RecorderActivity, double[], Void> { 

     public void shareLockedfuntionProgreesUpdate(double[] fttrezult) { 

      publishProgress(fttrezult); 

     } 

     @Override 
     protected Void doInBackground(RecorderActivity... params) { 
      // TODO Auto-generated method stub 

      byte data[] = new byte[bufferSize]; 
      String filename = getTempFilename(); 
      FileOutputStream os = null; 

      try { 
       os = new FileOutputStream(filename); 
      } catch (FileNotFoundException e) { 
       // TODO Auto-generated catch block 
       e.printStackTrace(); 
      } 

      double[] toTransform = new double[blockSize]; 

      if (null != os) { 
       while (isRecording) { 
        // sampleRateTextField.setText(recorder.getSampleRate()); 

        int bufferReadResult = recorder.read(data, 0, blockSize); 

        // note: this scales each raw byte on its own instead of assembling the two 
        // bytes of each 16-bit PCM sample, so it is only a rough visualization 
        for (int i = 0; i < blockSize && i < bufferReadResult; i++) { 
         toTransform[i] = (double) data[i]/32768.0; 
        } 

        transformer.ft(toTransform); 
        publishProgress(toTransform); 

        // write out only the bytes that were actually read in this pass 
        if (AudioRecord.ERROR_INVALID_OPERATION != bufferReadResult) { 
         try { 

          os.write(data, 0, bufferReadResult); 
          tempIndex++; 

         } catch (IOException e) { 
          e.printStackTrace(); 
         } 
        } 

       } 

       try { 
        os.close(); 
       } catch (IOException e) { 
        e.printStackTrace(); 
       } 
      } 

      return null; 
     } 

     @Override 
     protected void onProgressUpdate(double[]... toTransform) { 
      canvas.drawColor(Color.GRAY); 
      Paint p = new Paint(); 
      for (int i = 0; i < toTransform[0].length; i++) { 

       int x = i; 
       int downy = (int) (100 - (toTransform[0][i] * 10)); 
       int upy = 100; 
       p.setColor(Color.rgb(downy % 256, i % 256, upy % 256)); 
       canvas.drawLine(x, upy, x, downy, p); 

      } 
      imageView.invalidate(); 
     } 

    } 

    // largely duplicates RecorderAsynctask.doInBackground(); nothing in this class calls it 
    public void writeAudioDataToFile(RecorderAsynctask asyntask) { 
     byte data[] = new byte[bufferSize]; 
     String filename = getTempFilename(); 
     FileOutputStream os = null; 

     try { 
      os = new FileOutputStream(filename); 
     } catch (FileNotFoundException e) { 
      // TODO Auto-generated catch block 
      e.printStackTrace(); 
     } 

     double[] toTransform = new double[256]; 

     if (null != os) { 
      while (isRecording) { 
       // sampleRateTextField.setText(recorder.getSampleRate()); 

       int bufferReadResult = recorder.read(data, 0, 256); 

       // as above: per-byte scaling, not a true 16-bit sample conversion 
       for (int i = 0; i < 256 && i < bufferReadResult; i++) { 
        toTransform[i] = (double) data[i]/32768.0; 
       } 

       transformer.ft(toTransform); 
       asyntask.shareLockedfuntionProgreesUpdate(toTransform); 

       // write out only the bytes that were actually read in this pass 
       if (AudioRecord.ERROR_INVALID_OPERATION != bufferReadResult) { 
        try { 

         os.write(data, 0, bufferReadResult); 
         tempIndex++; 

        } catch (IOException e) { 
         e.printStackTrace(); 
        } 
       } 

      } 

      try { 
       os.close(); 
      } catch (IOException e) { 
       e.printStackTrace(); 
      } 
     } 
    } 

    public void stopRecording() { 
     if (null != recorder) { 
      isRecording = false; 

      recorder.stop(); 
      recorder.release(); 

      recorder = null; 
      recordingThread = null; 
     } 

     copyWaveFile(getTempFilename(), getFilename()); 
     deleteTempFile(); 
    } 

    private void deleteTempFile() { 
     File file = new File(getTempFilename()); 

     file.delete(); 
    } 

    private void copyWaveFile(String inFilename, String outFilename) { 
     FileInputStream in = null; 
     FileOutputStream out = null; 
     long totalAudioLen = 0; 
     long totalDataLen = totalAudioLen + 36; 
     long longSampleRate = RECORDER_SAMPLERATE; 
     int channels = 1; 
     long byteRate = RECORDER_BPP * RECORDER_SAMPLERATE * channels/8; 

     byte[] data = new byte[bufferSize]; 

     try { 
      in = new FileInputStream(inFilename); 
      out = new FileOutputStream(outFilename); 
      totalAudioLen = in.getChannel().size(); 
      totalDataLen = totalAudioLen + 36; 

      WriteWaveFileHeader(out, totalAudioLen, totalDataLen, 
        longSampleRate, channels, byteRate); 

      int length; 
      while ((length = in.read(data)) != -1) { 
       // copy only the bytes actually read from the raw temp file 
       out.write(data, 0, length); 
      } 

      in.close(); 
      out.close(); 
     } catch (FileNotFoundException e) { 
      e.printStackTrace(); 
     } catch (IOException e) { 
      e.printStackTrace(); 
     } 
    } 

    private void WriteWaveFileHeader(FileOutputStream out, long totalAudioLen, 
      long totalDataLen, long longSampleRate, int channels, long byteRate) 
      throws IOException { 

     byte[] header = new byte[44]; 

     header[0] = 'R'; // RIFF/WAVE header 
     header[1] = 'I'; 
     header[2] = 'F'; 
     header[3] = 'F'; 
     header[4] = (byte) (totalDataLen & 0xff); 
     header[5] = (byte) ((totalDataLen >> 8) & 0xff); 
     header[6] = (byte) ((totalDataLen >> 16) & 0xff); 
     header[7] = (byte) ((totalDataLen >> 24) & 0xff); 
     header[8] = 'W'; 
     header[9] = 'A'; 
     header[10] = 'V'; 
     header[11] = 'E'; 
     header[12] = 'f'; // 'fmt ' chunk 
     header[13] = 'm'; 
     header[14] = 't'; 
     header[15] = ' '; 
     header[16] = 16; // 4 bytes: size of 'fmt ' chunk 
     header[17] = 0; 
     header[18] = 0; 
     header[19] = 0; 
     header[20] = 1; // format = 1 
     header[21] = 0; 
     header[22] = (byte) channels; 
     header[23] = 0; 
     header[24] = (byte) (longSampleRate & 0xff); 
     header[25] = (byte) ((longSampleRate >> 8) & 0xff); 
     header[26] = (byte) ((longSampleRate >> 16) & 0xff); 
     header[27] = (byte) ((longSampleRate >> 24) & 0xff); 
     header[28] = (byte) (byteRate & 0xff); 
     header[29] = (byte) ((byteRate >> 8) & 0xff); 
     header[30] = (byte) ((byteRate >> 16) & 0xff); 
     header[31] = (byte) ((byteRate >> 24) & 0xff); 
     header[32] = (byte) (channels * RECORDER_BPP/8); // block align = channels * bytes per sample 
     header[33] = 0; 
     header[34] = RECORDER_BPP; // bits per sample 
     header[35] = 0; 
     header[36] = 'd'; 
     header[37] = 'a'; 
     header[38] = 't'; 
     header[39] = 'a'; 
     header[40] = (byte) (totalAudioLen & 0xff); 
     header[41] = (byte) ((totalAudioLen >> 8) & 0xff); 
     header[42] = (byte) ((totalAudioLen >> 16) & 0xff); 
     header[43] = (byte) ((totalAudioLen >> 24) & 0xff); 

     out.write(header, 0, 44); 
    } 

    public void closeThreadIfisnot() { 
     // guard against the thread already having been cleared by stopRecording() 
     if (recordingThread != null) { 
      recordingThread.cancel(true); 
     } 
    } 
}
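
For completeness, this is roughly how the class above is driven from an Activity (a hypothetical fragment based only on the constructor and methods shown; here this is the Activity Context and imageView comes from the layout):

    RecorderActivity recorder = new RecorderActivity("record_clip", this, imageView); 
    recorder.startRecording();   // starts the AsyncTask that streams PCM bytes into record_temp.raw 

    // ... let the user speak ... 

    recorder.stopRecording();    // writes the WAV header and produces HQProtocol/sound/record_clip.wav 
    // that .wav can then be passed to FP(...) above for the fingerprint comparison 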