2016-03-07 52 views
0

我正在使用搭載 CentOS 的 Cloudera VM,並設定了單節點的 Hadoop 叢集,開發環境是 Eclipse Luna。問題標題:Pig UDF 出錯——「Could not resolve … using imports」(無法透過 imports 解析)

我有一個用於 Pig 的 UDF,這是我第一次爲 Pig 編寫 UDF。之前沒有使用 UDF 的 Pig 腳本都運行良好。當我運行這個 Pig 腳本時,得到以下錯誤:

Failed to generate logical plan. Nested exception: org.apache.pig.backend.executionengine.ExecException: ERROR 1070: Could not resolve EasyDates.EasyDateMethods.exec using imports: [, java.lang., org.apache.pig.builtin., org.apache.pig.impl.builtin.]

這個錯誤發生在 Pig 腳本中以「CALC_UR_DAYS_BETWEEN」開頭的那一行。見下文。

我花了 3-4 小時上網搜索(並反覆測試),找到的建議都是: - 正確設置 CLASSPATH; - 確保已註冊(REGISTER)UDF; - 確保 jar 文件名與軟件包名稱相同; - 確保軟件包名稱對應工作路徑中的一個目錄,且目錄名與軟件包名稱相同。

我已經完成了所有這些,但仍然出現錯誤。

據我所知,一切都已正確命名,並且都放在應該在的位置:

  • Java包名稱:EasyDates
  • Jar 名稱:EasyDates.jar
  • Jar 路徑:/home/cloudera/data/EasyDates/
  • 類名稱:EasyDateMethods
  • 在 .bash_profile 文件中設置: CLASSPATH=$CLASSPATH:/usr/jars/:/home/cloudera/data/EasyDates/

花了幾個小時之後,我已經把能找到的帖子都看完了,找不到其他可以嘗試的方法。如有任何其他見解,非常感謝!

Java源:

package EasyDates; 
import java.io.IOException; 
import java.text.SimpleDateFormat; 
import java.util.Calendar; 
import java.util.Date; 

import org.apache.pig.EvalFunc; 
import org.apache.pig.data.Tuple; 
import org.apache.pig.impl.util.WrappedIOException; 


public class EasyDateMethods extends EvalFunc <String> { 

    public String exec(Tuple input) throws IOException { 
     if (input == null || input.size() == 0) 
      return "0"; 

     try{ 
      Date date1; 
      Date date2; 
      String strDiff="0"; 
      int intDiff = 0; 
      //Get the two string dates from the tuple: 
      String strDate1 = (String)input.get(0); 
      String strDate2 = (String)input.get(1); 
      //Convert them to Dates 
      date1 = stringToDate(strDate1); 
      date2 = stringToDate(strDate2); 
      //The the date difference: 
      intDiff = getDaysBetween(date1, date2); 
      //Since I must return the same data type as I call for this Pig method, this converts the 
      //difference in days to a string. 
      return Integer.toString(intDiff); 

     }catch(Exception e){ 
      throw WrappedIOException.wrap("Caught exception processing input row ", e); 
     } 

    } 

    private Date stringToDate(String theDateString) { 
     //Make sure the Pig script formats the date format this way or whatever format you choose. 
     //Just make sure they agree. 
     SimpleDateFormat dateFormatter = new SimpleDateFormat ("dd-MMM-yyyy"); 

     String dateInString = "12-May-2014"; 
     Date theDate; 
     java.util.Date dateObject = null; 

     try { 

      dateObject = dateFormatter.parse (theDateString); 

      System.out.println(dateObject); 
      System.out.println(dateFormatter.format (dateObject)); 
      //theDate = dateFormatter.format (dateObject); 

     } catch (Exception e) { 

      System.out.println(e.getMessage() + " " + e.getStackTrace()); 

     }; 
     return dateObject ; 

    } 


    static int getDaysBetween(Date curDate, Date prevDate) { 
     //Precondition: the difference in days between the current meter read date and the last one is not known 
     //Postcondition: the difference in days between the current meter read date and the last one is known 
     Calendar currentDate = Calendar.getInstance(); 
     Calendar previousDate = Calendar.getInstance(); 
     currentDate.setTime(curDate); 
     previousDate.setTime(prevDate); 
     int theDiffinDays = 0; 
     int theDiffinYears = 0; 
     int currentDay; 
     int previousDay; 
     int currentYear; 
     int previousYear; 
     try { 


      currentDay = currentDate.get(Calendar.DAY_OF_YEAR); 
      System.out.println("currentDay is " + currentDay); 
      previousDay = previousDate.get(Calendar.DAY_OF_YEAR); 
      System.out.println("previousDay is " + previousDay); 
      currentYear = currentDate.get(Calendar.YEAR); 
      System.out.println("currentYear is " + currentYear); 
      previousYear = previousDate.get(Calendar.YEAR); 
      System.out.println("previousYear is " + previousYear); 

      if (currentYear == previousYear) { 
       theDiffinDays = currentDay - previousDay; 
      } 
      else 
      { 
       theDiffinYears = currentYear - previousYear; 
       //This assumes 2 contiguous years, eg 2016 and 2017; so this wouldn't work if the diff in years is greater than 1 
       if (isLeapYear(previousYear)) { 
        //The following has not been corrected for leap year: 
        //If the previous year is a leap year 
        theDiffinDays = 366 - previousDay + currentDay; 
       } 
       else { 
        //If the current year is a leap year or neither year is a leap year: (because the day of year should be inherent whether leap or not) 
        theDiffinDays = 365 - previousDay + currentDay; 
       } 
      } 
      //return theDiffinDays; 
     } 
     catch (Exception ex){ 
      System.out.println(ex.getMessage() + " " + ex.getStackTrace()); 
     } 
     return theDiffinDays; 
    } 

    private static boolean isLeapYear(int theYear){ 
     //Precondition: the year is not designated as a leap year or not 
     boolean ans = false; 

     try { 


      switch (theYear){ 
      case 2004: ans = true; 
      break; 
      case 2008: ans = true; 
      break; 
      case 2012: ans = true; 
      break; 
      case 2016: ans = true; 
      break; 
      case 2020: ans = true; 
      break; 
      case 2024: ans = true; 
      break; 
      case 2028: ans = true; 
      break; 
      case 2032: ans = true; 
      break; 
      case 2036: ans = true; 
      break; 
      case 2040: ans = true; 
      break; 
      case 2044: ans = true; 
      break; 
      case 2048: ans = true; 
      break; 
      default: ans = false; 
      } 
     } 
     catch (Exception ex){ 
      System.out.println(ex.getMessage() + " " + ex.getStackTrace()); 

     } 

     return ans; 
    } 

} 

Pig 腳本:

--Simple Pig script that reads a tab-separated file holding two dates per row
--and passes both dates to the EasyDateMethods UDF to get the gap in days.

REGISTER /home/cloudera/data/EasyDates/EasyDates.jar; 
DEFINE DaysBetween EasyDates.EasyDateMethods; 


----------------------------------------------------Load the file-------------------------------------------- 
--The file needs two different dates in one row for this test 
devicePageCountAll = LOAD 'Data_For_Test_Jar.txt' USING PigStorage('\t') 
         AS (
         account_code:chararray, 
         serial_number:chararray,  
         reported_date:chararray, 
         reported_date2:chararray); 
--dump devicePageCountAll; 

--------------------------------------------------Get the date difference in days and store the result----------------- 

devicePageCountAll2 = foreach devicePageCountAll { 

--REPLACE strips any double quotes from the raw text. ToDate then parses the
--text (assumed to be yyyy-MM-dd in the input file -- confirm against the data)
--into a datetime, and ToString re-formats it as dd-MMM-yyyy, the pattern the
--UDF parses. The UDF returns a chararray, so it is cast to int before the
--numeric comparison in the generate clause.
CALC_UR_DAYS_BETWEEN = (int)DaysBetween(
             ToString(ToDate(REPLACE(reported_date, '\\"', ''), 'yyyy-MM-dd'), 'dd-MMM-yyyy'),
             ToString(ToDate(REPLACE(reported_date2, '\\"', ''), 'yyyy-MM-dd'), 'dd-MMM-yyyy')); 


           generate 
             account_code, 
             serial_number, 
             reported_date, 
             reported_date2, 
             --Gaps above 15000 days are reported as 0.
             (CALC_UR_DAYS_BETWEEN > 15000 ? 0 : CALC_UR_DAYS_BETWEEN) AS days_since_last_reported; 
             } 
dump devicePageCountAll2; 

謝謝!

+0

僅供參考 - 如果你不想在 Pig 腳本的開頭寫 REGISTER,可以改用 `-Dpig.additional.jars=EasyDates.jar` 來代替。 – rahulbmv

回答

1

取而代之的是

DEFINE DaysBetween EasyDates.EasyDateMethods; 

嘗試

DEFINE DaysBetween EasyDates.EasyDateMethods(); 
+0

感謝您的建議,但它沒有奏效。我現在認爲是我的 jar 文件有問題。 – PLB