2014-12-19 74 views
1

我在谷歌和stackoverflow搜索了很多次,我發現了很多問題和答案,但我發現一些存在的錯誤。獲取純域名和擴展

所以我試圖寫我自己的算法BU我不知道。看起來像它的工作。 我使用了一個無限循環。這是一個問題嗎?如果是的話,如何避免這樣的循環。

如果在我的代碼中存在任何錯誤,我需要你的幫助。

注意:英語不是我的母語。

最好的問候。

/** 
    * @description get pure domain or domain.extension 
    * @param $p_domain 
    * @param $ext :: if 1 => with extension 
    * @return string|false 
    */ 
    public function get_pure_domain($p_domain, $ext=1) 
    { 

     //remove protocols 
     $clean = array("http://", "https://", "ftp://", "@", "ftps://", ":"); 
     $domain = str_replace($clean, "", $p_domain); 

     //remove slashes before dot 
     $dot_len=strpos($domain,'.'); 
     if (strpos(substr($domain,0,$dot_len) ,'/')) return false; 

     if (!$dot_len) 
     return false; 


     // if slashes exist remove after 
     if (strpos($domain, "/")) 
      $domain=substr($domain, 0, strpos($domain, "/")); 


     $clean_domain=$domain; 

     // explode with dot 
     $items = explode(".", $domain); 

     //select probable domain without extension 
     $sub_domain_count=count($items)-2; 
     $pure_domain=$items[$sub_domain_count]; 


     //if selected domain matched an extension, select provious (go back -1 in array) 

     $d_count=0; 
     while(1==1) 
     { 
      $d_count++; 

      if ($this->validate_ext($pure_domain)) 
       $pure_domain=$items[--$sub_domain_count]; 
      else 
       break; 

      //break infinite loop after 50 
      if ($d_count==50) break; 

     } 

     // if requested, return with extension 
     if ($ext==1) 
     { 
      $extension_start=strpos($clean_domain,$pure_domain)+strlen($pure_domain); 
      $pure_domain=$pure_domain.substr($clean_domain,$extension_start); 
     } 


     //total parts cannot be greater than 3 
     $items = explode(".", $pure_domain); 
     if(count($items)>2) 
     { 
      $pure_domain = $items[count($items) - 3] . '.' . $items[count($items) - 2] . '.' . $items[count($items) - 1]; 

      //search for last two parts in extensions if not exist return these two parts as domain 
      if (!$this->validate_ext($items[count($items) - 2] . '.' . $items[count($items) - 1])) 
      { 
       if ($ext==1) 
        $pure_domain=$items[count($items) - 2] . '.' . $items[count($items) - 1]; 
       else 
        $pure_domain=$items[count($items) - 2]; 
      } 

     } 

     //if extensions and domain are same algorithm may take subdomain as domain 
     $items = explode(".", $pure_domain); 
     if(count($items)>1) 
     { 
      if ($items[count($items) - 1] == $items[count($items) - 2]) 
      { 
       if ($ext==1) 
        $pure_domain = $items[count($items) - 2] . '.' . $items[count($items) - 1]; 
       else 
        $pure_domain=$items[count($items) - 2]; 
      } 
     } 


     //if domain numeric may be an ip return false 
     if (is_numeric($pure_domain)) 
      return false; 

     //return result 
     return $pure_domain; 

    } 

public function validate_ext($ext) 
    { 
     if (strpos($ext,".")) str_replace('.','',$ext); 

     if (in_array($ext, Data::$extensions)) 
      return true; 
     else 
      return false; 

    } 

class Data 
{ 
    //18-12-2014 all domains extension 
    public static $extensions = array("abogado", "ac", "academy", "accountants", "active", "actor", "ad", "adult", "ae", "aero", "af", "ag", "agency", "ai", 
     "airforce", "al", "allfinanz", "alsace", "am", "an", "android", "ao", "aq", "aquarelle", "ar", "archi", "army", "arpa", "as", "asia", "associates", 
     "at", "attorney", "au", "auction", "audio", "autos", "aw", "ax", "axa", "az", "ba", "band", "bar", "bargains", "bayern", "bb", "bd", "be", "beer", "berlin", 
     "best", "bf", "bg", "bh", "bi", "bid", "bike", "bio", "biz", "bj", "black", "blackfriday", "bloomberg", "blue", "bm", "bmw", "bn", "bnpparibas", "bo", "boo", 
     "boutique", "br", "brussels", "bs", "bt", "budapest", "build", "builders", "business", "buzz", "bv", "bw", "by", "bz", "bzh", "ca", "cab", "cal", "camera", 
     "camp", "cancerresearch", "capetown", "capital", "caravan", "cards", "care", "career", "careers", "cartier", "casa", "cash", "cat", "catering", "cc", 
     "cd", "center", "ceo", "cern", "cf", "cg", "ch", "channel", "cheap", "christmas", "chrome", "church", "ci", "citic", "city", "ck", "cl", "claims", "cleaning", 
     "click", "clinic", "clothing", "club", "cm", "cn", "co", "coach", "codes", "coffee", "college", "cologne", "com", "community", "company", "computer", "condos", 
     "construction", "consulting", "contractors", "cooking", "cool", "coop", "country", "cr", "credit", "creditcard", "cricket", "crs", "cruises", "cu", 
     "cuisinella", "cv", "cw", "cx", "cy", "cymru", "cz", "dad", "dance", "dating", "day", "de", "deals", "degree", "delivery", "democrat", "dental", "dentist", 
     "desi", "diamonds", "diet", "digital", "direct", "directory", "discount", "dj", "dk", "dm", "dnp", "do", "domains", "doosan", "durban", "dvag", "dz", "eat", 
     "ec", "edu", "education", "ee", "eg", "email", "emerck", "energy", "engineer", "engineering", "enterprises", "equipment", "er", "es", "esq", "estate", "et", 
     "eu", "eurovision", "eus", "events", "everbank", "exchange", "expert", "exposed", "fail", "farm", "fashion", "feedback", "fi", "finance", "financial", 
     "firmdale", "fish", "fishing", "fitness", "fj", "fk", "flights", "florist", "flsmidth", "fly", "fm", "fo", "foo", "forsale", "foundation", "fr", "frl", 
     "frogans", "fund", "furniture", "futbol", "ga", "gal", "gallery", "garden", "gb", "gbiz", "gd", "ge", "gent", "gf", "gg", "gh", "gi", "gift", "gifts", "gives", 
     "gl", "glass", "gle", "global", "globo", "gm", "gmail", "gmo", "gmx", "gn", "google", "gop", "gov", "gp", "gq", "gr", "graphics", "gratis", "green", "gripe", 
     "gs", "gt", "gu", "guide", "guitars", "guru", "gw", "gy", "hamburg", "haus", "healthcare", "help", "here", "hiphop", "hiv", "hk", "hm", "hn", "holdings", 
     "holiday", "homes", "horse", "host", "hosting", "house", "how", "hr", "ht", "hu", "ibm", "id", "ie", "il", "im", "immo", "immobilien", "in", "industries", 
     "info", "ing", "ink", "institute", "insure", "int", "international", "investments", "io", "iq", "ir", "irish", "is", "it", "iwc", "je", "jetzt", "jm", "jo", 
     "jobs", "joburg", "jp", "juegos", "kaufen", "ke", "kg", "kh", "ki", "kim", "kitchen", "kiwi", "km", "kn", "koeln", "kp", "kr", "krd", "kred", "kw", "ky", "kz", 
     "la", "lacaixa", "land", "latrobe", "lawyer", "lb", "lc", "lds", "lease", "legal", "lgbt", "li", "lidl", "life", "lighting", "limited", "limo", "link", "lk", 
     "loans", "london", "lotto", "lr", "ls", "lt", "ltda", "lu", "luxe", "luxury", "lv", "ly", "ma", "madrid", "maison", "management", "mango", "market", "marketing", 
     "mc", "md", "me", "media", "meet", "melbourne", "meme", "memorial", "menu", "mg", "mh", "miami", "mil", "mini", "mk", "ml", "mm", "mn", "mo", "mobi", "moda", 
     "moe", "monash", "money", "mormon", "mortgage", "moscow", "motorcycles", "mov", "mp", "mq", "mr", "ms", "mt", "mu", "museum", "mv", "mw", "mx", "my", "mz", "na", 
     "nagoya", "name", "navy", "nc", "ne", "net", "network", "neustar", "new", "nexus", "nf", "ng", "ngo", "nhk", "ni", "ninja", "nl", "no", "np", "nr", "nra", "nrw", 
     "nu", "nyc", "nz", "okinawa", "om", "ong", "onl", "ooo", "org", "organic", "osaka", "otsuka", "ovh", "pa", "paris", "partners", "parts", "party", "pe", "pf", "pg", 
     "ph", "pharmacy", "photo", "photography", "photos", "physio", "pics", "pictures", "pink", "pizza", "pk", "pl", "place", "plumbing", "pm", "pn", "pohl", "poker", 
     "porn", "post", "pr", "praxi", "press", "pro", "prod", "productions", "prof", "properties", "property", "ps", "pt", "pub", "pw", "py", "qa", "qpon", "quebec", 
     "re", "realtor", "recipes", "red", "rehab", "reise", "reisen", "reit", "ren", "rentals", "repair", "report", "republican", "rest", "restaurant", "reviews", 
     "rich", "rio", "rip", "ro", "rocks", "rodeo", "rs", "rsvp", "ru", "ruhr", "rw", "ryukyu", "sa", "saarland", "samsung", "sarl", "sb", "sc", "sca", "scb", "schmidt", 
     "schule", "schwarz", "science", "scot", "sd", "se", "services", "sew", "sexy", "sg", "sh", "shiksha", "shoes", "si", "singles", "sj", "sk", "sky", "sl", "sm", "sn", 
     "so", "social", "software", "sohu", "solar", "solutions", "soy", "space", "spiegel", "sr", "st", "su", "supplies", "supply", "support", "surf", "surgery", 
     "suzuki", "sv", "sx", "sy", "sydney", "systems", "sz", "taipei", "tatar", "tattoo", "tax", "tc", "td", "technology", "tel", "tf", "tg", "th", "tienda", "tips", 
     "tirol", "tj", "tk", "tl", "tm", "tn", "to", "today", "tokyo", "tools", "top", "town", "toys", "tp", "tr", "trade", "training", "travel", "trust", "tt", "tui", 
     "tv", "tw", "tz", "ua", "ug", "uk", "university", "uno", "uol", "us", "uy", "uz", "va", "vacations", "vc", "ve", "vegas", "ventures", "versicherung", "vet", "vg", 
     "vi", "viajes", "villas", "vision", "vlaanderen", "vn", "vodka", "vote", "voting", "voto", "voyage", "vu", "wales", "wang", "watch", "webcam", "website", 
     "wed", "wedding", "wf", "whoswho", "wien", "wiki", "williamhill", "wme", "work", "works", "world", "ws", "wtc", "wtf", "xn--1qqw23a", "xn--3bst00m", 
     "xn--3ds443g", "xn--3e0b707e", "xn--45brj9c", "xn--45q11c", "xn--4gbrim", "xn--55qw42g", "xn--55qx5d", "xn--6frz82g", "xn--6qq986b3xl", "xn--80adxhks", 
     "xn--80ao21a", "xn--80asehdb", "xn--80aswg", "xn--90a3ac", "xn--c1avg", "xn--cg4bki", "xn--clchc0ea0b2g2a9gcd", "xn--czr694b", "xn--czrs0t", 
     "xn--czru2d", "xn--d1acj3b", "xn--d1alf", "xn--fiq228c5hs", "xn--fiq64b", "xn--fiqs8s", "xn--fiqz9s", "xn--flw351e", "xn--fpcrj9c3d", "xn--fzc2c9e2c", 
     "xn--gecrj9c", "xn--h2brj9c", "xn--hxt814e", "xn--i1b6b1a6a2e", "xn--io0a7i", "xn--j1amh", "xn--j6w193g", "xn--kprw13d", "xn--kpry57d", "xn--kput3i", 
     "xn--l1acc", "xn--lgbbat1ad8j", "xn--mgb9awbf", "xn--mgba3a4f16a", "xn--mgbaam7a8h", "xn--mgbab2bd", "xn--mgbayh7gpa", "xn--mgbbh1a71e", 
     "xn--mgbc0a9azcg", "xn--mgberp4a5d4ar", "xn--mgbx4cd0ab", "xn--ngbc5azd", "xn--node", "xn--nqv7f", "xn--nqv7fs00ema", "xn--o3cw4h", "xn--ogbpf8fl", 
     "xn--p1acf", "xn--p1ai", "xn--pgbs0dh", "xn--q9jyb4c", "xn--qcka1pmc", "xn--rhqv96g", "xn--s9brj9c", "xn--ses554g", "xn--unup4y", 
     "xn--vermgensberater-ctb", "xn--vermgensberatung-pwb", "xn--vhquv", "xn--wgbh1c", "xn--wgbl6a", "xn--xhq521b", "xn--xkc2al3hye2a", 
     "xn--xkc2dl3a5ee0h", "xn--yfro4i67o", "xn--ygbi2ammx", "xn--zfr164b", "xxx", "xyz", "yachts", "yandex", "ye", "yoga", "yokohama", "youtube", "yt", "za", 
     "zip", "zm", "zone", "zw"); 

} 

回答

1

沒有人迴應我的問題,所以我提高了我的方法,我會用這樣使用:

public function get_pure_domain($p_domain, $ext=1) 
    { 

     //remove protocols 
     $clean = array("http://", "https://", "ftp://", "@", "ftps://", ":"); 
     $domain = str_replace($clean, "", $p_domain); 

     //remove slashes before dot 
     $dot_len=strpos($domain,'.'); 
     if (strpos(substr($domain,0,$dot_len) ,'/')) return false; 

     if (!$dot_len) 
     return false; 


     // if slashes exist remove after 
     if (strpos($domain, "/")) 
      $domain=substr($domain, 0, strpos($domain, "/")); 


     $clean_domain=$domain; 

     // explode with dot 
     $items = explode(".", $domain); 

     //select probable domain without extension 
     $sub_domain_count=count($items)-2; 
     $pure_domain=$items[$sub_domain_count]; 


     //if selected domain matched an extension, select provious (go back -1 in array) 

     $d_count=0; 
     while(1==1) 
     { 
      $d_count++; 

      if ($this->validate_ext($pure_domain)) 
       $pure_domain=$items[--$sub_domain_count]; 
      else 
       break; 

      //break infinite loop after 50 
      if ($d_count==50) break; 

     } 

     // if requested, return with extension 
     if ($ext==1) 
     { 
      $extension_start=strpos($clean_domain,$pure_domain)+strlen($pure_domain); 
      $pure_domain=$pure_domain.substr($clean_domain,$extension_start); 
     } 


     //total parts cannot be greater than 3 
     $items = explode(".", $pure_domain); 
     if(count($items)>2) 
     { 
      $pure_domain = $items[count($items) - 3] . '.' . $items[count($items) - 2] . '.' . $items[count($items) - 1]; 

      //search for last two parts in extensions if not exist return these two parts as domain 
      if (!$this->validate_ext($items[count($items) - 2] . '.' . $items[count($items) - 1])) 
      { 
       if ($ext==1) 
        $pure_domain=$items[count($items) - 2] . '.' . $items[count($items) - 1]; 
       else 
        $pure_domain=$items[count($items) - 2]; 
      } 

     } 

     //if extensions and domain are same algorithm may take subdomain as domain 
     $items = explode(".", $pure_domain); 
     if(count($items)>1) 
     { 
      if ($items[count($items) - 1] == $items[count($items) - 2]) 
      { 
       if ($ext==1) 
        $pure_domain = $items[count($items) - 2] . '.' . $items[count($items) - 1]; 
       else 
        $pure_domain=$items[count($items) - 2]; 
      } 
     } 


     //if domain numeric may be an ip return false 
     if (is_numeric($pure_domain)) 
      return false; 

     //return result 
     return $pure_domain; 

    }