# Some sites embed details in data-attributes:
# NOTE(review): `c` is presumably the result element currently being
# processed by an enclosing card loop - confirm against the surrounding
# method, which is not visible next to this fragment.
year = c.get("data-year")
language = c.get("data-language")
quality = c.get("data-quality")
class FilmyFlyScraper(BaseScraper):
    # URL template for a FilmyFly search. The `{query}` replacement field is
    # required: the search method fills it with
    # `SEARCH_URL.format(query=...)`, and without the field `str.format`
    # returns the template unchanged, so every search would request the
    # literal path "/search/query".
    SEARCH_URL = "https://www.filmyfly.in/search/{query}"
class Filmy4wapScraper(BaseScraper):
    # URL template for a Filmy4wap search. The `{query}` replacement field is
    # required for `SEARCH_URL.format(query=...)` to substitute the search
    # terms; without it the literal text "query" would be sent as the `q`
    # parameter.
    # NOTE(review): assumes this scraper formats SEARCH_URL the same way the
    # visible `search` classmethod does - confirm.
    SEARCH_URL = "https://www.filmy4wap.in/search?q={query}"
# Collect raw results from each site
raw = []
for scraper in (FilmyFlyScraper, Filmy4wapScraper, FilmywapScraper):
    try:
        raw.extend(scraper.search(query_norm))
    except Exception as e:
        # We never want a single site failure to break the whole flow, so
        # the failure is reported and the remaining scrapers still run.
        print(f"[⚠️] {scraper.__name__} failed: {e}")
# NOTE(review): the extracted line ended with the truncated fragment below;
# it is kept verbatim as a comment because its continuation is not visible.
# Some sites embed details in data-attributes: year = c
return "query": query, "normalized_query": query_norm, "total_matches": len(matches), "results": matches,
# Example meta: "2022 Hindi 1080p"
meta = c.select_one("span.meta")
year, language, quality = None, None, None
if meta:
    txt = meta.get_text()
    # `\d{2}` (two digits) - the quantifier braces are required; `20\d2`
    # would only accept years that happen to end in the digit 2.
    m_year = re.search(r"\b(20\d{2})\b", txt)
    year = m_year.group(1) if m_year else None
    # Only Hindi is recognised here; any other language leaves None.
    language = "Hindi" if "hindi" in txt.lower() else None
    qual_match = re.search(r"\b(720p|1080p|4k)\b", txt, re.I)
    quality = qual_match.group(0) if qual_match else None
# ----------------------------------------------------------------------
# 5️⃣ Command-line interface (nice for quick testing)
# ----------------------------------------------------------------------
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description="Search Hindi movies on FilmyFly / Filmy4wap / Filmywap."
    )
    # One or more words; argparse collects them into a list under `title`.
    parser.add_argument(
        "title",
        nargs="+",
        help="Movie title (e.g. 'Da-unaloda stainda apa rahula 2022 hindi')",
    )
    parser.add_argument(
        "-o",
        "--output",
        default="movie_search_result.json",
        help="File to write JSON output to (default: %(default)s)",
    )
    args = parser.parse_args()
    # NOTE(review): the extracted text ends here with a stray `movie_finder`
    # token; the rest of the CLI body (running the search and writing the
    # output) is not visible in this span.
# ----------------------------------------------------------------------
# 3️⃣ Matching logic (exact first, then fuzzy)
# ----------------------------------------------------------------------
def match_results(
    results: List[Dict[str, Any]],
    query_norm: str,
    min_fuzzy: int = 85,
) -> List[Dict[str, Any]]:
    """Return a list of results that match the query.

    Args:
        results: Result dicts; each must carry a "title" key.
        query_norm: The already-normalized query string to compare against.
        min_fuzzy: Not used by the exact-match stage shown here; presumably
            the score threshold for the fuzzy stage - confirm.

    Returns:
        Every result whose normalized title equals ``query_norm``, when at
        least one such result exists.
    """
    exact = [r for r in results if normalize(r["title"]) == query_norm]
    if exact:
        return exact
    # NOTE(review): the fuzzy fallback announced in the section header is not
    # visible in this span, so only the exact-match stage is reproduced.
# Year & language are usually in a <p> like "2022 | Hindi | 720p"
meta = c.select_one("p.movie-meta")
year, language, quality = None, None, None
if meta:
    parts = [p.strip() for p in meta.get_text(separator="|").split("|")]
    for p in parts:
        if not p:
            # Empty segments (e.g. from a trailing "|") carry no
            # information and must not overwrite `quality`.
            continue
        # `\d{4}` - exactly four digits; the quantifier braces are required.
        if re.fullmatch(r"\d{4}", p):
            year = p
        elif p.lower() in {"hindi", "english", "telugu", "marathi"}:
            language = p
        else:
            # Anything that is neither a year nor a known language is
            # treated as the quality label.
            quality = p
print(json.dumps(data, ensure_ascii=False, indent=2))

this feature into your existing project

| Scenario | Integration steps |
|----------|-------------------|
| Existing Flask/Django API | 1. Copy the whole file into a module (e.g. `movie_finder.py`). 2. Import `search_movie` inside a view/endpoint. 3. Return `jsonify(search_movie(title))`. |
| Desktop GUI (Tkinter / PyQt) | 1. Wire a “Search” button to `search_movie(user_input)`. 2. Populate a table/list with `result["title"]`, `year`, `quality`, and a clickable hyperlink (`result["url"]`). |
| Home-assistant / Node-RED | 1. Expose the script via a lightweight HTTP server (e.g. uvicorn + FastAPI). 2. Call the endpoint from your automation flow and parse the JSON. |
| Filmy4wap and Filmywap
""" Feature: Search for a Hindi movie (e.g. "Da-unaloda stainda apa rahula -2022") across FilmyFly, Filmy4wap and Filmywap, and return structured result data.
@classmethod
def search(cls, query: str) -> List[Dict[str, Any]]:
    """Fetch the site's search page for *query* and walk its result cards.

    The query's spaces are replaced with "+" and substituted into
    ``cls.SEARCH_URL``; the page is fetched with ``cls._get`` and parsed
    with BeautifulSoup's "html.parser". Each ``div.result-item`` card
    contributes its ``a.title`` text and cleaned link; cards without such
    an anchor are skipped.
    """
    url = cls.SEARCH_URL.format(query=query.replace(" ", "+"))
    soup = BeautifulSoup(cls._get(url).text, "html.parser")
    cards = soup.select("div.result-item")
    results = []
    for c in cards:
        a = c.select_one("a.title")
        if not a:
            continue
        title = a.get_text(strip=True)
        href = cls._clean_link(a["href"])
        # NOTE(review): the rest of the loop body (metadata parsing and the
        # append to `results`) and the final return are not visible in this
        # span.
results.append( "source": "FilmyFly", "title": title, "year": year, "language": language, "quality": quality, "url": href, ) return results