# General crawling rules: allow all bots to crawl the site by default
User-agent: *
Allow: /

# Disallow crawling of dynamic or private sections that should not be indexed
Disallow: /V2/             # Block version 2 of the site (outdated, not public-facing)
Disallow: /V3/             # Block version 3 of the site for the same reason
Disallow: /cls/            # Disallow old class/content pages
Disallow: /clnt/           # Disallow client-specific or internal content
Disallow: /source/         # Disallow raw or development files that shouldn't be indexed
Disallow: /khursheed-alam/ # Disallow raw or development files that shouldn't be indexed
Disallow: /gia/            # Disallow raw or development files that shouldn't be indexed

# Block HTTrack and similar bots from crawling the entire site
# (user-agent values are matched as tokens, so no trailing wildcard is needed)
User-agent: HTTrack
Disallow: /

# Block sensitive areas like login, admin, and checkout (common for e-commerce sites)
# Disallow: /admin/    # Block admin login or control panel areas
# Disallow: /login/    # Block any login pages
# Disallow: /checkout/ # Block checkout pages to prevent sensitive data indexing

# Block any testing or development URLs (if applicable)
# Disallow: /test/     # Block test or staging sections
# Disallow: /dev/      # Block development sections

# Block specific user-agent access if needed (e.g., bots that crawl too aggressively)
# User-agent: Googlebot-Image
# Disallow: /images/   # Block Googlebot for images from crawling specific image directories

# Block unwanted bots and web scrapers based on User-Agent string
# User-agent: AhrefsBot
# Disallow: /
# User-agent: SemrushBot
# Disallow: /
# User-agent: BLEXBot
# Disallow: /

# Sitemap location (helps search engines find your sitemap)
Sitemap: https://www.dotdezine.com/sitemap.xml