Commit 1a6529ab1ce53a78b802e74cf5e2e233e70f569e

Authored by Paulo Graça
1 parent 0eae10bba7
Exists in DSpace52++

Update robots.txt

Showing 1 changed file with 97 additions and 0 deletions   Show diff stats
dspace-jspui/src/main/webapp/robots.txt
... ... @@ -152,20 +152,36 @@ Disallow: /
152 152  
153 153  
154 154 # RCAAP: remove other crawlers
  155 +User-agent: 360Spider
  156 +Disallow: /
155 157 User-agent: adbeat_bot
156 158 Disallow: /
157 159 User-agent: AhrefsBot
158 160 Disallow: /
  161 +User-agent: aiHitBot
  162 +Disallow: /
159 163 User-agent: AITCSRobot
160 164 Disallow: /
161 165 User-agent: Alexibot
162 166 Disallow: /
  167 +User-agent: BacklinkCrawler
  168 +Disallow: /
163 169 User-agent: Baiduspider
164 170 Disallow: /
165 171 User-agent: BLEXBot
166 172 Disallow: /
  173 +User-agent: careerbot
  174 +Disallow: /
  175 +User-agent: CCBot
  176 +Disallow: /
167 177 User-agent: Cliqzbot
168 178 Disallow: /
  179 +User-Agent: CloudServerMarketSpider
  180 +Disallow: /
  181 +User-agent: coccoc
  182 +Disallow: /
  183 +User-agent: Domain Re-Animator Bot
  184 +Disallow: /
169 185 User-agent: DotBot
170 186 Disallow: /
171 187 User-agent: dotbot
... ... @@ -174,34 +190,110 @@ User-agent: Exabot
174 190 Disallow: /
175 191 User-agent: expo9
176 192 Disallow: /
  193 +User-agent: fr-crawler
  194 +Disallow: /
  195 +User-agent: HaosouSpider
  196 +Disallow: /
177 197 User-agent: Huaweisymantecspider
178 198 Disallow: /
  199 +User-agent: ICCrawler
  200 +Disallow: /
  201 +User-agent: ImplisenseBot
  202 +Disallow: /
179 203 User-agent: InfluenceBot
180 204 Disallow: /
  205 +User-agent: Kraken
  206 +Disallow: /
  207 +User-agent: LinkStats
  208 +Disallow: /
  209 +User-agent: Lipperhey-Kaus-Australis
  210 +Disallow: /
181 211 User-agent: ltx71
182 212 Disallow: /
183 213 User-agent: ltx71 - (http://ltx71.com/)
184 214 Disallow: /
  215 +User-agent: JobboerseBot
  216 +Disallow: /
  217 +User-agent: jobs.de-Robot
  218 +Disallow: /
  219 +User-agent: magpie-crawler
  220 +Disallow: /
185 221 User-agent: MaxPointCrawler
186 222 Disallow: /
  223 +User-agent: meanpathbot
  224 +Disallow: /
  225 +User-agent: MegaIndex.ru
  226 +Disallow: /
  227 +User-agent: megaindex.com
  228 +Disallow: /
  229 +User-agent: MetaJobBot
  230 +Disallow: /
  231 +User-agent: mindUpBot
  232 +Disallow: /
187 233 User-agent: MJ12bot
188 234 Disallow: /
  235 +User-agent: oBot
  236 +Disallow: /
  237 +User-agent: OpenHoseBot
  238 +Disallow: /
  239 +User-agent: Plista
  240 +Disallow: /
  241 +User-agent: plukkie
  242 +Disallow: /
  243 +User-agent: Qwantify
  244 +Disallow: /
  245 +User-agent: R6_CommentReader
  246 +Disallow: /
189 247 User-agent: rogerbot
190 248 Disallow: /
  249 +User-agent: SafeDNSBot
  250 +Disallow: /
  251 +User-agent: Screaming Frog SEO Spider
  252 +Disallow: /
  253 +User-agent: SearchmetricsBot
  254 +Disallow: /
191 255 User-agent: SemrushBot
192 256 Disallow: /
193 257 User-agent: SemrushBot-SA
194 258 Disallow: /
  259 +user-agent: SEOdiver
  260 +disallow: /
  261 +User-agent: SEOkicks-Robot
  262 +Disallow: /
195 263 User-agent: seoscanners.net
196 264 Disallow: /
  265 +User-agent: sg-Orbiter
  266 +Disallow: /
  267 +User-agent: sistrix
  268 +Disallow: /
  269 +User-agent: SISTRIX Crawler
  270 +Disallow: /
  271 +User-agent: SISTRIX
  272 +Disallow: /
197 273 User-agent: SiteSnagger
198 274 Disallow: /
  275 +User-agent: Spiderbot
  276 +Disallow: /
  277 +User-agent: spbot
  278 +Disallow: /
199 279 User-agent: SurveyBot
200 280 Disallow: /
  281 +User-agent: ThumbSniper
  282 +Disallow: /
  283 +User-Agent: trendictionbot
  284 +Disallow: /
201 285 User-agent: turnitinbot
202 286 Disallow: /
  287 +User-agent: um-IC
  288 +Disallow: /
  289 +user-agent: UnisterBot
  290 +disallow: /
  291 +User-agent: WBSearchBot
  292 +Disallow: /
203 293 User-agent: WebReaper
204 294 Disallow: /
  295 +User-agent: wotbox
  296 +Disallow: /
205 297 User-agent: Xaldon_WebSpider
206 298 Disallow: /
207 299 User-agent: Xenu’s
... ... @@ -211,6 +303,11 @@ Disallow: /
211 303 User-agent: YandexBot
212 304 Disallow: /
213 305  
  306 +
  307 +# RCAAP Slow down all bots
  308 +User-agent: *
  309 +Crawl-delay: 10
  310 +
214 311 # RCAAP delay crawlers
215 312 User-agent: bingbot
216 313 Crawl-delay: 10
... ...